Skip to main content

nvme/workers/
admin.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Admin queue handler.
5
6use super::IoQueueEntrySizes;
7use super::MAX_DATA_TRANSFER_SIZE;
8use super::io::IoHandler;
9use super::io::IoState;
10use crate::DOORBELL_STRIDE_BITS;
11use crate::MAX_QES;
12use crate::NVME_VERSION;
13use crate::PAGE_MASK;
14use crate::PAGE_SIZE;
15use crate::VENDOR_ID;
16use crate::error::CommandResult;
17use crate::error::NvmeError;
18use crate::namespace::Namespace;
19use crate::prp::PrpRange;
20use crate::queue::CompletionQueue;
21use crate::queue::DoorbellMemory;
22use crate::queue::QueueError;
23use crate::queue::SubmissionQueue;
24use crate::spec;
25use disk_backend::Disk;
26use futures::FutureExt;
27use futures::SinkExt;
28use futures::StreamExt;
29use futures_concurrency::future::Race;
30use guestmem::GuestMemory;
31use guid::Guid;
32use inspect::Inspect;
33use pal_async::task::Spawn;
34use pal_async::task::Task;
35use parking_lot::Mutex;
36use parking_lot::RwLock;
37use std::collections::BTreeMap;
38use std::collections::btree_map;
39use std::future::pending;
40use std::future::poll_fn;
41use std::io::Cursor;
42use std::io::Write;
43use std::sync::Arc;
44use task_control::AsyncRun;
45use task_control::Cancelled;
46use task_control::InspectTask;
47use task_control::StopTask;
48use task_control::TaskControl;
49use thiserror::Error;
50use vmcore::interrupt::Interrupt;
51use vmcore::vm_task::VmTaskDriver;
52use vmcore::vm_task::VmTaskDriverSource;
53use zerocopy::FromBytes;
54use zerocopy::FromZeros;
55use zerocopy::IntoBytes;
56
/// I/O submission queue entry size, reported as both the minimum and the
/// maximum in Identify Controller and required when creating I/O submission
/// queues. Presumably a log2 byte count (2^6 = 64 bytes), given that it is
/// compared against `sqe_bits`.
const IOSQES: u8 = 6;
/// I/O completion queue entry size, reported as both the minimum and the
/// maximum in Identify Controller and required when creating I/O completion
/// queues. Presumably a log2 byte count (2^4 = 16 bytes), given that it is
/// compared against `cqe_bits`.
const IOCQES: u8 = 4;
/// Number of asynchronous event requests advertised to the host (reported
/// zero-based via `aerl`). This is the minimum recommended by the spec.
const MAX_ASYNC_EVENT_REQUESTS: u8 = 4;
/// Number of error log page entries advertised to the host (reported
/// zero-based via `elpe`).
const ERROR_LOG_PAGE_ENTRIES: u8 = 1;
61
62#[derive(Inspect)]
63pub struct AdminConfig {
64    #[inspect(skip)]
65    pub driver_source: VmTaskDriverSource,
66    #[inspect(skip)]
67    pub mem: GuestMemory,
68    #[inspect(skip)]
69    pub interrupts: Vec<Interrupt>,
70    #[inspect(skip)]
71    pub doorbells: Arc<RwLock<DoorbellMemory>>,
72    #[inspect(display)]
73    pub subsystem_id: Guid,
74    pub max_sqs: u16,
75    pub max_cqs: u16,
76    pub qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
77}
78
79#[derive(Inspect)]
80pub struct AdminHandler {
81    driver: VmTaskDriver,
82    config: AdminConfig,
83    #[inspect(iter_by_key)]
84    namespaces: BTreeMap<u32, Arc<Namespace>>,
85}
86
87#[derive(Inspect)]
88pub struct AdminState {
89    admin_sq: SubmissionQueue,
90    admin_cq: CompletionQueue,
91    #[inspect(with = "|x| inspect::iter_by_index(x).map_key(|x| x + 1)")]
92    io_sqs: Vec<Option<IoSq>>,
93    #[inspect(with = "|x| inspect::iter_by_index(x).map_key(|x| x + 1)")]
94    io_cqs: Vec<IoCq>,
95    #[inspect(skip)]
96    sq_delete_response: mesh::Receiver<u16>,
97    #[inspect(iter_by_index)]
98    asynchronous_event_requests: Vec<u16>,
99    #[inspect(
100        rename = "namespaces",
101        with = "|x| inspect::iter_by_key(x.iter().map(|v| (v, ChangedNamespace { changed: true })))"
102    )]
103    changed_namespaces: Vec<u32>,
104    notified_changed_namespaces: bool,
105    /// Asynchronous Event Configuration (Set Features FID 0x0B / CDW11),
106    /// stored verbatim and echoed back via Get Features. The NVMe Base
107    /// specification lists this Feature as mandatory for I/O controllers
108    /// (Base 2.0c section 3.1.2.1.1 / Base 2.3 section 3.1.3.6, "Feature
109    /// Support Requirements"). Each bit in CDW11 enables a class of
110    /// asynchronous event notification (refer to
111    /// [`spec::Cdw11FeatureAsyncEventConfig`]). Initiators that strictly
112    /// follow the spec may refuse to allocate any Asynchronous Event
113    /// Request resources when the Set Features command for this Feature
114    /// is rejected, which breaks AEN delivery (including the
115    /// changed-namespace AEN that drives namespace hot-add notification).
116    ///
117    /// Defaults to all bits set so that any AEN class the controller
118    /// chooses to fire is enabled until the host explicitly narrows the
119    /// mask via Set Features.
120    async_event_config: u32,
121    #[inspect(skip)]
122    recv_changed_namespace: futures::channel::mpsc::Receiver<u32>,
123    #[inspect(skip)]
124    send_changed_namespace: futures::channel::mpsc::Sender<u32>,
125    #[inspect(skip)]
126    poll_namespace_change: BTreeMap<u32, Task<()>>,
127}
128
129#[derive(Inspect)]
130struct ChangedNamespace {
131    changed: bool,
132}
133
134#[derive(Inspect)]
135struct IoSq {
136    pending_delete_cid: Option<u16>,
137    sq_idx: usize,
138    cqid: u16,
139}
140
141#[derive(Inspect)]
142struct IoCq {
143    driver: VmTaskDriver,
144    #[inspect(flatten)]
145    task: TaskControl<IoHandler, IoState>,
146}
147
148impl AdminState {
149    pub fn new(handler: &AdminHandler, asq: u64, asqs: u16, acq: u64, acqs: u16) -> Self {
150        // Start polling for namespace changes. Use a bounded channel to avoid
151        // unbounded memory allocation when the queue is stuck.
152        #[expect(clippy::disallowed_methods)] // TODO
153        let (send_changed_namespace, recv_changed_namespace) = futures::channel::mpsc::channel(256);
154        let poll_namespace_change = handler
155            .namespaces
156            .iter()
157            .map(|(&nsid, namespace)| {
158                (
159                    nsid,
160                    spawn_namespace_notifier(
161                        &handler.driver,
162                        nsid,
163                        namespace.clone(),
164                        send_changed_namespace.clone(),
165                    ),
166                )
167            })
168            .collect();
169
170        let admin_cq = CompletionQueue::new(
171            handler.config.doorbells.clone(),
172            1,
173            handler.config.mem.clone(),
174            Some(handler.config.interrupts[0].clone()),
175            acq,
176            acqs,
177        );
178        let mut state = Self {
179            admin_sq: SubmissionQueue::new(&admin_cq, 0, asq, asqs),
180            admin_cq,
181            io_sqs: Vec::new(),
182            io_cqs: Vec::new(),
183            sq_delete_response: Default::default(),
184            asynchronous_event_requests: Vec::new(),
185            changed_namespaces: Vec::new(),
186            notified_changed_namespaces: false,
187            async_event_config: u32::MAX,
188            recv_changed_namespace,
189            send_changed_namespace,
190            poll_namespace_change,
191        };
192        state.set_max_queues(handler, handler.config.max_sqs, handler.config.max_cqs);
193        state
194    }
195
196    /// Stops all submission queues and drains them of any pending IO.
197    ///
198    /// This future may be dropped and reissued.
199    pub async fn drain(&mut self) {
200        for cq in &mut self.io_cqs {
201            cq.task.stop().await;
202            if let Some(state) = cq.task.state_mut() {
203                state.drain().await;
204                cq.task.remove();
205            }
206        }
207    }
208
209    /// Caller must ensure that no queues are active.
210    fn set_max_queues(&mut self, handler: &AdminHandler, num_sqs: u16, num_cqs: u16) {
211        self.io_sqs.truncate(num_sqs.into());
212        self.io_sqs.resize_with(num_sqs.into(), || None);
213        self.io_cqs.resize_with(num_cqs.into(), || {
214            // This driver doesn't explicitly do any IO (that's handled by
215            // the storage backends), so the target VP doesn't matter. But
216            // set it anyway as a hint to the backend that this queue needs
217            // its own thread.
218            let driver = handler
219                .config
220                .driver_source
221                .builder()
222                .run_on_target(false)
223                .target_vp(0)
224                .build("nvme");
225
226            IoCq {
227                driver,
228                task: TaskControl::new(IoHandler::new(
229                    handler.config.mem.clone(),
230                    self.sq_delete_response.sender(),
231                )),
232            }
233        });
234    }
235
236    fn add_changed_namespace(&mut self, nsid: u32) {
237        if let Err(i) = self.changed_namespaces.binary_search(&nsid) {
238            self.changed_namespaces.insert(i, nsid);
239        }
240    }
241
242    async fn add_namespace(
243        &mut self,
244        driver: &VmTaskDriver,
245        nsid: u32,
246        namespace: &Arc<Namespace>,
247    ) {
248        // Update the IO queues.
249        for cq in &mut self.io_cqs {
250            let io_running = cq.task.stop().await;
251            if let Some(io_state) = cq.task.state_mut() {
252                io_state.add_namespace(nsid, namespace.clone());
253            }
254            if io_running {
255                cq.task.start();
256            }
257        }
258
259        // Start polling.
260        let old = self.poll_namespace_change.insert(
261            nsid,
262            spawn_namespace_notifier(
263                driver,
264                nsid,
265                namespace.clone(),
266                self.send_changed_namespace.clone(),
267            ),
268        );
269        assert!(old.is_none());
270
271        // Notify the guest driver of the change.
272        self.add_changed_namespace(nsid);
273    }
274
275    async fn remove_namespace(&mut self, nsid: u32) {
276        // Update the IO queues.
277        for cq in &mut self.io_cqs {
278            let io_running = cq.task.stop().await;
279            if let Some(io_state) = cq.task.state_mut() {
280                io_state.remove_namespace(nsid);
281            }
282            if io_running {
283                cq.task.start();
284            }
285        }
286
287        // Stop polling.
288        self.poll_namespace_change
289            .remove(&nsid)
290            .unwrap()
291            .cancel()
292            .await;
293
294        // Notify the guest driver of the change.
295        self.add_changed_namespace(nsid);
296    }
297}
298
299fn spawn_namespace_notifier(
300    driver: &VmTaskDriver,
301    nsid: u32,
302    namespace: Arc<Namespace>,
303    mut send_changed_namespace: futures::channel::mpsc::Sender<u32>,
304) -> Task<()> {
305    driver.spawn("wait_resize", async move {
306        let mut counter = None;
307        loop {
308            counter = Some(namespace.wait_change(counter).await);
309            tracing::info!(nsid, "namespace changed");
310            if send_changed_namespace.send(nsid).await.is_err() {
311                break;
312            }
313        }
314    })
315}
316
317#[derive(Debug, Error)]
318#[error("invalid queue identifier {qid}")]
319struct InvalidQueueIdentifier {
320    qid: u16,
321    #[source]
322    reason: InvalidQueueIdentifierReason,
323}
324
325#[derive(Debug, Error)]
326enum InvalidQueueIdentifierReason {
327    #[error("queue id is out of bounds")]
328    Oob,
329    #[error("queue id is in use")]
330    InUse,
331    #[error("queue id is not in use")]
332    NotInUse,
333}
334
335impl From<InvalidQueueIdentifier> for NvmeError {
336    fn from(err: InvalidQueueIdentifier) -> Self {
337        Self::new(spec::Status::INVALID_QUEUE_IDENTIFIER, err)
338    }
339}
340
341enum Event {
342    Command(Result<spec::Command, QueueError>),
343    SqDeleteComplete(u16),
344    NamespaceChange(u32),
345}
346
347/// Error returned when adding a namespace with a conflicting ID.
348#[derive(Debug, Error)]
349#[error("namespace id conflict for {0}")]
350pub struct NsidConflict(u32);
351
352impl AdminHandler {
353    pub fn new(driver: VmTaskDriver, config: AdminConfig) -> Self {
354        Self {
355            driver,
356            config,
357            namespaces: Default::default(),
358        }
359    }
360
361    pub async fn add_namespace(
362        &mut self,
363        state: Option<&mut AdminState>,
364        nsid: u32,
365        disk: Disk,
366    ) -> Result<(), NsidConflict> {
367        let namespace = &*match self.namespaces.entry(nsid) {
368            btree_map::Entry::Vacant(entry) => entry.insert(Arc::new(Namespace::new(
369                self.config.mem.clone(),
370                nsid,
371                disk,
372            ))),
373            btree_map::Entry::Occupied(_) => return Err(NsidConflict(nsid)),
374        };
375
376        if let Some(state) = state {
377            state.add_namespace(&self.driver, nsid, namespace).await;
378        }
379
380        Ok(())
381    }
382
383    pub async fn remove_namespace(&mut self, state: Option<&mut AdminState>, nsid: u32) -> bool {
384        if self.namespaces.remove(&nsid).is_none() {
385            return false;
386        }
387
388        if let Some(state) = state {
389            state.remove_namespace(nsid).await;
390        }
391
392        true
393    }
394
395    async fn next_event(&mut self, state: &mut AdminState) -> Result<Event, QueueError> {
396        let event = loop {
397            // Wait for there to be room for a completion for the next
398            // command or the completed sq deletion.
399            poll_fn(|cx| state.admin_cq.poll_ready(cx)).await?;
400
401            // Fire the changed-namespace AEN only when the host has
402            // enabled the Attached Namespace Attribute Notices class via
403            // Set Features 0Bh (NVMe Base 2.0c section 5.21.1.11 /
404            // Base 2.3 section 5.2.26.1.5, CDW11 bit 8). Per spec,
405            // "If this bit is cleared to '0', then the controller shall
406            // not send the Attached Namespace Attribute Changed
407            // asynchronous event to the host." The mask defaults to all
408            // bits set, so this only suppresses delivery when the host
409            // has explicitly opted out via Set Features.
410            let ns_aen_enabled = spec::Cdw11FeatureAsyncEventConfig::from(state.async_event_config)
411                .namespace_attribute_notices();
412
413            if !state.changed_namespaces.is_empty()
414                && !state.notified_changed_namespaces
415                && ns_aen_enabled
416            {
417                if let Some(cid) = state.asynchronous_event_requests.pop() {
418                    state.admin_cq.write(
419                        spec::Completion {
420                            dw0: spec::AsynchronousEventRequestDw0::new()
421                                .with_event_type(spec::AsynchronousEventType::NOTICE.0)
422                                .with_log_page_identifier(spec::LogPageIdentifier::CHANGED_NAMESPACE_LIST.0)
423                                .with_information(spec::AsynchronousEventInformationNotice::NAMESPACE_ATTRIBUTE_CHANGED.0)
424                                .into(),
425                            dw1: 0,
426                            sqhd: state.admin_sq.sqhd(),
427                            sqid: 0,
428                            cid,
429                            status: spec::CompletionStatus::new(),
430                        },
431                    )?;
432
433                    state.notified_changed_namespaces = true;
434                    continue;
435                }
436            }
437
438            let next_command = poll_fn(|cx| state.admin_sq.poll_next(cx)).map(Event::Command);
439            let sq_delete_complete = async {
440                let Some(sqid) = state.sq_delete_response.next().await else {
441                    pending().await
442                };
443                Event::SqDeleteComplete(sqid)
444            };
445            let changed_namespace = async {
446                let Some(nsid) = state.recv_changed_namespace.next().await else {
447                    pending().await
448                };
449                Event::NamespaceChange(nsid)
450            };
451
452            break (next_command, sq_delete_complete, changed_namespace)
453                .race()
454                .await;
455        };
456        Ok(event)
457    }
458
459    async fn process_event(
460        &mut self,
461        state: &mut AdminState,
462        event: Result<Event, QueueError>,
463    ) -> Result<(), QueueError> {
464        let (cid, result) = match event? {
465            Event::Command(command) => {
466                let command = command?;
467                let opcode = spec::AdminOpcode(command.cdw0.opcode());
468
469                tracing::debug!(?opcode, ?command, "command");
470
471                let result = match opcode {
472                    spec::AdminOpcode::IDENTIFY => self
473                        .handle_identify(state, &command)
474                        .map(|()| Some(Default::default())),
475                    spec::AdminOpcode::GET_FEATURES => {
476                        self.handle_get_features(state, &command).await.map(Some)
477                    }
478                    spec::AdminOpcode::SET_FEATURES => {
479                        self.handle_set_features(state, &command).map(Some)
480                    }
481                    spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE => self
482                        .handle_create_io_completion_queue(state, &command)
483                        .map(|()| Some(Default::default())),
484                    spec::AdminOpcode::CREATE_IO_SUBMISSION_QUEUE => self
485                        .handle_create_io_submission_queue(state, &command)
486                        .await
487                        .map(|()| Some(Default::default())),
488                    spec::AdminOpcode::DELETE_IO_COMPLETION_QUEUE => self
489                        .handle_delete_io_completion_queue(state, &command)
490                        .await
491                        .map(|()| Some(Default::default())),
492                    spec::AdminOpcode::DELETE_IO_SUBMISSION_QUEUE => {
493                        self.handle_delete_io_submission_queue(state, &command)
494                            .await
495                    }
496                    spec::AdminOpcode::ASYNCHRONOUS_EVENT_REQUEST => {
497                        self.handle_asynchronous_event_request(state, &command)
498                    }
499                    spec::AdminOpcode::ABORT => self.handle_abort(),
500                    spec::AdminOpcode::GET_LOG_PAGE => self
501                        .handle_get_log_page(state, &command)
502                        .map(|()| Some(Default::default())),
503                    spec::AdminOpcode::DOORBELL_BUFFER_CONFIG
504                        if self.supports_shadow_doorbells(state) =>
505                    {
506                        self.handle_doorbell_buffer_config(state, &command)
507                            .await
508                            .map(|()| Some(Default::default()))
509                    }
510                    opcode => {
511                        tracelimit::warn_ratelimited!(?opcode, "unsupported opcode");
512                        Err(spec::Status::INVALID_COMMAND_OPCODE.into())
513                    }
514                };
515
516                let result = match result {
517                    Ok(Some(cr)) => cr,
518                    Ok(None) => return Ok(()),
519                    Err(err) => {
520                        tracelimit::warn_ratelimited!(
521                            error = &err as &dyn std::error::Error,
522                            cid = command.cdw0.cid(),
523                            ?opcode,
524                            "command error"
525                        );
526                        err.into()
527                    }
528                };
529
530                (command.cdw0.cid(), result)
531            }
532            Event::SqDeleteComplete(sqid) => {
533                let sq = state.io_sqs[sqid as usize - 1].take().unwrap();
534                let cid = sq.pending_delete_cid.unwrap();
535                (cid, Default::default())
536            }
537            Event::NamespaceChange(nsid) => {
538                state.add_changed_namespace(nsid);
539                return Ok(());
540            }
541        };
542
543        let status = spec::CompletionStatus::new().with_status(result.status.0);
544
545        let completion = spec::Completion {
546            dw0: result.dw[0],
547            dw1: result.dw[1],
548            sqid: 0,
549            sqhd: state.admin_sq.sqhd(),
550            status,
551            cid,
552        };
553
554        state.admin_cq.write(completion)?;
555        Ok(())
556    }
557
558    fn handle_identify(
559        &mut self,
560        state: &AdminState,
561        command: &spec::Command,
562    ) -> Result<(), NvmeError> {
563        let cdw10: spec::Cdw10Identify = command.cdw10.into();
564        // All identify results are 4096 bytes.
565        let mut buf = [0u64; 512];
566        let buf = buf.as_mut_bytes();
567        match spec::Cns(cdw10.cns()) {
568            spec::Cns::CONTROLLER => {
569                let id = spec::IdentifyController::mut_from_prefix(buf).unwrap().0; // TODO: zerocopy: from-prefix (mut_from_prefix): use-rest-of-range (https://github.com/microsoft/openvmm/issues/759)
570                *id = self.identify_controller(state);
571
572                write!(
573                    Cursor::new(&mut id.subnqn[..]),
574                    "nqn.2014-08.org.nvmexpress:uuid:{}",
575                    self.config.subsystem_id
576                )
577                .unwrap();
578            }
579            spec::Cns::ACTIVE_NAMESPACES => {
580                if command.nsid >= 0xfffffffe {
581                    return Err(spec::Status::INVALID_NAMESPACE_OR_FORMAT.into());
582                }
583                let nsids = <[u32]>::mut_from_bytes(buf).unwrap();
584                for (ns, nsid) in self
585                    .namespaces
586                    .keys()
587                    .filter(|&ns| *ns > command.nsid)
588                    .zip(nsids)
589                {
590                    *nsid = *ns;
591                }
592            }
593            spec::Cns::NAMESPACE => {
594                if let Some(ns) = self.namespaces.get(&command.nsid) {
595                    ns.identify(buf);
596                } else {
597                    tracelimit::warn_ratelimited!(nsid = command.nsid, "unknown namespace id");
598                }
599            }
600            spec::Cns::DESCRIPTOR_NAMESPACE => {
601                if let Some(ns) = self.namespaces.get(&command.nsid) {
602                    ns.namespace_id_descriptor(buf);
603                } else {
604                    tracelimit::warn_ratelimited!(nsid = command.nsid, "unknown namespace id");
605                }
606            }
607            cns => {
608                tracelimit::warn_ratelimited!(?cns, "unsupported cns");
609                return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
610            }
611        };
612        PrpRange::parse(&self.config.mem, buf.len(), command.dptr)?.write(&self.config.mem, buf)?;
613        Ok(())
614    }
615
616    fn identify_controller(&self, state: &AdminState) -> spec::IdentifyController {
617        spec::IdentifyController {
618            vid: VENDOR_ID,
619            ssvid: VENDOR_ID,
620            mdts: (MAX_DATA_TRANSFER_SIZE / PAGE_SIZE).trailing_zeros() as u8,
621            ver: NVME_VERSION,
622            rtd3r: 400000,
623            rtd3e: 400000,
624            sqes: spec::QueueEntrySize::new()
625                .with_min(IOSQES)
626                .with_max(IOSQES),
627            cqes: spec::QueueEntrySize::new()
628                .with_min(IOCQES)
629                .with_max(IOCQES),
630            frmw: spec::FirmwareUpdates::new().with_ffsro(true).with_nofs(1),
631            nn: self.namespaces.keys().copied().max().unwrap_or(0),
632            ieee: [0x74, 0xe2, 0x8c], // Microsoft
633            fr: (*b"v1.00000").into(),
634            mn: (*b"MSFT NVMe Accelerator v1.0              ").into(),
635            sn: (*b"SN: 000001          ").into(),
636            aerl: MAX_ASYNC_EVENT_REQUESTS - 1,
637            elpe: ERROR_LOG_PAGE_ENTRIES - 1,
638            oaes: spec::Oaes::new().with_namespace_attribute(true),
639            oncs: spec::Oncs::new()
640                .with_dataset_management(true)
641                // Namespaces still have to opt in individually via `rescap`.
642                .with_reservations(true),
643            vwc: spec::VolatileWriteCache::new()
644                .with_present(true)
645                .with_broadcast_flush_behavior(spec::BroadcastFlushBehavior::NOT_SUPPORTED.0),
646            cntrltype: spec::ControllerType::IO_CONTROLLER,
647            oacs: spec::OptionalAdminCommandSupport::new()
648                .with_doorbell_buffer_config(self.supports_shadow_doorbells(state)),
649            ..FromZeros::new_zeroed()
650        }
651    }
652
653    fn handle_set_features(
654        &mut self,
655        state: &mut AdminState,
656        command: &spec::Command,
657    ) -> Result<CommandResult, NvmeError> {
658        let cdw10: spec::Cdw10SetFeatures = command.cdw10.into();
659        let mut dw = [0; 2];
660        // Note that we don't support non-zero cdw10.save, since ONCS.save == 0.
661        match spec::Feature(cdw10.fid()) {
662            spec::Feature::NUMBER_OF_QUEUES => {
663                if state.io_sqs.iter().any(|sq| sq.is_some())
664                    || state.io_cqs.iter().any(|cq| cq.task.has_state())
665                {
666                    return Err(spec::Status::COMMAND_SEQUENCE_ERROR.into());
667                }
668                let cdw11: spec::Cdw11FeatureNumberOfQueues = command.cdw11.into();
669                if cdw11.ncq_z() == u16::MAX || cdw11.nsq_z() == u16::MAX {
670                    return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
671                }
672                let num_sqs = (cdw11.nsq_z() + 1).min(self.config.max_sqs);
673                let num_cqs = (cdw11.ncq_z() + 1).min(self.config.max_cqs);
674                state.set_max_queues(self, num_sqs, num_cqs);
675
676                dw[0] = spec::Cdw11FeatureNumberOfQueues::new()
677                    .with_ncq_z(num_cqs - 1)
678                    .with_nsq_z(num_sqs - 1)
679                    .into();
680            }
681            spec::Feature::VOLATILE_WRITE_CACHE => {
682                let cdw11 = spec::Cdw11FeatureVolatileWriteCache::from(command.cdw11);
683                if !cdw11.wce() {
684                    tracelimit::warn_ratelimited!(
685                        "ignoring unsupported attempt to disable write cache"
686                    );
687                }
688            }
689            spec::Feature::ASYNC_EVENT_CONFIG => {
690                // The Asynchronous Event Configuration feature is mandatory
691                // for I/O controllers per the NVMe Base specification's
692                // Feature Support Requirements table (Base 2.0c section
693                // 3.1.2.1.1 / Base 2.3 section 3.1.3.6). The host sets bits
694                // in CDW11 to enable each class of asynchronous event
695                // notification. We store the value verbatim; Get Features
696                // echoes it back, and the AEN dispatch loop consults the
697                // relevant bits before firing each notification class.
698                state.async_event_config = command.cdw11;
699            }
700            feature => {
701                tracelimit::warn_ratelimited!(?feature, "unsupported feature");
702                return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
703            }
704        }
705        Ok(CommandResult::new(spec::Status::SUCCESS, dw))
706    }
707
708    async fn handle_get_features(
709        &mut self,
710        state: &mut AdminState,
711        command: &spec::Command,
712    ) -> Result<CommandResult, NvmeError> {
713        let cdw10: spec::Cdw10GetFeatures = command.cdw10.into();
714        let mut dw = [0; 2];
715
716        // Note that we don't support non-zero cdw10.sel, since ONCS.save == 0.
717        match spec::Feature(cdw10.fid()) {
718            spec::Feature::NUMBER_OF_QUEUES => {
719                let num_cqs = state.io_cqs.len();
720                let num_sqs = state.io_sqs.len();
721                dw[0] = spec::Cdw11FeatureNumberOfQueues::new()
722                    .with_ncq_z((num_cqs - 1) as u16)
723                    .with_nsq_z((num_sqs - 1) as u16)
724                    .into();
725            }
726            spec::Feature::VOLATILE_WRITE_CACHE => {
727                // Write cache is always enabled.
728                dw[0] = spec::Cdw11FeatureVolatileWriteCache::new()
729                    .with_wce(true)
730                    .into();
731            }
732            spec::Feature::ASYNC_EVENT_CONFIG => {
733                // Echo back the most recently configured mask. The cache
734                // is initialized to all bits set (refer to
735                // [`AdminState::new`]) so that a host which never issues
736                // Set Features 0Bh still sees every notification class
737                // reported as enabled, preserving the pre-existing
738                // behavior of unconditional AEN delivery.
739                dw[0] = state.async_event_config;
740            }
741            spec::Feature::NVM_RESERVATION_PERSISTENCE => {
742                let namespace = self
743                    .namespaces
744                    .get(&command.nsid)
745                    .ok_or(spec::Status::INVALID_NAMESPACE_OR_FORMAT)?;
746
747                return namespace.get_feature(command).await;
748            }
749            feature => {
750                tracelimit::warn_ratelimited!(?feature, "unsupported feature");
751                return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
752            }
753        }
754        Ok(CommandResult::new(spec::Status::SUCCESS, dw))
755    }
756
757    fn handle_create_io_completion_queue(
758        &mut self,
759        state: &mut AdminState,
760        command: &spec::Command,
761    ) -> Result<(), NvmeError> {
762        let cdw10: spec::Cdw10CreateIoQueue = command.cdw10.into();
763        let cdw11: spec::Cdw11CreateIoCompletionQueue = command.cdw11.into();
764        if !cdw11.pc() {
765            return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
766        }
767        let cqid = cdw10.qid();
768        let cq = state
769            .io_cqs
770            .get_mut((cqid as usize).wrapping_sub(1))
771            .ok_or(InvalidQueueIdentifier {
772                qid: cqid,
773                reason: InvalidQueueIdentifierReason::Oob,
774            })?;
775
776        if cq.task.has_state() {
777            return Err(InvalidQueueIdentifier {
778                qid: cqid,
779                reason: InvalidQueueIdentifierReason::InUse,
780            }
781            .into());
782        }
783
784        let interrupt = if cdw11.ien() {
785            let iv = cdw11.iv();
786            if iv as usize >= self.config.interrupts.len() {
787                return Err(spec::Status::INVALID_INTERRUPT_VECTOR.into());
788            };
789            Some(iv)
790        } else {
791            None
792        };
793        let gpa = command.dptr[0] & PAGE_MASK;
794        let len0 = cdw10.qsize_z();
795        if len0 == 0 || len0 >= MAX_QES || self.config.qe_sizes.lock().cqe_bits != IOCQES {
796            return Err(spec::Status::INVALID_QUEUE_SIZE.into());
797        }
798
799        let interrupt = interrupt.map(|iv| self.config.interrupts[iv as usize].clone());
800        let namespaces = self.namespaces.clone();
801
802        let state = IoState::new(
803            &self.config.mem,
804            self.config.doorbells.clone(),
805            gpa,
806            len0 + 1,
807            cqid,
808            interrupt,
809            namespaces,
810        );
811
812        cq.task.insert(&cq.driver, "nvme-io", state);
813        cq.task.start();
814        Ok(())
815    }
816
817    async fn handle_create_io_submission_queue(
818        &mut self,
819        state: &mut AdminState,
820        command: &spec::Command,
821    ) -> Result<(), NvmeError> {
822        let cdw10: spec::Cdw10CreateIoQueue = command.cdw10.into();
823        let cdw11: spec::Cdw11CreateIoSubmissionQueue = command.cdw11.into();
824        if !cdw11.pc() {
825            return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
826        }
827        let sqid = cdw10.qid();
828        let sq = state
829            .io_sqs
830            .get_mut((sqid as usize).wrapping_sub(1))
831            .ok_or(InvalidQueueIdentifier {
832                qid: sqid,
833                reason: InvalidQueueIdentifierReason::Oob,
834            })?;
835
836        if sq.is_some() {
837            return Err(InvalidQueueIdentifier {
838                qid: sqid,
839                reason: InvalidQueueIdentifierReason::InUse,
840            }
841            .into());
842        }
843
844        let cqid = cdw11.cqid();
845        let cq = state
846            .io_cqs
847            .get_mut((cqid as usize).wrapping_sub(1))
848            .ok_or(spec::Status::COMPLETION_QUEUE_INVALID)?;
849
850        if !cq.task.has_state() {
851            return Err(spec::Status::COMPLETION_QUEUE_INVALID.into());
852        }
853
854        let sq_gpa = command.dptr[0] & PAGE_MASK;
855        let len0 = cdw10.qsize_z();
856        if len0 == 0 || len0 >= MAX_QES || self.config.qe_sizes.lock().sqe_bits != IOSQES {
857            return Err(spec::Status::INVALID_QUEUE_SIZE.into());
858        }
859
860        let running = cq.task.stop().await;
861        let sq_idx = cq
862            .task
863            .state_mut()
864            .unwrap()
865            .create_sq(sqid, sq_gpa, len0 + 1);
866        if running {
867            cq.task.start();
868        }
869        *sq = Some(IoSq {
870            sq_idx,
871            pending_delete_cid: None,
872            cqid,
873        });
874        Ok(())
875    }
876
877    async fn handle_delete_io_submission_queue(
878        &self,
879        state: &mut AdminState,
880        command: &spec::Command,
881    ) -> Result<Option<CommandResult>, NvmeError> {
882        let cdw10: spec::Cdw10DeleteIoQueue = command.cdw10.into();
883        let sqid = cdw10.qid();
884        let sq = state
885            .io_sqs
886            .get_mut((sqid as usize).wrapping_sub(1))
887            .ok_or(InvalidQueueIdentifier {
888                qid: sqid,
889                reason: InvalidQueueIdentifierReason::Oob,
890            })?
891            .as_mut()
892            .ok_or(InvalidQueueIdentifier {
893                qid: sqid,
894                reason: InvalidQueueIdentifierReason::NotInUse,
895            })?;
896
897        if sq.pending_delete_cid.is_some() {
898            return Err(InvalidQueueIdentifier {
899                qid: sqid,
900                reason: InvalidQueueIdentifierReason::NotInUse,
901            }
902            .into());
903        }
904
905        let cq = &mut state.io_cqs[(sq.cqid as usize).wrapping_sub(1)];
906        let running = cq.task.stop().await;
907        cq.task.state_mut().unwrap().delete_sq(sq.sq_idx);
908        if running {
909            cq.task.start();
910        }
911        sq.pending_delete_cid = Some(command.cdw0.cid());
912        Ok(None)
913    }
914
915    async fn handle_delete_io_completion_queue(
916        &self,
917        state: &mut AdminState,
918        command: &spec::Command,
919    ) -> Result<(), NvmeError> {
920        let cdw10: spec::Cdw10DeleteIoQueue = command.cdw10.into();
921        let cqid = cdw10.qid();
922        let cq = state
923            .io_cqs
924            .get_mut((cqid as usize).wrapping_sub(1))
925            .ok_or(InvalidQueueIdentifier {
926                qid: cqid,
927                reason: InvalidQueueIdentifierReason::Oob,
928            })?;
929
930        if !cq.task.has_state() {
931            return Err(InvalidQueueIdentifier {
932                qid: cqid,
933                reason: InvalidQueueIdentifierReason::NotInUse,
934            }
935            .into());
936        }
937        let running = cq.task.stop().await;
938        if cq.task.state().unwrap().has_sqs() {
939            if running {
940                cq.task.start();
941            }
942            return Err(spec::Status::INVALID_QUEUE_DELETION.into());
943        }
944        cq.task.remove();
945        Ok(())
946    }
947
948    fn handle_asynchronous_event_request(
949        &self,
950        state: &mut AdminState,
951        command: &spec::Command,
952    ) -> Result<Option<CommandResult>, NvmeError> {
953        if state.asynchronous_event_requests.len() >= MAX_ASYNC_EVENT_REQUESTS as usize {
954            return Err(spec::Status::ASYNCHRONOUS_EVENT_REQUEST_LIMIT_EXCEEDED.into());
955        }
956        state.asynchronous_event_requests.push(command.cdw0.cid());
957        Ok(None)
958    }
959
960    /// Abort is a required command, but a legal implementation is to just
961    /// complete it with a status that means "I'm sorry, that command couldn't
962    /// be aborted."
963    fn handle_abort(&self) -> Result<Option<CommandResult>, NvmeError> {
964        Ok(Some(CommandResult {
965            status: spec::Status::SUCCESS,
966            dw: [1, 0],
967        }))
968    }
969
970    fn handle_get_log_page(
971        &self,
972        state: &mut AdminState,
973        command: &spec::Command,
974    ) -> Result<(), NvmeError> {
975        let cdw10 = spec::Cdw10GetLogPage::from(command.cdw10);
976        let cdw11 = spec::Cdw11GetLogPage::from(command.cdw11);
977        let numd =
978            ((cdw10.numdl_z() as u32) | ((cdw11.numdu() as u32) << 16)).saturating_add(1) as usize;
979        let len = numd * 4;
980        let prp = PrpRange::parse(&self.config.mem, len, command.dptr)?;
981
982        match spec::LogPageIdentifier(cdw10.lid()) {
983            spec::LogPageIdentifier::ERROR_INFORMATION => {
984                // Write empty log entries.
985                prp.zero(
986                    &self.config.mem,
987                    len.min(ERROR_LOG_PAGE_ENTRIES as usize * 64),
988                )?;
989            }
990            spec::LogPageIdentifier::HEALTH_INFORMATION => {
991                if command.nsid != !0 {
992                    return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
993                }
994                // Write an empty page.
995                prp.zero(&self.config.mem, len.min(512))?;
996            }
997            spec::LogPageIdentifier::FIRMWARE_SLOT_INFORMATION => {
998                // Write an empty page.
999                prp.zero(&self.config.mem, len.min(512))?;
1000            }
1001            spec::LogPageIdentifier::CHANGED_NAMESPACE_LIST => {
1002                // Zero the whole list.
1003                prp.zero(&self.config.mem, len.min(4096))?;
1004                // Now write in the changed namespaces.
1005                if state.changed_namespaces.len() > 1024 {
1006                    // Too many to fit, write !0 so the driver scans everything.
1007                    prp.write(&self.config.mem, (!0u32).as_bytes())?;
1008                } else {
1009                    let count = state.changed_namespaces.len().min(numd);
1010                    prp.write(
1011                        &self.config.mem,
1012                        state.changed_namespaces[..count].as_bytes(),
1013                    )?;
1014                }
1015                state.changed_namespaces.clear();
1016                if !cdw10.rae() {
1017                    state.notified_changed_namespaces = false;
1018                }
1019            }
1020            lid => {
1021                tracelimit::warn_ratelimited!(?lid, "unsupported log page");
1022                return Err(spec::Status::INVALID_LOG_PAGE.into());
1023            }
1024        }
1025
1026        Ok(())
1027    }
1028
1029    fn supports_shadow_doorbells(&self, state: &AdminState) -> bool {
1030        let num_queues = state.io_sqs.len().max(state.io_cqs.len()) + 1;
1031        let len = num_queues * (2 << DOORBELL_STRIDE_BITS);
1032        // The spec only allows a single shadow doorbell page.
1033        len <= PAGE_SIZE
1034    }
1035
1036    async fn handle_doorbell_buffer_config(
1037        &self,
1038        state: &mut AdminState,
1039        command: &spec::Command,
1040    ) -> Result<(), NvmeError> {
1041        // Validated by caller.
1042        assert!(self.supports_shadow_doorbells(state));
1043
1044        let shadow_db_gpa = command.dptr[0];
1045        let event_idx_gpa = command.dptr[1];
1046        if (shadow_db_gpa | event_idx_gpa) & !PAGE_MASK != 0 {
1047            return Err(NvmeError::from(spec::Status::INVALID_FIELD_IN_COMMAND));
1048        }
1049
1050        self.config
1051            .doorbells
1052            .write()
1053            .replace_mem(self.config.mem.clone(), shadow_db_gpa, Some(event_idx_gpa))
1054            .map_err(|err| NvmeError::new(spec::Status::DATA_TRANSFER_ERROR, err))?;
1055
1056        Ok(())
1057    }
1058}
1059
1060impl AsyncRun<AdminState> for AdminHandler {
1061    async fn run(
1062        &mut self,
1063        stop: &mut StopTask<'_>,
1064        state: &mut AdminState,
1065    ) -> Result<(), Cancelled> {
1066        loop {
1067            let event = stop.until_stopped(self.next_event(state)).await?;
1068            if let Err(err) = self.process_event(state, event).await {
1069                tracing::error!(
1070                    error = &err as &dyn std::error::Error,
1071                    "admin queue failure"
1072                );
1073                break;
1074            }
1075        }
1076        Ok(())
1077    }
1078}
1079
1080impl InspectTask<AdminState> for AdminHandler {
1081    fn inspect(&self, req: inspect::Request<'_>, state: Option<&AdminState>) {
1082        req.respond().merge(self).merge(state);
1083    }
1084}