nvme/workers/
admin.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Admin queue handler.
5
6use super::IoQueueEntrySizes;
7use super::MAX_DATA_TRANSFER_SIZE;
8use super::io::IoHandler;
9use super::io::IoState;
10use crate::DOORBELL_STRIDE_BITS;
11use crate::MAX_QES;
12use crate::NVME_VERSION;
13use crate::PAGE_MASK;
14use crate::PAGE_SIZE;
15use crate::VENDOR_ID;
16use crate::error::CommandResult;
17use crate::error::NvmeError;
18use crate::namespace::Namespace;
19use crate::prp::PrpRange;
20use crate::queue::CompletionQueue;
21use crate::queue::DoorbellMemory;
22use crate::queue::QueueError;
23use crate::queue::SubmissionQueue;
24use crate::spec;
25use disk_backend::Disk;
26use futures::FutureExt;
27use futures::SinkExt;
28use futures::StreamExt;
29use futures_concurrency::future::Race;
30use guestmem::GuestMemory;
31use guid::Guid;
32use inspect::Inspect;
33use pal_async::task::Spawn;
34use pal_async::task::Task;
35use parking_lot::Mutex;
36use parking_lot::RwLock;
37use std::collections::BTreeMap;
38use std::collections::btree_map;
39use std::future::pending;
40use std::future::poll_fn;
41use std::io::Cursor;
42use std::io::Write;
43use std::sync::Arc;
44use task_control::AsyncRun;
45use task_control::Cancelled;
46use task_control::InspectTask;
47use task_control::StopTask;
48use task_control::TaskControl;
49use thiserror::Error;
50use vmcore::interrupt::Interrupt;
51use vmcore::vm_task::VmTaskDriver;
52use vmcore::vm_task::VmTaskDriverSource;
53use zerocopy::FromBytes;
54use zerocopy::FromZeros;
55use zerocopy::IntoBytes;
56
// Log2 of the I/O submission queue entry size in bytes (2^6 = 64), reported
// via `IdentifyController::sqes` and required of the guest's CC programming.
const IOSQES: u8 = 6;
// Log2 of the I/O completion queue entry size in bytes (2^4 = 16), reported
// via `IdentifyController::cqes`.
const IOCQES: u8 = 4;
const MAX_ASYNC_EVENT_REQUESTS: u8 = 4; // minimum recommended by spec
// Number of error log page entries; reported zeroes-based via `elpe`.
const ERROR_LOG_PAGE_ENTRIES: u8 = 1;
61
/// Static configuration for the admin queue handler.
#[derive(Inspect)]
pub struct AdminConfig {
    /// Source for building per-queue task drivers.
    #[inspect(skip)]
    pub driver_source: VmTaskDriverSource,
    /// Guest memory used for queues and data transfers.
    #[inspect(skip)]
    pub mem: GuestMemory,
    /// Interrupt objects, indexed by interrupt vector.
    #[inspect(skip)]
    pub interrupts: Vec<Interrupt>,
    /// Doorbell memory shared with the IO queue handlers.
    #[inspect(skip)]
    pub doorbells: Arc<RwLock<DoorbellMemory>>,
    /// Subsystem identity; used to build the reported subsystem NQN.
    #[inspect(display)]
    pub subsystem_id: Guid,
    /// Maximum number of IO submission queues the guest may allocate.
    pub max_sqs: u16,
    /// Maximum number of IO completion queues the guest may allocate.
    pub max_cqs: u16,
    /// The currently programmed IO queue entry sizes (log2), shared with the
    /// controller front end.
    pub qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
}
78
/// Handler for the NVMe admin queue pair.
#[derive(Inspect)]
pub struct AdminHandler {
    /// Driver used to spawn namespace change notifier tasks.
    driver: VmTaskDriver,
    config: AdminConfig,
    /// Active namespaces, keyed by namespace id (nsid).
    #[inspect(iter_by_key)]
    namespaces: BTreeMap<u32, Arc<Namespace>>,
}
86
/// Dynamic state of the admin queue handler, created when the admin queues
/// are configured.
#[derive(Inspect)]
pub struct AdminState {
    admin_sq: SubmissionQueue,
    admin_cq: CompletionQueue,
    // IO submission queue slots, indexed by sqid - 1; `None` when not in use.
    #[inspect(with = "|x| inspect::iter_by_index(x).map_key(|x| x + 1)")]
    io_sqs: Vec<Option<IoSq>>,
    // IO completion queues and their handler tasks, indexed by cqid - 1.
    #[inspect(with = "|x| inspect::iter_by_index(x).map_key(|x| x + 1)")]
    io_cqs: Vec<IoCq>,
    // Receives the sqid of each IO submission queue whose deletion the IO
    // handler has finished.
    #[inspect(skip)]
    sq_delete_response: mesh::Receiver<u16>,
    // CIDs of parked ASYNCHRONOUS_EVENT_REQUEST commands, completed when an
    // event (e.g. a namespace change) needs to be reported.
    #[inspect(iter_by_index)]
    asynchronous_event_requests: Vec<u16>,
    // Sorted, deduplicated list of nsids with changes not yet consumed by
    // the guest.
    #[inspect(
        rename = "namespaces",
        with = "|x| inspect::iter_by_key(x.iter().map(|v| (v, ChangedNamespace { changed: true })))"
    )]
    changed_namespaces: Vec<u32>,
    // Set once an async event completion has been posted for the current
    // batch of changes, so only one notification is sent per batch.
    notified_changed_namespaces: bool,
    #[inspect(skip)]
    recv_changed_namespace: futures::channel::mpsc::Receiver<u32>,
    #[inspect(skip)]
    send_changed_namespace: futures::channel::mpsc::Sender<u32>,
    // One background notifier task per namespace, forwarding change events
    // into `send_changed_namespace`.
    #[inspect(skip)]
    poll_namespace_change: BTreeMap<u32, Task<()>>,
}
112
/// Inspect-only helper type used to render `AdminState::changed_namespaces`.
#[derive(Inspect)]
struct ChangedNamespace {
    changed: bool,
}
117
/// Bookkeeping for one guest-created IO submission queue.
#[derive(Inspect)]
struct IoSq {
    /// CID of an in-flight delete-SQ admin command; the completion is posted
    /// once the IO handler reports the deletion finished.
    pending_delete_cid: Option<u16>,
    /// Index of this SQ within the owning IO handler's state.
    sq_idx: usize,
    /// Identifier of the completion queue this SQ posts completions to.
    cqid: u16,
}
124
/// An IO completion queue slot plus the task that services its queues.
#[derive(Inspect)]
struct IoCq {
    /// Dedicated task driver for this queue's handler (hint to the backend
    /// that the queue wants its own thread).
    driver: VmTaskDriver,
    #[inspect(flatten)]
    task: TaskControl<IoHandler, IoState>,
}
131
impl AdminState {
    /// Creates admin queue state from the guest-programmed register values:
    /// admin SQ base/size (`asq`/`asqs`) and admin CQ base/size (`acq`/`acqs`).
    pub fn new(handler: &AdminHandler, asq: u64, asqs: u16, acq: u64, acqs: u16) -> Self {
        // Start polling for namespace changes. Use a bounded channel to avoid
        // unbounded memory allocation when the queue is stuck.
        #[expect(clippy::disallowed_methods)] // TODO
        let (send_changed_namespace, recv_changed_namespace) = futures::channel::mpsc::channel(256);
        // Spawn one notifier task per existing namespace.
        let poll_namespace_change = handler
            .namespaces
            .iter()
            .map(|(&nsid, namespace)| {
                (
                    nsid,
                    spawn_namespace_notifier(
                        &handler.driver,
                        nsid,
                        namespace.clone(),
                        send_changed_namespace.clone(),
                    ),
                )
            })
            .collect();

        // The admin CQ uses doorbell index 1 and interrupt vector 0; the
        // admin SQ uses doorbell index 0 (standard admin queue layout).
        let admin_cq = CompletionQueue::new(
            handler.config.doorbells.clone(),
            1,
            handler.config.mem.clone(),
            Some(handler.config.interrupts[0].clone()),
            acq,
            acqs,
        );
        let mut state = Self {
            admin_sq: SubmissionQueue::new(&admin_cq, 0, asq, asqs),
            admin_cq,
            io_sqs: Vec::new(),
            io_cqs: Vec::new(),
            sq_delete_response: Default::default(),
            asynchronous_event_requests: Vec::new(),
            changed_namespaces: Vec::new(),
            notified_changed_namespaces: false,
            recv_changed_namespace,
            send_changed_namespace,
            poll_namespace_change,
        };
        // Allocate the IO queue slots up front at their configured maximums.
        state.set_max_queues(handler, handler.config.max_sqs, handler.config.max_cqs);
        state
    }

    /// Stops all submission queues and drains them of any pending IO.
    ///
    /// This future may be dropped and reissued.
    pub async fn drain(&mut self) {
        for cq in &mut self.io_cqs {
            // Stop the handler task first so it cannot race with the drain.
            cq.task.stop().await;
            if let Some(state) = cq.task.state_mut() {
                state.drain().await;
                cq.task.remove();
            }
        }
    }

    /// Caller must ensure that no queues are active.
    fn set_max_queues(&mut self, handler: &AdminHandler, num_sqs: u16, num_cqs: u16) {
        self.io_sqs.truncate(num_sqs.into());
        self.io_sqs.resize_with(num_sqs.into(), || None);
        self.io_cqs.resize_with(num_cqs.into(), || {
            // This driver doesn't explicitly do any IO (that's handled by
            // the storage backends), so the target VP doesn't matter. But
            // set it anyway as a hint to the backend that this queue needs
            // its own thread.
            let driver = handler
                .config
                .driver_source
                .builder()
                .run_on_target(false)
                .target_vp(0)
                .build("nvme");

            IoCq {
                driver,
                task: TaskControl::new(IoHandler::new(
                    handler.config.mem.clone(),
                    self.sq_delete_response.sender(),
                )),
            }
        });
    }

    /// Records that namespace `nsid` changed, keeping `changed_namespaces`
    /// sorted and free of duplicates.
    fn add_changed_namespace(&mut self, nsid: u32) {
        // binary_search returns Err(insertion_point) when not already present.
        if let Err(i) = self.changed_namespaces.binary_search(&nsid) {
            self.changed_namespaces.insert(i, nsid);
        }
    }

    /// Plumbs a new namespace into the running IO queues, starts change
    /// polling for it, and queues a change notification for the guest.
    async fn add_namespace(
        &mut self,
        driver: &VmTaskDriver,
        nsid: u32,
        namespace: &Arc<Namespace>,
    ) {
        // Update the IO queues.
        for cq in &mut self.io_cqs {
            // Stop the handler while mutating its state, then restart it
            // only if it was previously running.
            let io_running = cq.task.stop().await;
            if let Some(io_state) = cq.task.state_mut() {
                io_state.add_namespace(nsid, namespace.clone());
            }
            if io_running {
                cq.task.start();
            }
        }

        // Start polling.
        let old = self.poll_namespace_change.insert(
            nsid,
            spawn_namespace_notifier(
                driver,
                nsid,
                namespace.clone(),
                self.send_changed_namespace.clone(),
            ),
        );
        assert!(old.is_none());

        // Notify the guest driver of the change.
        self.add_changed_namespace(nsid);
    }

    /// Removes a namespace from the running IO queues, stops its change
    /// polling task, and queues a change notification for the guest.
    async fn remove_namespace(&mut self, nsid: u32) {
        // Update the IO queues.
        for cq in &mut self.io_cqs {
            let io_running = cq.task.stop().await;
            if let Some(io_state) = cq.task.state_mut() {
                io_state.remove_namespace(nsid);
            }
            if io_running {
                cq.task.start();
            }
        }

        // Stop polling. The task must exist since add_namespace created it.
        self.poll_namespace_change
            .remove(&nsid)
            .unwrap()
            .cancel()
            .await;

        // Notify the guest driver of the change.
        self.add_changed_namespace(nsid);
    }
}
281
282fn spawn_namespace_notifier(
283    driver: &VmTaskDriver,
284    nsid: u32,
285    namespace: Arc<Namespace>,
286    mut send_changed_namespace: futures::channel::mpsc::Sender<u32>,
287) -> Task<()> {
288    driver.spawn("wait_resize", async move {
289        let mut counter = None;
290        loop {
291            counter = Some(namespace.wait_change(counter).await);
292            tracing::info!(nsid, "namespace changed");
293            if send_changed_namespace.send(nsid).await.is_err() {
294                break;
295            }
296        }
297    })
298}
299
/// Error for a queue identifier that is out of range or in the wrong state
/// for the requested operation.
#[derive(Debug, Error)]
#[error("invalid queue identifier {qid}")]
struct InvalidQueueIdentifier {
    qid: u16,
    #[source]
    reason: InvalidQueueIdentifierReason,
}
307
/// The specific way a queue identifier was invalid.
#[derive(Debug, Error)]
enum InvalidQueueIdentifierReason {
    #[error("queue id is out of bounds")]
    Oob,
    #[error("queue id is in use")]
    InUse,
    #[error("queue id is not in use")]
    NotInUse,
}
317
318impl From<InvalidQueueIdentifier> for NvmeError {
319    fn from(err: InvalidQueueIdentifier) -> Self {
320        Self::new(spec::Status::INVALID_QUEUE_IDENTIFIER, err)
321    }
322}
323
/// An event processed by the admin handler's main loop.
enum Event {
    /// A command arrived on the admin submission queue (or the queue failed).
    Command(Result<spec::Command, QueueError>),
    /// The IO handler finished deleting the submission queue with this sqid.
    SqDeleteComplete(u16),
    /// The namespace with this nsid reported a change.
    NamespaceChange(u32),
}
329
/// Error returned when adding a namespace with a conflicting ID.
#[derive(Debug, Error)]
#[error("namespace id conflict for {0}")]
pub struct NsidConflict(u32); // the conflicting nsid
334
335impl AdminHandler {
336    pub fn new(driver: VmTaskDriver, config: AdminConfig) -> Self {
337        Self {
338            driver,
339            config,
340            namespaces: Default::default(),
341        }
342    }
343
344    pub async fn add_namespace(
345        &mut self,
346        state: Option<&mut AdminState>,
347        nsid: u32,
348        disk: Disk,
349    ) -> Result<(), NsidConflict> {
350        let namespace = &*match self.namespaces.entry(nsid) {
351            btree_map::Entry::Vacant(entry) => entry.insert(Arc::new(Namespace::new(
352                self.config.mem.clone(),
353                nsid,
354                disk,
355            ))),
356            btree_map::Entry::Occupied(_) => return Err(NsidConflict(nsid)),
357        };
358
359        if let Some(state) = state {
360            state.add_namespace(&self.driver, nsid, namespace).await;
361        }
362
363        Ok(())
364    }
365
366    pub async fn remove_namespace(&mut self, state: Option<&mut AdminState>, nsid: u32) -> bool {
367        if self.namespaces.remove(&nsid).is_none() {
368            return false;
369        }
370
371        if let Some(state) = state {
372            state.remove_namespace(nsid).await;
373        }
374
375        true
376    }
377
    /// Waits for the next event to process: an admin command, a completed IO
    /// submission queue deletion, or a namespace change notification.
    ///
    /// Before waiting, completes a parked asynchronous event request if there
    /// are namespace changes the guest has not yet been told about.
    async fn next_event(&mut self, state: &mut AdminState) -> Result<Event, QueueError> {
        let event = loop {
            // Wait for there to be room for a completion for the next
            // command or the completed sq deletion.
            poll_fn(|cx| state.admin_cq.poll_ready(cx)).await?;

            if !state.changed_namespaces.is_empty() && !state.notified_changed_namespaces {
                if let Some(cid) = state.asynchronous_event_requests.pop() {
                    // Complete one parked async event request with a
                    // "namespace attribute changed" notice, pointing the
                    // guest at the changed namespace list log page.
                    state.admin_cq.write(
                        spec::Completion {
                            dw0: spec::AsynchronousEventRequestDw0::new()
                                .with_event_type(spec::AsynchronousEventType::NOTICE.0)
                                .with_log_page_identifier(spec::LogPageIdentifier::CHANGED_NAMESPACE_LIST.0)
                                .with_information(spec::AsynchronousEventInformationNotice::NAMESPACE_ATTRIBUTE_CHANGED.0)
                                .into(),
                            dw1: 0,
                            sqhd: state.admin_sq.sqhd(),
                            sqid: 0,
                            cid,
                            status: spec::CompletionStatus::new(),
                        },
                    )?;

                    // Only one notification per batch of changes; the flag is
                    // presumably cleared when the guest consumes the log page
                    // (handled outside this view).
                    state.notified_changed_namespaces = true;
                    continue;
                }
            }

            // Race the three event sources. A closed channel yields `None`;
            // in that case park forever rather than report a bogus event.
            let next_command = poll_fn(|cx| state.admin_sq.poll_next(cx)).map(Event::Command);
            let sq_delete_complete = async {
                let Some(sqid) = state.sq_delete_response.next().await else {
                    pending().await
                };
                Event::SqDeleteComplete(sqid)
            };
            let changed_namespace = async {
                let Some(nsid) = state.recv_changed_namespace.next().await else {
                    pending().await
                };
                Event::NamespaceChange(nsid)
            };

            break (next_command, sq_delete_complete, changed_namespace)
                .race()
                .await;
        };
        Ok(event)
    }
426
    /// Processes a single event, posting a completion to the admin CQ for
    /// commands that complete synchronously.
    ///
    /// Handlers returning `Ok(None)` defer their completion: async event
    /// requests are parked until an event fires, and SQ deletions complete
    /// later via [`Event::SqDeleteComplete`].
    async fn process_event(
        &mut self,
        state: &mut AdminState,
        event: Result<Event, QueueError>,
    ) -> Result<(), QueueError> {
        let (cid, result) = match event? {
            Event::Command(command) => {
                let command = command?;
                let opcode = spec::AdminOpcode(command.cdw0.opcode());

                tracing::debug!(?opcode, ?command, "command");

                // Dispatch on the admin opcode. Handlers return
                // Ok(Some(result)) to complete now, Ok(None) to defer, or
                // Err to complete with an error status.
                let result = match opcode {
                    spec::AdminOpcode::IDENTIFY => self
                        .handle_identify(state, &command)
                        .map(|()| Some(Default::default())),
                    spec::AdminOpcode::GET_FEATURES => {
                        self.handle_get_features(state, &command).await.map(Some)
                    }
                    spec::AdminOpcode::SET_FEATURES => {
                        self.handle_set_features(state, &command).map(Some)
                    }
                    spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE => self
                        .handle_create_io_completion_queue(state, &command)
                        .map(|()| Some(Default::default())),
                    spec::AdminOpcode::CREATE_IO_SUBMISSION_QUEUE => self
                        .handle_create_io_submission_queue(state, &command)
                        .await
                        .map(|()| Some(Default::default())),
                    spec::AdminOpcode::DELETE_IO_COMPLETION_QUEUE => self
                        .handle_delete_io_completion_queue(state, &command)
                        .await
                        .map(|()| Some(Default::default())),
                    spec::AdminOpcode::DELETE_IO_SUBMISSION_QUEUE => {
                        self.handle_delete_io_submission_queue(state, &command)
                            .await
                    }
                    spec::AdminOpcode::ASYNCHRONOUS_EVENT_REQUEST => {
                        self.handle_asynchronous_event_request(state, &command)
                    }
                    spec::AdminOpcode::ABORT => self.handle_abort(),
                    spec::AdminOpcode::GET_LOG_PAGE => self
                        .handle_get_log_page(state, &command)
                        .map(|()| Some(Default::default())),
                    // Only advertised/accepted when shadow doorbells are
                    // supported in the current configuration.
                    spec::AdminOpcode::DOORBELL_BUFFER_CONFIG
                        if self.supports_shadow_doorbells(state) =>
                    {
                        self.handle_doorbell_buffer_config(state, &command)
                            .await
                            .map(|()| Some(Default::default()))
                    }
                    opcode => {
                        tracelimit::warn_ratelimited!(?opcode, "unsupported opcode");
                        Err(spec::Status::INVALID_COMMAND_OPCODE.into())
                    }
                };

                let result = match result {
                    Ok(Some(cr)) => cr,
                    // Deferred completion: nothing to post now.
                    Ok(None) => return Ok(()),
                    Err(err) => {
                        tracelimit::warn_ratelimited!(
                            error = &err as &dyn std::error::Error,
                            cid = command.cdw0.cid(),
                            ?opcode,
                            "command error"
                        );
                        err.into()
                    }
                };

                (command.cdw0.cid(), result)
            }
            Event::SqDeleteComplete(sqid) => {
                // Free the SQ slot and complete the delete command using the
                // CID stashed when the deletion was initiated.
                let sq = state.io_sqs[sqid as usize - 1].take().unwrap();
                let cid = sq.pending_delete_cid.unwrap();
                (cid, Default::default())
            }
            Event::NamespaceChange(nsid) => {
                // Just record the change; next_event sends the notification.
                state.add_changed_namespace(nsid);
                return Ok(());
            }
        };

        let status = spec::CompletionStatus::new().with_status(result.status.0);

        let completion = spec::Completion {
            dw0: result.dw[0],
            dw1: result.dw[1],
            sqid: 0,
            sqhd: state.admin_sq.sqhd(),
            status,
            cid,
        };

        state.admin_cq.write(completion)?;
        Ok(())
    }
525
    /// Handles the IDENTIFY admin command, writing the 4096-byte result to
    /// the guest buffer described by the command's PRP entries.
    fn handle_identify(
        &mut self,
        state: &AdminState,
        command: &spec::Command,
    ) -> Result<(), NvmeError> {
        let cdw10: spec::Cdw10Identify = command.cdw10.into();
        // All identify results are 4096 bytes.
        // (u64 array guarantees suitable alignment for the casts below.)
        let mut buf = [0u64; 512];
        let buf = buf.as_mut_bytes();
        match spec::Cns(cdw10.cns()) {
            spec::Cns::CONTROLLER => {
                let id = spec::IdentifyController::mut_from_prefix(buf).unwrap().0; // TODO: zerocopy: from-prefix (mut_from_prefix): use-rest-of-range (https://github.com/microsoft/openvmm/issues/759)
                *id = self.identify_controller(state);

                // Derive the subsystem NQN from the configured subsystem id.
                write!(
                    Cursor::new(&mut id.subnqn[..]),
                    "nqn.2014-08.org.nvmexpress:uuid:{}",
                    self.config.subsystem_id
                )
                .unwrap();
            }
            spec::Cns::ACTIVE_NAMESPACES => {
                // 0xfffffffe/0xffffffff are not valid starting points for
                // the active namespace list.
                if command.nsid >= 0xfffffffe {
                    return Err(spec::Status::INVALID_NAMESPACE_OR_FORMAT.into());
                }
                // Report active nsids greater than the requested nsid, in
                // ascending order, up to the buffer's capacity.
                let nsids = <[u32]>::mut_from_bytes(buf).unwrap();
                for (ns, nsid) in self
                    .namespaces
                    .keys()
                    .filter(|&ns| *ns > command.nsid)
                    .zip(nsids)
                {
                    *nsid = *ns;
                }
            }
            spec::Cns::NAMESPACE => {
                if let Some(ns) = self.namespaces.get(&command.nsid) {
                    ns.identify(buf);
                } else {
                    // Unknown nsid: leave the buffer zeroed rather than fail.
                    tracelimit::warn_ratelimited!(nsid = command.nsid, "unknown namespace id");
                }
            }
            spec::Cns::DESCRIPTOR_NAMESPACE => {
                if let Some(ns) = self.namespaces.get(&command.nsid) {
                    ns.namespace_id_descriptor(buf);
                } else {
                    tracelimit::warn_ratelimited!(nsid = command.nsid, "unknown namespace id");
                }
            }
            cns => {
                tracelimit::warn_ratelimited!(?cns, "unsupported cns");
                return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
            }
        };
        PrpRange::parse(&self.config.mem, buf.len(), command.dptr)?.write(&self.config.mem, buf)?;
        Ok(())
    }
583
    /// Builds the IDENTIFY CONTROLLER data structure for this controller.
    fn identify_controller(&self, state: &AdminState) -> spec::IdentifyController {
        spec::IdentifyController {
            vid: VENDOR_ID,
            ssvid: VENDOR_ID,
            // Max data transfer size, as log2 of the multiple of page size.
            mdts: (MAX_DATA_TRANSFER_SIZE / PAGE_SIZE).trailing_zeros() as u8,
            ver: NVME_VERSION,
            rtd3r: 400000,
            rtd3e: 400000,
            // Queue entry sizes are fixed: only IOSQES/IOCQES are accepted.
            sqes: spec::QueueEntrySize::new()
                .with_min(IOSQES)
                .with_max(IOSQES),
            cqes: spec::QueueEntrySize::new()
                .with_min(IOCQES)
                .with_max(IOCQES),
            frmw: spec::FirmwareUpdates::new().with_ffsro(true).with_nofs(1),
            // NN is the highest valid nsid (nsids may be sparse), not the
            // count of namespaces.
            nn: self.namespaces.keys().copied().max().unwrap_or(0),
            ieee: [0x74, 0xe2, 0x8c], // Microsoft
            fr: (*b"v1.00000").into(),
            mn: (*b"MSFT NVMe Accelerator v1.0              ").into(),
            sn: (*b"SN: 000001          ").into(),
            // Both limits are reported zeroes-based.
            aerl: MAX_ASYNC_EVENT_REQUESTS - 1,
            elpe: ERROR_LOG_PAGE_ENTRIES - 1,
            oaes: spec::Oaes::new().with_namespace_attribute(true),
            oncs: spec::Oncs::new()
                .with_dataset_management(true)
                // Namespaces still have to opt in individually via `rescap`.
                .with_reservations(true),
            vwc: spec::VolatileWriteCache::new()
                .with_present(true)
                .with_broadcast_flush_behavior(spec::BroadcastFlushBehavior::NOT_SUPPORTED.0),
            cntrltype: spec::ControllerType::IO_CONTROLLER,
            oacs: spec::OptionalAdminCommandSupport::new()
                .with_doorbell_buffer_config(self.supports_shadow_doorbells(state)),
            ..FromZeros::new_zeroed()
        }
    }
620
    /// Handles the SET FEATURES admin command.
    ///
    /// Supports NUMBER_OF_QUEUES (only while no IO queues exist) and
    /// VOLATILE_WRITE_CACHE (which cannot actually be disabled).
    fn handle_set_features(
        &mut self,
        state: &mut AdminState,
        command: &spec::Command,
    ) -> Result<CommandResult, NvmeError> {
        let cdw10: spec::Cdw10SetFeatures = command.cdw10.into();
        let mut dw = [0; 2];
        // Note that we don't support non-zero cdw10.save, since ONCS.save == 0.
        match spec::Feature(cdw10.fid()) {
            spec::Feature::NUMBER_OF_QUEUES => {
                // Queue counts may only change while no IO queues are in use.
                if state.io_sqs.iter().any(|sq| sq.is_some())
                    || state.io_cqs.iter().any(|cq| cq.task.has_state())
                {
                    return Err(spec::Status::COMMAND_SEQUENCE_ERROR.into());
                }
                let cdw11: spec::Cdw11FeatureNumberOfQueues = command.cdw11.into();
                // Values are zeroes-based; u16::MAX would overflow the +1.
                if cdw11.ncq_z() == u16::MAX || cdw11.nsq_z() == u16::MAX {
                    return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
                }
                // Clamp the request to the configured maximums.
                let num_sqs = (cdw11.nsq_z() + 1).min(self.config.max_sqs);
                let num_cqs = (cdw11.ncq_z() + 1).min(self.config.max_cqs);
                state.set_max_queues(self, num_sqs, num_cqs);

                // Report back the allocated counts, zeroes-based.
                dw[0] = spec::Cdw11FeatureNumberOfQueues::new()
                    .with_ncq_z(num_cqs - 1)
                    .with_nsq_z(num_sqs - 1)
                    .into();
            }
            spec::Feature::VOLATILE_WRITE_CACHE => {
                let cdw11 = spec::Cdw11FeatureVolatileWriteCache::from(command.cdw11);
                if !cdw11.wce() {
                    tracelimit::warn_ratelimited!(
                        "ignoring unsupported attempt to disable write cache"
                    );
                }
            }
            feature => {
                tracelimit::warn_ratelimited!(?feature, "unsupported feature");
                return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
            }
        }
        Ok(CommandResult::new(spec::Status::SUCCESS, dw))
    }
664
    /// Handles the GET FEATURES admin command.
    async fn handle_get_features(
        &mut self,
        state: &mut AdminState,
        command: &spec::Command,
    ) -> Result<CommandResult, NvmeError> {
        let cdw10: spec::Cdw10GetFeatures = command.cdw10.into();
        let mut dw = [0; 2];

        // Note that we don't support non-zero cdw10.sel, since ONCS.save == 0.
        match spec::Feature(cdw10.fid()) {
            spec::Feature::NUMBER_OF_QUEUES => {
                // Report allocated queue counts, zeroes-based.
                let num_cqs = state.io_cqs.len();
                let num_sqs = state.io_sqs.len();
                dw[0] = spec::Cdw11FeatureNumberOfQueues::new()
                    .with_ncq_z((num_cqs - 1) as u16)
                    .with_nsq_z((num_sqs - 1) as u16)
                    .into();
            }
            spec::Feature::VOLATILE_WRITE_CACHE => {
                // Write cache is always enabled.
                dw[0] = spec::Cdw11FeatureVolatileWriteCache::new()
                    .with_wce(true)
                    .into();
            }
            spec::Feature::NVM_RESERVATION_PERSISTENCE => {
                // This feature is per-namespace; delegate to the namespace.
                let namespace = self
                    .namespaces
                    .get(&command.nsid)
                    .ok_or(spec::Status::INVALID_NAMESPACE_OR_FORMAT)?;

                return namespace.get_feature(command).await;
            }
            feature => {
                tracelimit::warn_ratelimited!(?feature, "unsupported feature");
                return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
            }
        }
        Ok(CommandResult::new(spec::Status::SUCCESS, dw))
    }
704
    /// Handles the CREATE I/O COMPLETION QUEUE admin command, instantiating
    /// the IO handler state for the queue and starting its task.
    fn handle_create_io_completion_queue(
        &mut self,
        state: &mut AdminState,
        command: &spec::Command,
    ) -> Result<(), NvmeError> {
        let cdw10: spec::Cdw10CreateIoQueue = command.cdw10.into();
        let cdw11: spec::Cdw11CreateIoCompletionQueue = command.cdw11.into();
        // Only physically contiguous queues are supported.
        if !cdw11.pc() {
            return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
        }
        let cqid = cdw10.qid();
        // Slots are indexed by cqid - 1; wrapping_sub maps cqid 0 out of
        // bounds so it is rejected along with too-large ids.
        let cq = state
            .io_cqs
            .get_mut((cqid as usize).wrapping_sub(1))
            .ok_or(InvalidQueueIdentifier {
                qid: cqid,
                reason: InvalidQueueIdentifierReason::Oob,
            })?;

        if cq.task.has_state() {
            return Err(InvalidQueueIdentifier {
                qid: cqid,
                reason: InvalidQueueIdentifierReason::InUse,
            }
            .into());
        }

        // Validate the interrupt vector if interrupts are enabled.
        let interrupt = if cdw11.ien() {
            let iv = cdw11.iv();
            if iv as usize >= self.config.interrupts.len() {
                return Err(spec::Status::INVALID_INTERRUPT_VECTOR.into());
            };
            Some(iv)
        } else {
            None
        };
        let gpa = command.dptr[0] & PAGE_MASK;
        // Queue size is zeroes-based; also require the guest to have
        // programmed the expected CQ entry size.
        let len0 = cdw10.qsize_z();
        if len0 == 0 || len0 >= MAX_QES || self.config.qe_sizes.lock().cqe_bits != IOCQES {
            return Err(spec::Status::INVALID_QUEUE_SIZE.into());
        }

        let interrupt = interrupt.map(|iv| self.config.interrupts[iv as usize].clone());
        let namespaces = self.namespaces.clone();

        let state = IoState::new(
            &self.config.mem,
            self.config.doorbells.clone(),
            gpa,
            len0 + 1,
            cqid,
            interrupt,
            namespaces,
        );

        cq.task.insert(&cq.driver, "nvme-io", state);
        cq.task.start();
        Ok(())
    }
764
    /// Handles the CREATE I/O SUBMISSION QUEUE admin command, attaching the
    /// new SQ to its target completion queue's IO handler.
    async fn handle_create_io_submission_queue(
        &mut self,
        state: &mut AdminState,
        command: &spec::Command,
    ) -> Result<(), NvmeError> {
        let cdw10: spec::Cdw10CreateIoQueue = command.cdw10.into();
        let cdw11: spec::Cdw11CreateIoSubmissionQueue = command.cdw11.into();
        // Only physically contiguous queues are supported.
        if !cdw11.pc() {
            return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
        }
        let sqid = cdw10.qid();
        // Slots are indexed by sqid - 1; wrapping_sub rejects sqid 0 too.
        let sq = state
            .io_sqs
            .get_mut((sqid as usize).wrapping_sub(1))
            .ok_or(InvalidQueueIdentifier {
                qid: sqid,
                reason: InvalidQueueIdentifierReason::Oob,
            })?;

        if sq.is_some() {
            return Err(InvalidQueueIdentifier {
                qid: sqid,
                reason: InvalidQueueIdentifierReason::InUse,
            }
            .into());
        }

        // The target completion queue must already exist.
        let cqid = cdw11.cqid();
        let cq = state
            .io_cqs
            .get_mut((cqid as usize).wrapping_sub(1))
            .ok_or(spec::Status::COMPLETION_QUEUE_INVALID)?;

        if !cq.task.has_state() {
            return Err(spec::Status::COMPLETION_QUEUE_INVALID.into());
        }

        let sq_gpa = command.dptr[0] & PAGE_MASK;
        // Queue size is zeroes-based; also require the guest to have
        // programmed the expected SQ entry size.
        let len0 = cdw10.qsize_z();
        if len0 == 0 || len0 >= MAX_QES || self.config.qe_sizes.lock().sqe_bits != IOSQES {
            return Err(spec::Status::INVALID_QUEUE_SIZE.into());
        }

        // Stop the IO handler while mutating its state, restarting it only
        // if it was previously running.
        let running = cq.task.stop().await;
        let sq_idx = cq
            .task
            .state_mut()
            .unwrap()
            .create_sq(sqid, sq_gpa, len0 + 1);
        if running {
            cq.task.start();
        }
        *sq = Some(IoSq {
            sq_idx,
            pending_delete_cid: None,
            cqid,
        });
        Ok(())
    }
824
    /// Handles the DELETE I/O SUBMISSION QUEUE admin command.
    ///
    /// Completion is deferred (returns `Ok(None)`): the IO handler deletes
    /// the SQ asynchronously and reports back via `sq_delete_response`, at
    /// which point the stashed CID is completed.
    async fn handle_delete_io_submission_queue(
        &self,
        state: &mut AdminState,
        command: &spec::Command,
    ) -> Result<Option<CommandResult>, NvmeError> {
        let cdw10: spec::Cdw10DeleteIoQueue = command.cdw10.into();
        let sqid = cdw10.qid();
        let sq = state
            .io_sqs
            .get_mut((sqid as usize).wrapping_sub(1))
            .ok_or(InvalidQueueIdentifier {
                qid: sqid,
                reason: InvalidQueueIdentifierReason::Oob,
            })?
            .as_mut()
            .ok_or(InvalidQueueIdentifier {
                qid: sqid,
                reason: InvalidQueueIdentifierReason::NotInUse,
            })?;

        // A second delete while one is in flight is treated as not-in-use.
        if sq.pending_delete_cid.is_some() {
            return Err(InvalidQueueIdentifier {
                qid: sqid,
                reason: InvalidQueueIdentifierReason::NotInUse,
            }
            .into());
        }

        // Stop the owning CQ's handler while initiating the deletion.
        let cq = &mut state.io_cqs[(sq.cqid as usize).wrapping_sub(1)];
        let running = cq.task.stop().await;
        cq.task.state_mut().unwrap().delete_sq(sq.sq_idx);
        if running {
            cq.task.start();
        }
        // Stash the CID; the completion is posted on SqDeleteComplete.
        sq.pending_delete_cid = Some(command.cdw0.cid());
        Ok(None)
    }
862
863    async fn handle_delete_io_completion_queue(
864        &self,
865        state: &mut AdminState,
866        command: &spec::Command,
867    ) -> Result<(), NvmeError> {
868        let cdw10: spec::Cdw10DeleteIoQueue = command.cdw10.into();
869        let cqid = cdw10.qid();
870        let cq = state
871            .io_cqs
872            .get_mut((cqid as usize).wrapping_sub(1))
873            .ok_or(InvalidQueueIdentifier {
874                qid: cqid,
875                reason: InvalidQueueIdentifierReason::Oob,
876            })?;
877
878        if !cq.task.has_state() {
879            return Err(InvalidQueueIdentifier {
880                qid: cqid,
881                reason: InvalidQueueIdentifierReason::NotInUse,
882            }
883            .into());
884        }
885        let running = cq.task.stop().await;
886        if cq.task.state().unwrap().has_sqs() {
887            if running {
888                cq.task.start();
889            }
890            return Err(spec::Status::INVALID_QUEUE_DELETION.into());
891        }
892        cq.task.remove();
893        Ok(())
894    }
895
896    fn handle_asynchronous_event_request(
897        &self,
898        state: &mut AdminState,
899        command: &spec::Command,
900    ) -> Result<Option<CommandResult>, NvmeError> {
901        if state.asynchronous_event_requests.len() >= MAX_ASYNC_EVENT_REQUESTS as usize {
902            return Err(spec::Status::ASYNCHRONOUS_EVENT_REQUEST_LIMIT_EXCEEDED.into());
903        }
904        state.asynchronous_event_requests.push(command.cdw0.cid());
905        Ok(None)
906    }
907
908    /// Abort is a required command, but a legal implementation is to just
909    /// complete it with a status that means "I'm sorry, that command couldn't
910    /// be aborted."
911    fn handle_abort(&self) -> Result<Option<CommandResult>, NvmeError> {
912        Ok(Some(CommandResult {
913            status: spec::Status::SUCCESS,
914            dw: [1, 0],
915        }))
916    }
917
    /// Handles the Get Log Page admin command.
    ///
    /// Only a minimal set of log pages is implemented; most are returned
    /// zeroed since this controller reports no errors or health data.
    fn handle_get_log_page(
        &self,
        state: &mut AdminState,
        command: &spec::Command,
    ) -> Result<(), NvmeError> {
        let cdw10 = spec::Cdw10GetLogPage::from(command.cdw10);
        let cdw11 = spec::Cdw11GetLogPage::from(command.cdw11);
        // Number of dwords to transfer: a zeroes-based value whose low 16
        // bits come from cdw10 and high 16 bits from cdw11.
        let numd =
            ((cdw10.numdl_z() as u32) | ((cdw11.numdu() as u32) << 16)).saturating_add(1) as usize;
        let len = numd * 4;
        let prp = PrpRange::parse(&self.config.mem, len, command.dptr)?;

        match spec::LogPageIdentifier(cdw10.lid()) {
            spec::LogPageIdentifier::ERROR_INFORMATION => {
                // Write empty log entries (64 bytes per entry), truncated to
                // the requested transfer length.
                prp.zero(
                    &self.config.mem,
                    len.min(ERROR_LOG_PAGE_ENTRIES as usize * 64),
                )?;
            }
            spec::LogPageIdentifier::HEALTH_INFORMATION => {
                // Only the controller-wide page (nsid == 0xffffffff) is
                // supported.
                if command.nsid != !0 {
                    return Err(spec::Status::INVALID_FIELD_IN_COMMAND.into());
                }
                // Write an empty page.
                prp.zero(&self.config.mem, len.min(512))?;
            }
            spec::LogPageIdentifier::FIRMWARE_SLOT_INFORMATION => {
                // Write an empty page.
                prp.zero(&self.config.mem, len.min(512))?;
            }
            spec::LogPageIdentifier::CHANGED_NAMESPACE_LIST => {
                // Zero the whole list.
                prp.zero(&self.config.mem, len.min(4096))?;
                // Now write in the changed namespaces.
                if state.changed_namespaces.len() > 1024 {
                    // Too many to fit, write !0 so the driver scans everything.
                    prp.write(&self.config.mem, (!0u32).as_bytes())?;
                } else {
                    // Truncate to the guest's requested transfer size (one
                    // dword per namespace ID).
                    let count = state.changed_namespaces.len().min(numd);
                    prp.write(
                        &self.config.mem,
                        state.changed_namespaces[..count].as_bytes(),
                    )?;
                }
                state.changed_namespaces.clear();
                // Unless RAE (Retain Asynchronous Event) is set, clear the
                // notified flag — presumably so a later namespace change
                // raises a fresh async event.
                if !cdw10.rae() {
                    state.notified_changed_namespaces = false;
                }
            }
            lid => {
                tracelimit::warn_ratelimited!(?lid, "unsupported log page");
                return Err(spec::Status::INVALID_LOG_PAGE.into());
            }
        }

        Ok(())
    }
976
977    fn supports_shadow_doorbells(&self, state: &AdminState) -> bool {
978        let num_queues = state.io_sqs.len().max(state.io_cqs.len()) + 1;
979        let len = num_queues * (2 << DOORBELL_STRIDE_BITS);
980        // The spec only allows a single shadow doorbell page.
981        len <= PAGE_SIZE
982    }
983
984    async fn handle_doorbell_buffer_config(
985        &self,
986        state: &mut AdminState,
987        command: &spec::Command,
988    ) -> Result<(), NvmeError> {
989        // Validated by caller.
990        assert!(self.supports_shadow_doorbells(state));
991
992        let shadow_db_gpa = command.dptr[0];
993        let event_idx_gpa = command.dptr[1];
994        if (shadow_db_gpa | event_idx_gpa) & !PAGE_MASK != 0 {
995            return Err(NvmeError::from(spec::Status::INVALID_FIELD_IN_COMMAND));
996        }
997
998        self.config
999            .doorbells
1000            .write()
1001            .replace_mem(self.config.mem.clone(), shadow_db_gpa, Some(event_idx_gpa))
1002            .map_err(|err| NvmeError::new(spec::Status::DATA_TRANSFER_ERROR, err))?;
1003
1004        Ok(())
1005    }
1006}
1007
1008impl AsyncRun<AdminState> for AdminHandler {
1009    async fn run(
1010        &mut self,
1011        stop: &mut StopTask<'_>,
1012        state: &mut AdminState,
1013    ) -> Result<(), Cancelled> {
1014        loop {
1015            let event = stop.until_stopped(self.next_event(state)).await?;
1016            if let Err(err) = self.process_event(state, event).await {
1017                tracing::error!(
1018                    error = &err as &dyn std::error::Error,
1019                    "admin queue failure"
1020                );
1021                break;
1022            }
1023        }
1024        Ok(())
1025    }
1026}
1027
impl InspectTask<AdminState> for AdminHandler {
    /// Responds to an inspect request by merging the handler's own fields
    /// with those of the task state, when the task currently has state.
    fn inspect(&self, req: inspect::Request<'_>, state: Option<&AdminState>) {
        req.respond().merge(self).merge(state);
    }
}