mana_driver/
mana.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! The interface to the MANA device.
5
6pub use crate::bnic_driver::RxConfig;
7pub use crate::resources::ResourceArena;
8pub use crate::save_restore::ManaDeviceSavedState;
9
10use crate::bnic_driver::BnicDriver;
11use crate::bnic_driver::WqConfig;
12use crate::gdma_driver::GdmaDriver;
13use crate::queues;
14use crate::queues::Doorbell;
15use crate::queues::DoorbellPage;
16use anyhow::Context;
17use futures::StreamExt;
18use futures::lock::Mutex;
19use gdma_defs::GdmaDevId;
20use gdma_defs::GdmaDevType;
21use gdma_defs::GdmaQueueType;
22use gdma_defs::GdmaRegisterDeviceResp;
23use gdma_defs::bnic::ManaQueryDeviceCfgResp;
24use gdma_defs::bnic::ManaQueryFilterStateResponse;
25use gdma_defs::bnic::ManaQueryStatisticsResponse;
26use gdma_defs::bnic::ManaQueryVportCfgResp;
27use gdma_defs::bnic::STATISTICS_FLAGS_ALL;
28use inspect::Inspect;
29use net_backend_resources::mac_address::MacAddress;
30use pal_async::driver::SpawnDriver;
31use pal_async::task::Spawn;
32use pal_async::task::Task;
33use std::sync::Arc;
34use tracing::Instrument;
35use user_driver::DeviceBacking;
36use user_driver::DmaClient;
37use user_driver::DmaPool;
38use user_driver::interrupt::DeviceInterrupt;
39use user_driver::memory::MemoryBlock;
40use user_driver::memory::PAGE_SIZE;
41use vmcore::vm_task::VmTaskDriverSource;
42
/// Per-vport link status tracking for the HWC notification task.
enum LinkStatus {
    /// No link event has been observed and no listener is registered.
    Default,
    /// A link toggle arrived before a listener registered; holds the latest
    /// connected state so it can be delivered upon registration.
    Pending(bool),
    /// A listener is registered and is notified of link state changes.
    Active {
        /// Channel used to notify the listener of link state changes.
        sender: mesh::Sender<bool>,
        /// The last observed connected state.
        connected: bool,
    },
}
51
/// A MANA device.
#[derive(Inspect)]
pub struct ManaDevice<T: DeviceBacking> {
    // State shared with vports and the background tasks.
    #[inspect(skip)]
    inner: Arc<Inner<T>>,
    // Task that services inspect requests (needs to await the gdma lock).
    #[inspect(skip)]
    inspect_task: Task<()>,
    // Task that processes hardware channel (HWC) events; started lazily by
    // `start_notification_task`.
    #[inspect(skip)]
    hwc_task: Option<Task<()>>,
    // Channel used to forward inspect requests to `inspect_task`.
    #[inspect(flatten, send = "|x| x")]
    inspect_send: mesh::Sender<inspect::Deferred>,
}
64
/// State shared between a [`ManaDevice`], its [`Vport`]s, and the background
/// tasks.
struct Inner<T: DeviceBacking> {
    // The underlying GDMA driver, serialized behind an async mutex.
    gdma: Mutex<GdmaDriver<T>>,
    // The GDMA device ID of the MANA device.
    dev_id: GdmaDevId,
    // Registration data (pdid, gpa_mkey, db_id) for the device.
    dev_data: GdmaRegisterDeviceResp,
    // The queried device configuration.
    dev_config: ManaQueryDeviceCfgResp,
    // Doorbell interface used to construct per-queue doorbell pages.
    doorbell: Arc<dyn Doorbell>,
    // Link status per vport, indexed by vport index.
    vport_link_status: Arc<Mutex<Vec<LinkStatus>>>,
    // Sender used to notify the (at most one) VF reconfiguration subscriber.
    vf_reconfig_sender: Arc<Mutex<Option<mesh::Sender<()>>>>,
}
74
75impl<T: DeviceBacking> ManaDevice<T> {
76    /// Initializes the MANA driver on `device`.
77    pub async fn new(
78        driver: &impl SpawnDriver,
79        device: T,
80        num_vps: u32,
81        max_queues_per_vport: u16,
82        mana_state: Option<&ManaDeviceSavedState>,
83    ) -> anyhow::Result<Self> {
84        let mut gdma = if let Some(mana_state) = mana_state {
85            let memory = device.dma_client().attach_pending_buffers()?;
86            let gdma_memory = memory
87                .iter()
88                .find(|m| m.pfns()[0] == mana_state.gdma.mem.base_pfn)
89                .expect("gdma restored memory not found")
90                .clone();
91
92            GdmaDriver::restore(mana_state.gdma.clone(), device, gdma_memory)
93                .instrument(tracing::info_span!("restore_gdma_driver"))
94                .await?
95        } else {
96            GdmaDriver::new(driver, device, num_vps, None)
97                .instrument(tracing::info_span!("new_gdma_driver"))
98                .await?
99        };
100
101        gdma.test_eq().await?;
102
103        gdma.verify_vf_driver_version().await?;
104
105        let dev_id = gdma
106            .list_devices()
107            .await?
108            .iter()
109            .copied()
110            .find(|dev_id| dev_id.ty == GdmaDevType::GDMA_DEVICE_MANA)
111            .context("no mana device found")?;
112
113        let dev_data = if let Some(mana_state) = mana_state {
114            GdmaRegisterDeviceResp {
115                pdid: mana_state.gdma.pdid,
116                gpa_mkey: mana_state.gdma.gpa_mkey,
117                db_id: mana_state.gdma.db_id as u32,
118            }
119        } else {
120            gdma.register_device(dev_id).await?
121        };
122
123        let mut bnic = BnicDriver::new(&mut gdma, dev_id);
124        let dev_config = bnic.query_dev_config().await?;
125        tracing::info!(mana_dev_config = ?dev_config);
126        let num_queues_needed = dev_config.max_num_vports as u32 * max_queues_per_vport as u32;
127        gdma.check_vf_resources(num_vps, num_queues_needed);
128
129        let doorbell = gdma.doorbell();
130        let vport_link_status = (0..dev_config.max_num_vports)
131            .map(|_| LinkStatus::Default)
132            .collect();
133        let inner = Arc::new(Inner {
134            gdma: Mutex::new(gdma),
135            dev_id,
136            dev_data,
137            dev_config,
138            doorbell,
139            vport_link_status: Arc::new(Mutex::new(vport_link_status)),
140            vf_reconfig_sender: Arc::new(Mutex::new(None)),
141        });
142
143        let (inspect_send, mut inspect_recv) = mesh::channel::<inspect::Deferred>();
144        let inspect_task = driver.spawn("mana-inspect", {
145            let inner = inner.clone();
146            async move {
147                while let Some(deferred) = inspect_recv.next().await {
148                    let Inner {
149                        gdma,
150                        dev_id: _,
151                        dev_data: _,
152                        dev_config: _,
153                        doorbell: _,
154                        vport_link_status: _,
155                        vf_reconfig_sender: _,
156                    } = inner.as_ref();
157                    let gdma = gdma.lock().await;
158                    deferred.respond(|resp| {
159                        resp.merge(&*gdma);
160                    })
161                }
162            }
163        });
164
165        let device = Self {
166            inner,
167            inspect_send,
168            inspect_task,
169            hwc_task: None,
170        };
171        Ok(device)
172    }
173
174    /// Saves the device's state for servicing
175    pub async fn save(self) -> (anyhow::Result<ManaDeviceSavedState>, T) {
176        self.inspect_task.cancel().await;
177        if let Some(hwc_task) = self.hwc_task {
178            hwc_task.cancel().await;
179        }
180        let inner = Arc::into_inner(self.inner).unwrap();
181        let mut driver = inner.gdma.into_inner();
182
183        if let Ok(saved_state) = driver.save().await {
184            let mana_saved_state = ManaDeviceSavedState { gdma: saved_state };
185
186            (Ok(mana_saved_state), driver.into_device())
187        } else {
188            tracing::error!("Failed to save MANA device state");
189            (
190                Err(anyhow::anyhow!("Failed to save MANA device state")),
191                driver.into_device(),
192            )
193        }
194    }
195
196    /// Returns the number of vports the device supports.
197    pub fn num_vports(&self) -> u32 {
198        self.inner.dev_config.max_num_vports.into()
199    }
200
201    /// Returns the device configuration.
202    pub fn dev_config(&self) -> &ManaQueryDeviceCfgResp {
203        &self.inner.dev_config
204    }
205
206    /// Starts a hardware channel (HWC) task that listens to events on the HWC
207    /// and calls the appropriate provided callsbacks/closure.
208    pub async fn start_notification_task(&mut self, driver_source: &VmTaskDriverSource) {
209        if self.hwc_task.is_some() {
210            return;
211        }
212
213        let inner = self.inner.clone();
214        let hwc_task = driver_source.simple().spawn("mana-hwc", {
215            let mut gdma = self.inner.gdma.lock().await;
216            let mut hwc_event = gdma.hwc_subscribe();
217            async move {
218                loop {
219                    hwc_event.wait().await;
220                    let mut gdma = inner.gdma.lock().await;
221                    if gdma.process_all_eqs() {
222                        let mut vport_link_status = inner.vport_link_status.lock().await;
223                        for (vport_index, current) in gdma.get_link_toggle_list() {
224                            let vport_index = vport_index as usize;
225                            if vport_index >= vport_link_status.len() {
226                                tracing::error!(vport_index, "Invalid vport index");
227                                continue;
228                            }
229                            if let LinkStatus::Active { sender, connected } =
230                                &mut vport_link_status[vport_index]
231                            {
232                                *connected = current;
233                                sender.send(*connected);
234                            } else {
235                                let _ = std::mem::replace(
236                                    &mut vport_link_status[vport_index],
237                                    LinkStatus::Pending(current),
238                                );
239                            }
240                        }
241                        if gdma.get_vf_reconfiguration_pending() {
242                            if let Some(sender) = inner.vf_reconfig_sender.lock().await.as_ref() {
243                                sender.send(());
244                            }
245                        }
246                    }
247                }
248            }
249        });
250        self.hwc_task = Some(hwc_task);
251    }
252
253    /// Initializes and returns the vport number `index`.
254    pub async fn new_vport(
255        &self,
256        index: u32,
257        vport_state: Option<VportState>,
258        dev_config: &ManaQueryDeviceCfgResp,
259    ) -> anyhow::Result<Vport<T>> {
260        let vport_config = self.query_vport_config(index).await?;
261
262        let vport_state = vport_state.unwrap_or(VportState::new(None, None));
263
264        let vport = Vport {
265            inner: self.inner.clone(),
266            config: vport_config,
267            vport_state,
268            id: index,
269        };
270
271        if dev_config.cap_filter_state_query() {
272            if let Ok(resp) = vport.query_filter_state(vport.id.into()).await {
273                tracing::debug!(
274                    mac_address = %vport.mac_address(),
275                    direction_to_vtl0 = resp.direction_to_vtl0,
276                    "query_filter_state"
277                );
278                vport
279                    .vport_state
280                    .set_direction_to_vtl0(resp.direction_to_vtl0 == 1);
281            }
282        }
283
284        Ok(vport)
285    }
286
287    /// Subscribes to VF reconfiguration events.
288    /// Returned receiver will receive a message on VF reconfiguration events.
289    pub async fn subscribe_vf_reconfig(&self) -> mesh::Receiver<()> {
290        tracing::debug!("subscribing to VF reconfiguration events");
291        let mut vf_reconfig_sender = self.inner.vf_reconfig_sender.lock().await;
292        assert!(
293            vf_reconfig_sender.is_none(),
294            "multiple VF reconfiguration subscribers not supported"
295        );
296        let (sender, receiver) = mesh::channel();
297        *vf_reconfig_sender = Some(sender);
298        receiver
299    }
300
301    /// Shuts the device down.
302    pub async fn shutdown(self) -> (anyhow::Result<()>, T) {
303        self.inspect_task.cancel().await;
304        if let Some(hwc_task) = self.hwc_task {
305            hwc_task.cancel().await;
306        }
307        let inner = Arc::into_inner(self.inner).unwrap();
308        let mut driver = inner.gdma.into_inner();
309        let result = driver.deregister_device(inner.dev_id).await;
310        (result, driver.into_device())
311    }
312    /// Queries the configuration of a specific vport.
313    pub async fn query_vport_config(&self, vport: u32) -> anyhow::Result<ManaQueryVportCfgResp> {
314        let mut gdma = self.inner.gdma.lock().await;
315        BnicDriver::new(&mut *gdma, self.inner.dev_id)
316            .query_vport_config(vport)
317            .await
318    }
319}
320
/// Tracks vport state and optionally notifies a listener of changes.
#[derive(Clone)]
pub struct VportState {
    // Last known filter direction (`true` = towards VTL0), if known.
    direction_to_vtl0: Arc<parking_lot::Mutex<Option<bool>>>,
    // Optional callback invoked whenever the direction is updated.
    state_change_callback: Arc<Option<Box<dyn Fn(bool) + Send + Sync>>>,
}
327
328impl VportState {
329    /// Create a new VportState instance.
330    pub fn new(
331        direction_to_vtl0: Option<bool>,
332        state_change_callback: Option<Box<dyn Fn(bool) + Send + Sync>>,
333    ) -> Self {
334        Self {
335            direction_to_vtl0: Arc::new(parking_lot::Mutex::new(direction_to_vtl0)),
336            state_change_callback: Arc::new(state_change_callback),
337        }
338    }
339
340    /// Remember current filter setting.
341    pub fn set_direction_to_vtl0(&self, direction_to_vtl0: bool) {
342        *self.direction_to_vtl0.lock() = Some(direction_to_vtl0);
343        if let Some(callback) = self.state_change_callback.as_ref() {
344            (callback)(direction_to_vtl0);
345        }
346    }
347
348    /// Get current filter setting if known.
349    pub fn get_direction_to_vtl0(&self) -> Option<bool> {
350        *self.direction_to_vtl0.lock()
351    }
352}
353
/// A MANA vport.
pub struct Vport<T: DeviceBacking> {
    // Shared device state.
    inner: Arc<Inner<T>>,
    // The queried configuration for this vport.
    config: ManaQueryVportCfgResp,
    // Filter direction tracking, possibly shared with the caller.
    vport_state: VportState,
    // The vport index on the device.
    id: u32,
}
361
impl<T: DeviceBacking> Vport<T> {
    /// Returns the maximum number of transmit queues.
    pub fn max_tx_queues(&self) -> u32 {
        self.config.max_num_sq
    }

    /// Returns the maximum number of receive queues.
    pub fn max_rx_queues(&self) -> u32 {
        self.config.max_num_rq
    }

    /// Returns the assigned MAC address.
    pub fn mac_address(&self) -> MacAddress {
        self.config.mac_addr.into()
    }

    /// Returns the memory key to refer to all of GPA space.
    pub fn gpa_mkey(&self) -> u32 {
        self.inner.dev_data.gpa_mkey
    }

    /// Returns this vport's id.
    pub fn id(&self) -> u32 {
        self.id
    }

    /// Returns the number of indirection entries supported by the vport.
    pub fn num_indirection_ent(&self) -> u32 {
        self.config.num_indirection_ent
    }

    /// Creates a new event queue.
    ///
    /// The queue's backing DMA buffer is `size` bytes and its interrupt is
    /// targeted at `cpu`. Created resources are tracked in `arena` so they
    /// can be torn down later via [`Self::destroy`].
    pub async fn new_eq(
        &self,
        arena: &mut ResourceArena,
        size: u32,
        cpu: u32,
    ) -> anyhow::Result<BnicEq> {
        let mut gdma = self.inner.gdma.lock().await;
        let dma_client = gdma.device().dma_client_for(DmaPool::Ephemeral)?;
        let mem = dma_client
            .allocate_dma_buffer(size as usize)
            .context("Failed to allocate DMA buffer")?;

        // Register the buffer with the device before creating the eq on it.
        let gdma_region = gdma
            .create_dma_region(arena, self.inner.dev_id, mem.clone())
            .await
            .context("failed to create eq dma region")?;
        let (id, interrupt) = gdma
            .create_eq(
                arena,
                self.inner.dev_id,
                gdma_region,
                size,
                self.inner.dev_data.pdid,
                self.inner.dev_data.db_id,
                cpu,
            )
            .await
            .context("failed to create eq")?;
        Ok(BnicEq {
            doorbell: DoorbellPage::new(self.inner.doorbell.clone(), self.inner.dev_data.db_id)?,
            mem,
            id,
            interrupt,
        })
    }

    /// Creates a new work queue (transmit or receive).
    ///
    /// `wq_size` and `cq_size` are in bytes and must each be a power of two
    /// of at least one page. The completion queue is attached to the event
    /// queue identified by `eq_id`.
    pub async fn new_wq(
        &self,
        arena: &mut ResourceArena,
        is_send: bool,
        wq_size: u32,
        cq_size: u32,
        eq_id: u32,
    ) -> anyhow::Result<BnicWq> {
        assert!(wq_size >= PAGE_SIZE as u32 && wq_size.is_power_of_two());
        assert!(cq_size >= PAGE_SIZE as u32 && cq_size.is_power_of_two());
        let mut gdma = self.inner.gdma.lock().await;

        let dma_client = gdma.device().dma_client_for(DmaPool::Ephemeral)?;

        // The wq and cq share one allocation: wq first, then cq.
        let mem = dma_client
            .allocate_dma_buffer((wq_size + cq_size) as usize)
            .context("failed to allocate DMA buffer")?;

        let wq_mem = mem.subblock(0, wq_size as usize);
        let cq_mem = mem.subblock(wq_size as usize, cq_size as usize);

        let wq_gdma_region = gdma
            .create_dma_region(arena, self.inner.dev_id, wq_mem.clone())
            .await?;
        let cq_gdma_region = gdma
            .create_dma_region(arena, self.inner.dev_id, cq_mem.clone())
            .await?;
        let wq_type = if is_send {
            GdmaQueueType::GDMA_SQ
        } else {
            GdmaQueueType::GDMA_RQ
        };
        let doorbell = DoorbellPage::new(self.inner.doorbell.clone(), self.inner.dev_data.db_id)?;
        let resp = BnicDriver::new(&mut *gdma, self.inner.dev_id)
            .create_wq_obj(
                arena,
                self.config.vport,
                wq_type,
                &WqConfig {
                    wq_gdma_region,
                    cq_gdma_region,
                    wq_size,
                    cq_size,
                    cq_moderation_ctx_id: 0,
                    eq_id,
                },
            )
            .await?;

        Ok(BnicWq {
            doorbell,
            wq_mem,
            cq_mem,
            wq_id: resp.wq_id,
            cq_id: resp.cq_id,
            is_send,
            wq_obj: resp.wq_obj,
        })
    }

    /// Get the transmit configuration.
    pub async fn config_tx(&self) -> anyhow::Result<TxConfig> {
        let mut gdma = self.inner.gdma.lock().await;
        let resp = BnicDriver::new(&mut *gdma, self.inner.dev_id)
            .config_vport_tx(
                self.config.vport,
                self.inner.dev_data.pdid,
                self.inner.dev_data.db_id,
            )
            .await?;

        let config = TxConfig {
            tx_vport_offset: resp.tx_vport_offset,
        };
        Ok(config)
    }

    /// Sets the receive configuration.
    pub async fn config_rx(&self, config: &RxConfig<'_>) -> anyhow::Result<()> {
        let mut gdma = self.inner.gdma.lock().await;
        BnicDriver::new(&mut *gdma, self.inner.dev_id)
            .config_vport_rx(self.config.vport, config)
            .await?;

        Ok(())
    }

    /// Move filter between VTL2 VF vport and VTL0 VF vport.
    ///
    /// `direction_to_vtl0` is 1 to direct traffic towards VTL0, 0 otherwise.
    pub async fn move_filter(&self, direction_to_vtl0: u8) -> anyhow::Result<()> {
        // Skip the device request if the filter is already known to be in the
        // requested state.
        if let Some(to_vtl0) = self.vport_state.get_direction_to_vtl0() {
            if to_vtl0 == (direction_to_vtl0 == 1) {
                return Ok(());
            }
        }
        let mut gdma = self.inner.gdma.lock().await;
        let hwc_activity_id = BnicDriver::new(&mut *gdma, self.inner.dev_id)
            .move_vport_filter(self.config.vport, direction_to_vtl0)
            .await?;
        self.vport_state
            .set_direction_to_vtl0(direction_to_vtl0 == 1);
        tracing::info!(
            mac_address = %self.mac_address(),
            direction_to_vtl0,
            hwc_activity_id,
            "switch data path for mac",
        );
        Ok(())
    }

    /// Get current filter state, if known.
    pub async fn get_direction_to_vtl0(&self) -> Option<bool> {
        self.vport_state.get_direction_to_vtl0()
    }

    /// Set the vport serial number.
    pub async fn set_serial_no(&self, serial_no: u32) -> anyhow::Result<()> {
        let mut gdma = self.inner.gdma.lock().await;
        BnicDriver::new(&mut *gdma, self.inner.dev_id)
            .set_vport_serial_no(self.config.vport, serial_no)
            .await?;
        Ok(())
    }

    /// Gets stats. Note that these are adapter-wide and not really per-vport.
    pub async fn query_stats(&self) -> anyhow::Result<ManaQueryStatisticsResponse> {
        let mut gdma = self.inner.gdma.lock().await;
        BnicDriver::new(&mut *gdma, self.inner.dev_id)
            .query_stats(STATISTICS_FLAGS_ALL)
            .await
    }

    /// Retrieves vport mac filter state from socamana.
    pub async fn query_filter_state(
        &self,
        vport: u64,
    ) -> anyhow::Result<ManaQueryFilterStateResponse> {
        let mut gdma = self.inner.gdma.lock().await;
        BnicDriver::new(&mut *gdma, self.inner.dev_id)
            .query_filter_state(vport)
            .await
    }

    /// Destroys resources in `arena`.
    pub async fn destroy(&self, arena: ResourceArena) {
        let mut gdma = self.inner.gdma.lock().await;
        arena.destroy(&mut *gdma).await;
    }

    /// Changes the target CPU for the given eq to `cpu`.
    pub async fn retarget_interrupt(
        &self,
        eq_id: u32,
        cpu: u32,
    ) -> anyhow::Result<Option<DeviceInterrupt>> {
        let mut gdma = self.inner.gdma.lock().await;
        gdma.retarget_eq(self.inner.dev_id, eq_id, cpu).await
    }

    /// Registers for link status notification updates.
    ///
    /// Replaces any previously registered sender for this vport.
    pub async fn register_link_status_notifier(&self, sender: mesh::Sender<bool>) {
        let mut vport_link_status = self.inner.vport_link_status.lock().await;
        let vport_index = self.id as usize;
        let (send, connected) = match vport_link_status[vport_index] {
            // Send any pending notifications, whatever it is.
            LinkStatus::Pending(connected) => (true, connected),
            // Endpoint reestablishing connection. Only send, if the link is down.
            LinkStatus::Active { connected, .. } => (!connected, connected),
            // Don't send anything when transitioning from the default state.
            _ => (false, true),
        };
        if send {
            sender.send(connected);
        }
        vport_link_status[vport_index] = LinkStatus::Active { sender, connected };
    }

    /// Returns an object that can allocate dma memory to be shared with the device.
    pub async fn dma_client(&self) -> Arc<dyn DmaClient> {
        self.inner.gdma.lock().await.device().dma_client()
    }
}
612
/// Transmit configuration, as returned by [`Vport::config_tx`].
pub struct TxConfig {
    /// The vport offset to include in tx packets.
    pub tx_vport_offset: u16,
}
618
/// An event queue.
pub struct BnicEq {
    // Doorbell page used to ring the queue.
    doorbell: DoorbellPage,
    // The DMA buffer backing the queue entries.
    mem: MemoryBlock,
    // The hardware-assigned queue ID.
    id: u32,
    // Interrupt signaled when the armed queue is ready.
    interrupt: DeviceInterrupt,
}
626
627impl BnicEq {
628    /// The event queue ID.
629    pub fn id(&self) -> u32 {
630        self.id
631    }
632
633    /// The interrupt that will be signaled when the armed event queue is ready.
634    pub fn interrupt(&self) -> DeviceInterrupt {
635        self.interrupt.clone()
636    }
637
638    /// Gets an object to access the queue's entries.
639    pub fn queue(&self) -> queues::Eq {
640        queues::Eq::new_eq(self.mem.clone(), self.doorbell.clone(), self.id)
641    }
642}
643
/// A work queue (transmit or receive).
pub struct BnicWq {
    // Doorbell page used to ring the queues.
    doorbell: DoorbellPage,
    // DMA buffer backing the work queue.
    wq_mem: MemoryBlock,
    // DMA buffer backing the completion queue.
    cq_mem: MemoryBlock,
    // Hardware-assigned work queue ID.
    wq_id: u32,
    // Hardware-assigned completion queue ID.
    cq_id: u32,
    // True for a send queue, false for a receive queue.
    is_send: bool,
    // The work queue object ID returned at creation.
    wq_obj: u64,
}
654
655impl BnicWq {
656    /// Gets the work queue for sending requests.
657    pub fn wq(&self) -> queues::Wq {
658        if self.is_send {
659            queues::Wq::new_sq(self.wq_mem.clone(), self.doorbell.clone(), self.wq_id)
660        } else {
661            queues::Wq::new_rq(self.wq_mem.clone(), self.doorbell.clone(), self.wq_id)
662        }
663    }
664
665    /// Gets the completion queue for receiving results.
666    pub fn cq(&self) -> queues::Cq {
667        queues::Cq::new_cq(self.cq_mem.clone(), self.doorbell.clone(), self.cq_id)
668    }
669
670    /// Gets the work queue object ID.
671    pub fn wq_obj(&self) -> u64 {
672        self.wq_obj
673    }
674}