Skip to main content

mana_driver/
mana.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! The interface to the MANA device.
5
6pub use crate::bnic_driver::RxConfig;
7pub use crate::resources::ResourceArena;
8pub use crate::save_restore::ManaDeviceSavedState;
9
10use crate::bnic_driver::BnicDriver;
11use crate::bnic_driver::WqConfig;
12use crate::gdma_driver::GdmaDriver;
13use crate::queues;
14use crate::queues::Doorbell;
15use crate::queues::DoorbellPage;
16use anyhow::Context;
17use futures::StreamExt;
18use futures::lock::Mutex;
19use gdma_defs::GdmaDevId;
20use gdma_defs::GdmaDevType;
21use gdma_defs::GdmaQueueType;
22use gdma_defs::GdmaRegisterDeviceResp;
23use gdma_defs::bnic::ManaQueryDeviceCfgResp;
24use gdma_defs::bnic::ManaQueryFilterStateResponse;
25use gdma_defs::bnic::ManaQueryStatisticsResponse;
26use gdma_defs::bnic::ManaQueryVportCfgResp;
27use gdma_defs::bnic::STATISTICS_FLAGS_ALL;
28use inspect::Inspect;
29use net_backend_resources::mac_address::MacAddress;
30use pal_async::driver::SpawnDriver;
31use pal_async::task::Spawn;
32use pal_async::task::Task;
33use std::sync::Arc;
34use tracing::Instrument;
35use user_driver::DeviceBacking;
36use user_driver::DmaClient;
37use user_driver::DmaPool;
38use user_driver::interrupt::DeviceInterrupt;
39use user_driver::memory::MemoryBlock;
40use user_driver::memory::PAGE_SIZE;
41use vmcore::vm_task::VmTaskDriverSource;
42
/// Link-status bookkeeping kept for each vport.
enum LinkStatus {
    /// Initial state: no link change has been recorded and no notifier has
    /// been registered.
    Default,
    /// A link change was recorded before any notifier was registered. The
    /// value is the most recently reported state, delivered once a notifier
    /// registers.
    Pending(bool),
    /// A notifier is registered for the vport.
    Active {
        /// Channel on which link state changes are sent.
        sender: mesh::Sender<bool>,
        /// Last link state recorded for this vport.
        connected: bool,
    },
}
51
/// A MANA device.
///
/// Owns the state shared with the vports created from it and the background
/// tasks that service inspect requests and hardware channel (HWC) events.
#[derive(Inspect)]
pub struct ManaDevice<T: DeviceBacking> {
    /// State shared with vports and the background tasks.
    #[inspect(skip)]
    inner: Arc<Inner<T>>,
    /// Task that answers the deferred inspect requests sent via
    /// `inspect_send`.
    #[inspect(skip)]
    inspect_task: Task<()>,
    /// HWC event task; `None` until `start_notification_task` is called.
    #[inspect(skip)]
    hwc_task: Option<Task<()>>,
    /// Sending half of the channel drained by `inspect_task`.
    #[inspect(flatten, send = "|x| x")]
    inspect_send: mesh::Sender<inspect::Deferred>,
}
64
/// State shared between a [`ManaDevice`], its vports, and its background
/// tasks.
struct Inner<T: DeviceBacking> {
    /// The GDMA driver, guarded by an async mutex.
    gdma: Mutex<GdmaDriver<T>>,
    /// Identifier of the MANA device found while enumerating GDMA devices.
    dev_id: GdmaDevId,
    /// Device registration data (`pdid`, `gpa_mkey`, `db_id`), either
    /// restored from saved state or returned by device registration.
    dev_data: GdmaRegisterDeviceResp,
    /// Device configuration queried during initialization.
    dev_config: ManaQueryDeviceCfgResp,
    /// Doorbell obtained from the GDMA driver.
    doorbell: Arc<dyn Doorbell>,
    /// Link status per vport; holds `max_num_vports` entries.
    vport_link_status: Arc<Mutex<Vec<LinkStatus>>>,
    /// Sender for the current VF reset request subscriber, if any.
    vf_reset_request_sender: Arc<Mutex<Option<mesh::Sender<bool>>>>,
}
74
75impl<T: DeviceBacking> ManaDevice<T> {
76    /// Initializes the MANA driver on `device`.
77    pub async fn new(
78        driver: &impl SpawnDriver,
79        device: T,
80        num_vps: u32,
81        max_queues_per_vport: u16,
82        mana_state: Option<&ManaDeviceSavedState>,
83    ) -> anyhow::Result<Self> {
84        let mut gdma = if let Some(mana_state) = mana_state {
85            let memory = device.dma_client().attach_pending_buffers()?;
86            let gdma_memory = memory
87                .iter()
88                .find(|m| m.pfns()[0] == mana_state.gdma.mem.base_pfn)
89                .ok_or_else(|| {
90                    anyhow::anyhow!(
91                        "gdma restored memory not found for base_pfn {}",
92                        mana_state.gdma.mem.base_pfn
93                    )
94                })?
95                .clone();
96
97            GdmaDriver::restore(mana_state.gdma.clone(), device, gdma_memory)
98                .instrument(tracing::info_span!("restore_gdma_driver"))
99                .await?
100        } else {
101            GdmaDriver::new(driver, device, num_vps, None)
102                .instrument(tracing::info_span!("new_gdma_driver"))
103                .await?
104        };
105
106        gdma.test_eq().await?;
107
108        gdma.verify_vf_driver_version().await?;
109
110        let dev_id = gdma
111            .list_devices()
112            .await?
113            .iter()
114            .copied()
115            .find(|dev_id| dev_id.ty == GdmaDevType::GDMA_DEVICE_MANA)
116            .context("no mana device found")?;
117
118        let dev_data = if let Some(mana_state) = mana_state {
119            GdmaRegisterDeviceResp {
120                pdid: mana_state.gdma.pdid,
121                gpa_mkey: mana_state.gdma.gpa_mkey,
122                db_id: mana_state.gdma.db_id as u32,
123            }
124        } else {
125            gdma.register_device(dev_id).await?
126        };
127
128        let mut bnic = BnicDriver::new(&mut gdma, dev_id);
129        let dev_config = bnic.query_dev_config().await?;
130        tracing::info!(mana_dev_config = ?dev_config);
131        let num_queues_needed = dev_config.max_num_vports as u32 * max_queues_per_vport as u32;
132        gdma.check_vf_resources(num_vps, num_queues_needed);
133
134        let doorbell = gdma.doorbell();
135        let vport_link_status = (0..dev_config.max_num_vports)
136            .map(|_| LinkStatus::Default)
137            .collect();
138        let inner = Arc::new(Inner {
139            gdma: Mutex::new(gdma),
140            dev_id,
141            dev_data,
142            dev_config,
143            doorbell,
144            vport_link_status: Arc::new(Mutex::new(vport_link_status)),
145            vf_reset_request_sender: Arc::new(Mutex::new(None)),
146        });
147
148        let (inspect_send, mut inspect_recv) = mesh::channel::<inspect::Deferred>();
149        let inspect_task = driver.spawn("mana-inspect", {
150            let inner = inner.clone();
151            async move {
152                while let Some(deferred) = inspect_recv.next().await {
153                    let Inner {
154                        gdma,
155                        dev_id: _,
156                        dev_data: _,
157                        dev_config: _,
158                        doorbell: _,
159                        vport_link_status: _,
160                        vf_reset_request_sender: _,
161                    } = inner.as_ref();
162                    let gdma = gdma.lock().await;
163                    deferred.respond(|resp| {
164                        resp.merge(&*gdma);
165                    })
166                }
167            }
168        });
169
170        let device = Self {
171            inner,
172            inspect_send,
173            inspect_task,
174            hwc_task: None,
175        };
176        Ok(device)
177    }
178
179    /// Saves the device's state for servicing
180    pub async fn save(self) -> (anyhow::Result<ManaDeviceSavedState>, T) {
181        self.inspect_task.cancel().await;
182        if let Some(hwc_task) = self.hwc_task {
183            hwc_task.cancel().await;
184        }
185
186        let inner = Arc::into_inner(self.inner)
187            .expect("MANA device save failed, multiple references remain.");
188        let mut driver = inner.gdma.into_inner();
189
190        if let Ok(saved_state) = driver.save().await {
191            let mana_saved_state = ManaDeviceSavedState { gdma: saved_state };
192
193            (Ok(mana_saved_state), driver.into_device())
194        } else {
195            tracing::error!("Failed to save MANA device state");
196            (
197                Err(anyhow::anyhow!("Failed to save MANA device state")),
198                driver.into_device(),
199            )
200        }
201    }
202
203    /// Returns the number of vports the device supports.
204    pub fn num_vports(&self) -> u32 {
205        self.inner.dev_config.max_num_vports.into()
206    }
207
208    /// Returns the device configuration.
209    pub fn dev_config(&self) -> &ManaQueryDeviceCfgResp {
210        &self.inner.dev_config
211    }
212
213    /// Starts a hardware channel (HWC) task that listens to events on the HWC
214    /// and calls the appropriate provided callsbacks/closure.
215    pub async fn start_notification_task(&mut self, driver_source: &VmTaskDriverSource) {
216        if self.hwc_task.is_some() {
217            return;
218        }
219
220        let inner = self.inner.clone();
221        let hwc_task = driver_source.simple().spawn("mana-hwc", {
222            let mut gdma = self.inner.gdma.lock().await;
223            let mut hwc_event = gdma.hwc_subscribe();
224            async move {
225                loop {
226                    hwc_event.wait().await;
227                    let mut gdma = inner.gdma.lock().await;
228                    if gdma.process_all_eqs() {
229                        let mut vport_link_status = inner.vport_link_status.lock().await;
230                        for (vport_index, current) in gdma.get_link_toggle_list() {
231                            let vport_index = vport_index as usize;
232                            if vport_index >= vport_link_status.len() {
233                                tracing::error!(vport_index, "Invalid vport index");
234                                continue;
235                            }
236                            if let LinkStatus::Active { sender, connected } =
237                                &mut vport_link_status[vport_index]
238                            {
239                                *connected = current;
240                                sender.send(*connected);
241                            } else {
242                                let _ = std::mem::replace(
243                                    &mut vport_link_status[vport_index],
244                                    LinkStatus::Pending(current),
245                                );
246                            }
247                        }
248                        if let Some(revoke_vtl0_vf) = gdma.get_reset_request_pending() {
249                            // `reset_request_pending` stays true until destruction.
250                            // Take the sender so we only notify once per lifetime.
251                            if let Some(sender) = inner.vf_reset_request_sender.lock().await.take()
252                            {
253                                sender.send(revoke_vtl0_vf);
254                            }
255                        }
256                    }
257                }
258            }
259        });
260        self.hwc_task = Some(hwc_task);
261    }
262
263    /// Initializes and returns the vport number `index`.
264    pub async fn new_vport(
265        &self,
266        index: u32,
267        vport_state: Option<VportState>,
268        dev_config: &ManaQueryDeviceCfgResp,
269    ) -> anyhow::Result<Vport<T>> {
270        let vport_config = self.query_vport_config(index).await?;
271
272        let vport_state = vport_state.unwrap_or(VportState::new(None, None));
273
274        let vport = Vport {
275            inner: self.inner.clone(),
276            config: vport_config,
277            vport_state,
278            id: index,
279        };
280
281        if dev_config.cap_filter_state_query() {
282            if let Ok(resp) = vport.query_filter_state(vport.id.into()).await {
283                tracing::debug!(
284                    mac_address = %vport.mac_address(),
285                    direction_to_vtl0 = resp.direction_to_vtl0,
286                    "query_filter_state"
287                );
288                vport
289                    .vport_state
290                    .set_direction_to_vtl0(resp.direction_to_vtl0 == 1);
291            }
292        }
293
294        Ok(vport)
295    }
296
297    /// Subscribes to HWC reset request events.
298    /// Returned receiver will receive a message on HWC reset request events.
299    pub async fn subscribe_vf_reset_request(&self) -> mesh::Receiver<bool> {
300        tracing::debug!("subscribing to HWC reset request events");
301        let mut reset_request_sender = self.inner.vf_reset_request_sender.lock().await;
302        assert!(
303            reset_request_sender.is_none(),
304            "multiple HWC reset request subscribers not supported"
305        );
306        let (sender, receiver) = mesh::channel();
307        *reset_request_sender = Some(sender);
308        receiver
309    }
310
311    /// Shuts the device down.
312    pub async fn shutdown(self) -> (anyhow::Result<()>, T) {
313        self.inspect_task.cancel().await;
314        if let Some(hwc_task) = self.hwc_task {
315            hwc_task.cancel().await;
316        }
317        let inner = Arc::into_inner(self.inner)
318            .expect("MANA device shutdown failed, multiple references remain.");
319        let mut driver = inner.gdma.into_inner();
320        let result = driver.deregister_device(inner.dev_id).await;
321        (result, driver.into_device())
322    }
323    /// Queries the configuration of a specific vport.
324    pub async fn query_vport_config(&self, vport: u32) -> anyhow::Result<ManaQueryVportCfgResp> {
325        let mut gdma = self.inner.gdma.lock().await;
326        BnicDriver::new(&mut *gdma, self.inner.dev_id)
327            .query_vport_config(vport)
328            .await
329    }
330}
331
/// Tracks vport state and optionally notifies a listener of changes.
///
/// Clones share the same underlying state and callback.
#[derive(Clone)]
pub struct VportState {
    /// Last known filter direction; `None` while it is unknown.
    direction_to_vtl0: Arc<parking_lot::Mutex<Option<bool>>>,
    /// Optional callback invoked with the new direction each time it is set.
    state_change_callback: Arc<Option<Box<dyn Fn(bool) + Send + Sync>>>,
}
338
339impl VportState {
340    /// Create a new VportState instance.
341    pub fn new(
342        direction_to_vtl0: Option<bool>,
343        state_change_callback: Option<Box<dyn Fn(bool) + Send + Sync>>,
344    ) -> Self {
345        Self {
346            direction_to_vtl0: Arc::new(parking_lot::Mutex::new(direction_to_vtl0)),
347            state_change_callback: Arc::new(state_change_callback),
348        }
349    }
350
351    /// Remember current filter setting.
352    pub fn set_direction_to_vtl0(&self, direction_to_vtl0: bool) {
353        *self.direction_to_vtl0.lock() = Some(direction_to_vtl0);
354        if let Some(callback) = self.state_change_callback.as_ref() {
355            (callback)(direction_to_vtl0);
356        }
357    }
358
359    /// Get current filter setting if known.
360    pub fn get_direction_to_vtl0(&self) -> Option<bool> {
361        *self.direction_to_vtl0.lock()
362    }
363}
364
/// A MANA vport.
pub struct Vport<T: DeviceBacking> {
    /// Device state shared with the owning `ManaDevice`.
    inner: Arc<Inner<T>>,
    /// Vport configuration queried when the vport was created.
    config: ManaQueryVportCfgResp,
    /// Filter direction tracking for this vport.
    vport_state: VportState,
    /// Index of this vport, as passed to `new_vport`.
    id: u32,
}
372
373impl<T: DeviceBacking> Vport<T> {
374    /// Returns the maximum number of transmit queues.
375    pub fn max_tx_queues(&self) -> u32 {
376        self.config.max_num_sq
377    }
378
379    /// Returns the maximum number of receive queues.
380    pub fn max_rx_queues(&self) -> u32 {
381        self.config.max_num_rq
382    }
383
384    /// Returns the assigned MAC address.
385    pub fn mac_address(&self) -> MacAddress {
386        self.config.mac_addr.into()
387    }
388
389    /// Returns the memory key to refer to all of GPA space.
390    pub fn gpa_mkey(&self) -> u32 {
391        self.inner.dev_data.gpa_mkey
392    }
393
394    /// Returns this vport's id
395    pub fn id(&self) -> u32 {
396        self.id
397    }
398
399    /// Returns the number of indirection entries supported by the vport
400    pub fn num_indirection_ent(&self) -> u32 {
401        self.config.num_indirection_ent
402    }
403
404    /// Returns the adapter link speed in bits per second, as reported by the
405    /// device configuration.
406    pub fn link_speed_bps(&self) -> u64 {
407        self.inner.dev_config.link_speed_bps()
408    }
409
410    /// Creates a new event queue.
411    pub async fn new_eq(
412        &self,
413        arena: &mut ResourceArena,
414        size: u32,
415        cpu: u32,
416    ) -> anyhow::Result<BnicEq> {
417        let mut gdma = self.inner.gdma.lock().await;
418        let dma_client = gdma.device().dma_client_for(DmaPool::Ephemeral)?;
419        let mem = dma_client
420            .allocate_dma_buffer(size as usize)
421            .context("Failed to allocate DMA buffer")?;
422
423        let gdma_region = gdma
424            .create_dma_region(arena, self.inner.dev_id, mem.clone())
425            .await
426            .context("failed to create eq dma region")?;
427        let (id, interrupt) = gdma
428            .create_eq(
429                arena,
430                self.inner.dev_id,
431                gdma_region,
432                size,
433                self.inner.dev_data.pdid,
434                self.inner.dev_data.db_id,
435                cpu,
436            )
437            .await
438            .context("failed to create eq")?;
439        Ok(BnicEq {
440            doorbell: DoorbellPage::new(self.inner.doorbell.clone(), self.inner.dev_data.db_id)?,
441            mem,
442            id,
443            interrupt,
444        })
445    }
446
447    /// Creates a new work queue (transmit or receive).
448    pub async fn new_wq(
449        &self,
450        arena: &mut ResourceArena,
451        is_send: bool,
452        wq_size: u32,
453        cq_size: u32,
454        eq_id: u32,
455    ) -> anyhow::Result<BnicWq> {
456        assert!(wq_size >= PAGE_SIZE as u32 && wq_size.is_power_of_two());
457        assert!(cq_size >= PAGE_SIZE as u32 && cq_size.is_power_of_two());
458        let mut gdma = self.inner.gdma.lock().await;
459
460        let dma_client = gdma.device().dma_client_for(DmaPool::Ephemeral)?;
461
462        let mem = dma_client
463            .allocate_dma_buffer((wq_size + cq_size) as usize)
464            .context("failed to allocate DMA buffer")?;
465
466        let wq_mem = mem.subblock(0, wq_size as usize);
467        let cq_mem = mem.subblock(wq_size as usize, cq_size as usize);
468
469        let wq_gdma_region = gdma
470            .create_dma_region(arena, self.inner.dev_id, wq_mem.clone())
471            .await?;
472        let cq_gdma_region = gdma
473            .create_dma_region(arena, self.inner.dev_id, cq_mem.clone())
474            .await?;
475        let wq_type = if is_send {
476            GdmaQueueType::GDMA_SQ
477        } else {
478            GdmaQueueType::GDMA_RQ
479        };
480        let doorbell = DoorbellPage::new(self.inner.doorbell.clone(), self.inner.dev_data.db_id)?;
481        let resp = BnicDriver::new(&mut *gdma, self.inner.dev_id)
482            .create_wq_obj(
483                arena,
484                self.config.vport,
485                wq_type,
486                &WqConfig {
487                    wq_gdma_region,
488                    cq_gdma_region,
489                    wq_size,
490                    cq_size,
491                    cq_moderation_ctx_id: 0,
492                    eq_id,
493                },
494            )
495            .await?;
496
497        Ok(BnicWq {
498            doorbell,
499            wq_mem,
500            cq_mem,
501            wq_id: resp.wq_id,
502            cq_id: resp.cq_id,
503            is_send,
504            wq_obj: resp.wq_obj,
505        })
506    }
507
508    /// Get the transmit configuration.
509    pub async fn config_tx(&self) -> anyhow::Result<TxConfig> {
510        let mut gdma = self.inner.gdma.lock().await;
511        let resp = BnicDriver::new(&mut *gdma, self.inner.dev_id)
512            .config_vport_tx(
513                self.config.vport,
514                self.inner.dev_data.pdid,
515                self.inner.dev_data.db_id,
516            )
517            .await?;
518
519        let config = TxConfig {
520            tx_vport_offset: resp.tx_vport_offset,
521        };
522        Ok(config)
523    }
524
525    /// Sets the receive configuration.
526    pub async fn config_rx(&self, config: &RxConfig<'_>) -> anyhow::Result<()> {
527        let mut gdma = self.inner.gdma.lock().await;
528        BnicDriver::new(&mut *gdma, self.inner.dev_id)
529            .config_vport_rx(self.config.vport, config)
530            .await?;
531
532        Ok(())
533    }
534
535    /// Move filter between VTL2 VF vport and VTL0 VF vport
536    pub async fn move_filter(&self, direction_to_vtl0: u8) -> anyhow::Result<()> {
537        if let Some(to_vtl0) = self.vport_state.get_direction_to_vtl0() {
538            if to_vtl0 == (direction_to_vtl0 == 1) {
539                return Ok(());
540            }
541        }
542        let mut gdma = self.inner.gdma.lock().await;
543        let hwc_activity_id = BnicDriver::new(&mut *gdma, self.inner.dev_id)
544            .move_vport_filter(self.config.vport, direction_to_vtl0)
545            .await?;
546        self.vport_state
547            .set_direction_to_vtl0(direction_to_vtl0 == 1);
548        tracing::info!(
549            mac_address = %self.mac_address(),
550            direction_to_vtl0,
551            hwc_activity_id,
552            "switch data path for mac",
553        );
554        Ok(())
555    }
556
557    /// Get current filter state.
558    pub async fn get_direction_to_vtl0(&self) -> Option<bool> {
559        self.vport_state.get_direction_to_vtl0()
560    }
561
562    /// Set the vport serial number
563    pub async fn set_serial_no(&self, serial_no: u32) -> anyhow::Result<()> {
564        let mut gdma = self.inner.gdma.lock().await;
565        BnicDriver::new(&mut *gdma, self.inner.dev_id)
566            .set_vport_serial_no(self.config.vport, serial_no)
567            .await?;
568        Ok(())
569    }
570
571    /// Gets stats. Note that these are adapter-wide and not really per-vport.
572    pub async fn query_stats(&self) -> anyhow::Result<ManaQueryStatisticsResponse> {
573        let mut gdma = self.inner.gdma.lock().await;
574        BnicDriver::new(&mut *gdma, self.inner.dev_id)
575            .query_stats(STATISTICS_FLAGS_ALL)
576            .await
577    }
578
579    /// Retrieves vport mac filter state from socamana
580    pub async fn query_filter_state(
581        &self,
582        vport: u64,
583    ) -> anyhow::Result<ManaQueryFilterStateResponse> {
584        let mut gdma = self.inner.gdma.lock().await;
585        BnicDriver::new(&mut *gdma, self.inner.dev_id)
586            .query_filter_state(vport)
587            .await
588    }
589
590    /// Destroys resources in `arena`.
591    pub async fn destroy(&self, arena: ResourceArena) {
592        let mut gdma = self.inner.gdma.lock().await;
593        arena.destroy(&mut *gdma).await;
594    }
595
596    /// Changes the target CPU for the given eq to `cpu`.
597    pub async fn retarget_interrupt(
598        &self,
599        eq_id: u32,
600        cpu: u32,
601    ) -> anyhow::Result<Option<DeviceInterrupt>> {
602        let mut gdma = self.inner.gdma.lock().await;
603        gdma.retarget_eq(self.inner.dev_id, eq_id, cpu).await
604    }
605
606    /// Registers for link status notification updates.
607    pub async fn register_link_status_notifier(&self, sender: mesh::Sender<bool>) {
608        let mut vport_link_status = self.inner.vport_link_status.lock().await;
609        let vport_index = self.id as usize;
610        let (send, connected) = match vport_link_status[vport_index] {
611            // Send any pending notifications, whatever it is.
612            LinkStatus::Pending(connected) => (true, connected),
613            // Endpoint reestablishing connection. Only send, if the link is down.
614            LinkStatus::Active { connected, .. } => (!connected, connected),
615            // Don't send anything when transitioning from the default state.
616            _ => (false, true),
617        };
618        if send {
619            sender.send(connected);
620        }
621        vport_link_status[vport_index] = LinkStatus::Active { sender, connected };
622    }
623
624    /// Returns an object that can allocate dma memory to be shared with the device.
625    pub async fn dma_client(&self) -> Arc<dyn DmaClient> {
626        self.inner.gdma.lock().await.device().dma_client()
627    }
628}
629
/// Transmit configuration returned by [`Vport::config_tx`].
pub struct TxConfig {
    /// The vport offset to include in tx packets.
    pub tx_vport_offset: u16,
}
635
/// An event queue.
pub struct BnicEq {
    /// Doorbell page handed to the queue accessor.
    doorbell: DoorbellPage,
    /// DMA memory backing the queue.
    mem: MemoryBlock,
    /// The event queue ID.
    id: u32,
    /// Interrupt associated with the queue when it was created.
    interrupt: DeviceInterrupt,
}
643
644impl BnicEq {
645    /// The event queue ID.
646    pub fn id(&self) -> u32 {
647        self.id
648    }
649
650    /// The interrupt that will be signaled when the armed event queue is ready.
651    pub fn interrupt(&self) -> DeviceInterrupt {
652        self.interrupt.clone()
653    }
654
655    /// Gets an object to access the queue's entries.
656    pub fn queue(&self) -> queues::Eq {
657        queues::Eq::new_eq(self.mem.clone(), self.doorbell.clone(), self.id)
658    }
659}
660
/// A work queue (transmit or receive).
pub struct BnicWq {
    /// Doorbell page handed to the queue accessors.
    doorbell: DoorbellPage,
    /// DMA memory backing the work queue.
    wq_mem: MemoryBlock,
    /// DMA memory backing the completion queue.
    cq_mem: MemoryBlock,
    /// The work queue ID.
    wq_id: u32,
    /// The completion queue ID.
    cq_id: u32,
    /// `true` for a send queue, `false` for a receive queue.
    is_send: bool,
    /// The work queue object ID.
    wq_obj: u64,
}
671
672impl BnicWq {
673    /// Gets the work queue for sending requests.
674    pub fn wq(&self) -> queues::Wq {
675        if self.is_send {
676            queues::Wq::new_sq(self.wq_mem.clone(), self.doorbell.clone(), self.wq_id)
677        } else {
678            queues::Wq::new_rq(self.wq_mem.clone(), self.doorbell.clone(), self.wq_id)
679        }
680    }
681
682    /// Gets the completion queue for receiving results.
683    pub fn cq(&self) -> queues::Cq {
684        queues::Cq::new_cq(self.cq_mem.clone(), self.doorbell.clone(), self.cq_id)
685    }
686
687    /// Gets the work queue object ID.
688    pub fn wq_obj(&self) -> u64 {
689        self.wq_obj
690    }
691}