mana_driver/
mana.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! The interface to the MANA device.
5
6pub use crate::bnic_driver::RxConfig;
7pub use crate::resources::ResourceArena;
8pub use crate::save_restore::ManaDeviceSavedState;
9
10use crate::bnic_driver::BnicDriver;
11use crate::bnic_driver::WqConfig;
12use crate::gdma_driver::GdmaDriver;
13use crate::queues;
14use crate::queues::Doorbell;
15use crate::queues::DoorbellPage;
16use anyhow::Context;
17use futures::StreamExt;
18use futures::lock::Mutex;
19use gdma_defs::GdmaDevId;
20use gdma_defs::GdmaDevType;
21use gdma_defs::GdmaQueueType;
22use gdma_defs::GdmaRegisterDeviceResp;
23use gdma_defs::bnic::ManaQueryDeviceCfgResp;
24use gdma_defs::bnic::ManaQueryFilterStateResponse;
25use gdma_defs::bnic::ManaQueryStatisticsResponse;
26use gdma_defs::bnic::ManaQueryVportCfgResp;
27use gdma_defs::bnic::STATISTICS_FLAGS_ALL;
28use inspect::Inspect;
29use net_backend_resources::mac_address::MacAddress;
30use pal_async::driver::SpawnDriver;
31use pal_async::task::Spawn;
32use pal_async::task::Task;
33use std::sync::Arc;
34use tracing::Instrument;
35use user_driver::DeviceBacking;
36use user_driver::DmaClient;
37use user_driver::DmaPool;
38use user_driver::interrupt::DeviceInterrupt;
39use user_driver::memory::MemoryBlock;
40use user_driver::memory::PAGE_SIZE;
41use vmcore::vm_task::VmTaskDriverSource;
42
/// Link-state bookkeeping kept for each vport.
enum LinkStatus {
    /// No link event has been recorded and no notifier has been registered.
    Default,
    /// A link state (`true` means connected) was recorded while no notifier was
    /// registered; it is delivered once a notifier registers.
    Pending(bool),
    /// A notifier is registered for the vport.
    Active {
        /// Channel on which link state changes are sent.
        sender: mesh::Sender<bool>,
        /// The most recently recorded link state.
        connected: bool,
    },
}
51
/// A MANA device.
///
/// Owns the state shared with the vports created from it, plus the background
/// tasks that serve inspect requests and hardware channel events.
#[derive(Inspect)]
pub struct ManaDevice<T: DeviceBacking> {
    /// State shared with the vports and the background tasks.
    #[inspect(skip)]
    inner: Arc<Inner<T>>,
    /// Task that answers the deferred inspect requests sent via `inspect_send`.
    #[inspect(skip)]
    inspect_task: Task<()>,
    /// Hardware channel event task; `None` until `start_notification_task` runs.
    #[inspect(skip)]
    hwc_task: Option<Task<()>>,
    /// Sending half of the channel that `inspect_task` drains.
    #[inspect(flatten, send = "|x| x")]
    inspect_send: mesh::Sender<inspect::Deferred>,
}
64
/// State shared between a `ManaDevice`, its vports, and its background tasks.
struct Inner<T: DeviceBacking> {
    /// The GDMA driver; every device request locks it first.
    gdma: Mutex<GdmaDriver<T>>,
    /// ID of the MANA device found in the GDMA device list.
    dev_id: GdmaDevId,
    /// Registration data (`pdid`, `gpa_mkey`, `db_id`), either returned by
    /// device registration or rebuilt from saved state.
    dev_data: GdmaRegisterDeviceResp,
    /// Device configuration returned by the device config query.
    dev_config: ManaQueryDeviceCfgResp,
    /// Doorbell obtained from the GDMA driver, used to build doorbell pages.
    doorbell: Arc<dyn Doorbell>,
    /// One link status entry per vport, indexed by vport index.
    vport_link_status: Arc<Mutex<Vec<LinkStatus>>>,
}
73
74impl<T: DeviceBacking> ManaDevice<T> {
75    /// Initializes the MANA driver on `device`.
76    pub async fn new(
77        driver: &impl SpawnDriver,
78        device: T,
79        num_vps: u32,
80        max_queues_per_vport: u16,
81        mana_state: Option<&ManaDeviceSavedState>,
82    ) -> anyhow::Result<Self> {
83        let mut gdma = if let Some(mana_state) = mana_state {
84            let memory = device.dma_client().attach_pending_buffers()?;
85            let gdma_memory = memory
86                .iter()
87                .find(|m| m.pfns()[0] == mana_state.gdma.mem.base_pfn)
88                .expect("gdma restored memory not found")
89                .clone();
90
91            GdmaDriver::restore(mana_state.gdma.clone(), device, gdma_memory)
92                .instrument(tracing::info_span!("restore_gdma_driver"))
93                .await?
94        } else {
95            GdmaDriver::new(driver, device, num_vps, None)
96                .instrument(tracing::info_span!("new_gdma_driver"))
97                .await?
98        };
99
100        gdma.test_eq().await?;
101
102        gdma.verify_vf_driver_version().await?;
103
104        let dev_id = gdma
105            .list_devices()
106            .await?
107            .iter()
108            .copied()
109            .find(|dev_id| dev_id.ty == GdmaDevType::GDMA_DEVICE_MANA)
110            .context("no mana device found")?;
111
112        let dev_data = if let Some(mana_state) = mana_state {
113            GdmaRegisterDeviceResp {
114                pdid: mana_state.gdma.pdid,
115                gpa_mkey: mana_state.gdma.gpa_mkey,
116                db_id: mana_state.gdma.db_id as u32,
117            }
118        } else {
119            gdma.register_device(dev_id).await?
120        };
121
122        let mut bnic = BnicDriver::new(&mut gdma, dev_id);
123        let dev_config = bnic.query_dev_config().await?;
124        tracing::info!(mana_dev_config = ?dev_config);
125        let num_queues_needed = dev_config.max_num_vports as u32 * max_queues_per_vport as u32;
126        gdma.check_vf_resources(num_vps, num_queues_needed);
127
128        let doorbell = gdma.doorbell();
129        let vport_link_status = (0..dev_config.max_num_vports)
130            .map(|_| LinkStatus::Default)
131            .collect();
132        let inner = Arc::new(Inner {
133            gdma: Mutex::new(gdma),
134            dev_id,
135            dev_data,
136            dev_config,
137            doorbell,
138            vport_link_status: Arc::new(Mutex::new(vport_link_status)),
139        });
140
141        let (inspect_send, mut inspect_recv) = mesh::channel::<inspect::Deferred>();
142        let inspect_task = driver.spawn("mana-inspect", {
143            let inner = inner.clone();
144            async move {
145                while let Some(deferred) = inspect_recv.next().await {
146                    let Inner {
147                        gdma,
148                        dev_id: _,
149                        dev_data: _,
150                        dev_config: _,
151                        doorbell: _,
152                        vport_link_status: _,
153                    } = inner.as_ref();
154                    let gdma = gdma.lock().await;
155                    deferred.respond(|resp| {
156                        resp.merge(&*gdma);
157                    })
158                }
159            }
160        });
161
162        let device = Self {
163            inner,
164            inspect_send,
165            inspect_task,
166            hwc_task: None,
167        };
168        Ok(device)
169    }
170
171    /// Saves the device's state for servicing
172    pub async fn save(self) -> (anyhow::Result<ManaDeviceSavedState>, T) {
173        self.inspect_task.cancel().await;
174        if let Some(hwc_task) = self.hwc_task {
175            hwc_task.cancel().await;
176        }
177        let inner = Arc::into_inner(self.inner).unwrap();
178        let mut driver = inner.gdma.into_inner();
179
180        if let Ok(saved_state) = driver.save().await {
181            let mana_saved_state = ManaDeviceSavedState { gdma: saved_state };
182
183            (Ok(mana_saved_state), driver.into_device())
184        } else {
185            tracing::error!("Failed to save MANA device state");
186            (
187                Err(anyhow::anyhow!("Failed to save MANA device state")),
188                driver.into_device(),
189            )
190        }
191    }
192
193    /// Returns the number of vports the device supports.
194    pub fn num_vports(&self) -> u32 {
195        self.inner.dev_config.max_num_vports.into()
196    }
197
198    /// Returns the device configuration.
199    pub fn dev_config(&self) -> &ManaQueryDeviceCfgResp {
200        &self.inner.dev_config
201    }
202
203    /// Starts a hardware channel (HWC) task that listens to events on the HWC
204    /// and calls the appropriate provided callsbacks/closure.
205    pub async fn start_notification_task(&mut self, driver_source: &VmTaskDriverSource) {
206        if self.hwc_task.is_some() {
207            return;
208        }
209
210        let inner = self.inner.clone();
211        let hwc_task = driver_source.simple().spawn("mana-hwc", {
212            let mut gdma = self.inner.gdma.lock().await;
213            let mut hwc_event = gdma.hwc_subscribe();
214            async move {
215                loop {
216                    hwc_event.wait().await;
217                    let mut gdma = inner.gdma.lock().await;
218                    if gdma.process_all_eqs() {
219                        let mut vport_link_status = inner.vport_link_status.lock().await;
220                        for (vport_index, current) in gdma.get_link_toggle_list() {
221                            let vport_index = vport_index as usize;
222                            if vport_index >= vport_link_status.len() {
223                                tracing::error!(vport_index, "Invalid vport index");
224                                continue;
225                            }
226                            if let LinkStatus::Active { sender, connected } =
227                                &mut vport_link_status[vport_index]
228                            {
229                                *connected = current;
230                                sender.send(*connected);
231                            } else {
232                                let _ = std::mem::replace(
233                                    &mut vport_link_status[vport_index],
234                                    LinkStatus::Pending(current),
235                                );
236                            }
237                        }
238                    }
239                }
240            }
241        });
242        self.hwc_task = Some(hwc_task);
243    }
244
245    /// Initializes and returns the vport number `index`.
246    pub async fn new_vport(
247        &self,
248        index: u32,
249        vport_state: Option<VportState>,
250        dev_config: &ManaQueryDeviceCfgResp,
251    ) -> anyhow::Result<Vport<T>> {
252        let vport_config = self.query_vport_config(index).await?;
253
254        let vport_state = vport_state.unwrap_or(VportState::new(None, None));
255
256        let vport = Vport {
257            inner: self.inner.clone(),
258            config: vport_config,
259            vport_state,
260            id: index,
261        };
262
263        if dev_config.cap_filter_state_query() {
264            if let Ok(resp) = vport.query_filter_state(vport.id.into()).await {
265                tracing::debug!(
266                    mac_address = %vport.mac_address(),
267                    direction_to_vtl0 = resp.direction_to_vtl0,
268                    "query_filter_state"
269                );
270                vport
271                    .vport_state
272                    .set_direction_to_vtl0(resp.direction_to_vtl0 == 1);
273            }
274        }
275
276        Ok(vport)
277    }
278
279    /// Shuts the device down.
280    pub async fn shutdown(self) -> (anyhow::Result<()>, T) {
281        self.inspect_task.cancel().await;
282        if let Some(hwc_task) = self.hwc_task {
283            hwc_task.cancel().await;
284        }
285        let inner = Arc::into_inner(self.inner).unwrap();
286        let mut driver = inner.gdma.into_inner();
287        let result = driver.deregister_device(inner.dev_id).await;
288        (result, driver.into_device())
289    }
290    /// Queries the configuration of a specific vport.
291    pub async fn query_vport_config(&self, vport: u32) -> anyhow::Result<ManaQueryVportCfgResp> {
292        let mut gdma = self.inner.gdma.lock().await;
293        BnicDriver::new(&mut *gdma, self.inner.dev_id)
294            .query_vport_config(vport)
295            .await
296    }
297}
298
/// Tracks vport state and optionally notifies a listener of changes.
///
/// Clones share the same underlying state and callback.
#[derive(Clone)]
pub struct VportState {
    /// Last recorded filter direction; `None` while it is unknown.
    direction_to_vtl0: Arc<parking_lot::Mutex<Option<bool>>>,
    /// Optional callback invoked with the new direction each time it is set.
    state_change_callback: Arc<Option<Box<dyn Fn(bool) + Send + Sync>>>,
}
305
306impl VportState {
307    /// Create a new VportState instance.
308    pub fn new(
309        direction_to_vtl0: Option<bool>,
310        state_change_callback: Option<Box<dyn Fn(bool) + Send + Sync>>,
311    ) -> Self {
312        Self {
313            direction_to_vtl0: Arc::new(parking_lot::Mutex::new(direction_to_vtl0)),
314            state_change_callback: Arc::new(state_change_callback),
315        }
316    }
317
318    /// Remember current filter setting.
319    pub fn set_direction_to_vtl0(&self, direction_to_vtl0: bool) {
320        *self.direction_to_vtl0.lock() = Some(direction_to_vtl0);
321        if let Some(callback) = self.state_change_callback.as_ref() {
322            (callback)(direction_to_vtl0);
323        }
324    }
325
326    /// Get current filter setting if known.
327    pub fn get_direction_to_vtl0(&self) -> Option<bool> {
328        *self.direction_to_vtl0.lock()
329    }
330}
331
/// A MANA vport.
pub struct Vport<T: DeviceBacking> {
    /// Device state shared with the `ManaDevice` that created this vport.
    inner: Arc<Inner<T>>,
    /// Configuration returned by the vport config query at creation time.
    config: ManaQueryVportCfgResp,
    /// Filter direction tracking for this vport.
    vport_state: VportState,
    /// Index of this vport, as passed to `ManaDevice::new_vport`.
    id: u32,
}
339
340impl<T: DeviceBacking> Vport<T> {
341    /// Returns the maximum number of transmit queues.
342    pub fn max_tx_queues(&self) -> u32 {
343        self.config.max_num_sq
344    }
345
346    /// Returns the maximum number of receive queues.
347    pub fn max_rx_queues(&self) -> u32 {
348        self.config.max_num_rq
349    }
350
351    /// Returns the assigned MAC address.
352    pub fn mac_address(&self) -> MacAddress {
353        self.config.mac_addr.into()
354    }
355
356    /// Returns the memory key to refer to all of GPA space.
357    pub fn gpa_mkey(&self) -> u32 {
358        self.inner.dev_data.gpa_mkey
359    }
360
361    /// Returns this vport's id
362    pub fn id(&self) -> u32 {
363        self.id
364    }
365
366    /// Returns the number of indirection entries supported by the vport
367    pub fn num_indirection_ent(&self) -> u32 {
368        self.config.num_indirection_ent
369    }
370
371    /// Creates a new event queue.
372    pub async fn new_eq(
373        &self,
374        arena: &mut ResourceArena,
375        size: u32,
376        cpu: u32,
377    ) -> anyhow::Result<BnicEq> {
378        let mut gdma = self.inner.gdma.lock().await;
379        let dma_client = gdma.device().dma_client_for(DmaPool::Ephemeral)?;
380        let mem = dma_client
381            .allocate_dma_buffer(size as usize)
382            .context("Failed to allocate DMA buffer")?;
383
384        let gdma_region = gdma
385            .create_dma_region(arena, self.inner.dev_id, mem.clone())
386            .await
387            .context("failed to create eq dma region")?;
388        let (id, interrupt) = gdma
389            .create_eq(
390                arena,
391                self.inner.dev_id,
392                gdma_region,
393                size,
394                self.inner.dev_data.pdid,
395                self.inner.dev_data.db_id,
396                cpu,
397            )
398            .await
399            .context("failed to create eq")?;
400        Ok(BnicEq {
401            doorbell: DoorbellPage::new(self.inner.doorbell.clone(), self.inner.dev_data.db_id)?,
402            mem,
403            id,
404            interrupt,
405        })
406    }
407
408    /// Creates a new work queue (transmit or receive).
409    pub async fn new_wq(
410        &self,
411        arena: &mut ResourceArena,
412        is_send: bool,
413        wq_size: u32,
414        cq_size: u32,
415        eq_id: u32,
416    ) -> anyhow::Result<BnicWq> {
417        assert!(wq_size >= PAGE_SIZE as u32 && wq_size.is_power_of_two());
418        assert!(cq_size >= PAGE_SIZE as u32 && cq_size.is_power_of_two());
419        let mut gdma = self.inner.gdma.lock().await;
420
421        let dma_client = gdma.device().dma_client_for(DmaPool::Ephemeral)?;
422
423        let mem = dma_client
424            .allocate_dma_buffer((wq_size + cq_size) as usize)
425            .context("failed to allocate DMA buffer")?;
426
427        let wq_mem = mem.subblock(0, wq_size as usize);
428        let cq_mem = mem.subblock(wq_size as usize, cq_size as usize);
429
430        let wq_gdma_region = gdma
431            .create_dma_region(arena, self.inner.dev_id, wq_mem.clone())
432            .await?;
433        let cq_gdma_region = gdma
434            .create_dma_region(arena, self.inner.dev_id, cq_mem.clone())
435            .await?;
436        let wq_type = if is_send {
437            GdmaQueueType::GDMA_SQ
438        } else {
439            GdmaQueueType::GDMA_RQ
440        };
441        let doorbell = DoorbellPage::new(self.inner.doorbell.clone(), self.inner.dev_data.db_id)?;
442        let resp = BnicDriver::new(&mut *gdma, self.inner.dev_id)
443            .create_wq_obj(
444                arena,
445                self.config.vport,
446                wq_type,
447                &WqConfig {
448                    wq_gdma_region,
449                    cq_gdma_region,
450                    wq_size,
451                    cq_size,
452                    cq_moderation_ctx_id: 0,
453                    eq_id,
454                },
455            )
456            .await?;
457
458        Ok(BnicWq {
459            doorbell,
460            wq_mem,
461            cq_mem,
462            wq_id: resp.wq_id,
463            cq_id: resp.cq_id,
464            is_send,
465            wq_obj: resp.wq_obj,
466        })
467    }
468
469    /// Get the transmit configuration.
470    pub async fn config_tx(&self) -> anyhow::Result<TxConfig> {
471        let mut gdma = self.inner.gdma.lock().await;
472        let resp = BnicDriver::new(&mut *gdma, self.inner.dev_id)
473            .config_vport_tx(
474                self.config.vport,
475                self.inner.dev_data.pdid,
476                self.inner.dev_data.db_id,
477            )
478            .await?;
479
480        let config = TxConfig {
481            tx_vport_offset: resp.tx_vport_offset,
482        };
483        Ok(config)
484    }
485
486    /// Sets the receive configuration.
487    pub async fn config_rx(&self, config: &RxConfig<'_>) -> anyhow::Result<()> {
488        let mut gdma = self.inner.gdma.lock().await;
489        BnicDriver::new(&mut *gdma, self.inner.dev_id)
490            .config_vport_rx(self.config.vport, config)
491            .await?;
492
493        Ok(())
494    }
495
496    /// Move filter between VTL2 VF vport and VTL0 VF vport
497    pub async fn move_filter(&self, direction_to_vtl0: u8) -> anyhow::Result<()> {
498        if let Some(to_vtl0) = self.vport_state.get_direction_to_vtl0() {
499            if to_vtl0 == (direction_to_vtl0 == 1) {
500                return Ok(());
501            }
502        }
503        let mut gdma = self.inner.gdma.lock().await;
504        let hwc_activity_id = BnicDriver::new(&mut *gdma, self.inner.dev_id)
505            .move_vport_filter(self.config.vport, direction_to_vtl0)
506            .await?;
507        self.vport_state
508            .set_direction_to_vtl0(direction_to_vtl0 == 1);
509        tracing::info!(
510            mac_address = %self.mac_address(),
511            direction_to_vtl0,
512            hwc_activity_id,
513            "switch data path for mac",
514        );
515        Ok(())
516    }
517
518    /// Get current filter state.
519    pub async fn get_direction_to_vtl0(&self) -> Option<bool> {
520        self.vport_state.get_direction_to_vtl0()
521    }
522
523    /// Set the vport serial number
524    pub async fn set_serial_no(&self, serial_no: u32) -> anyhow::Result<()> {
525        let mut gdma = self.inner.gdma.lock().await;
526        BnicDriver::new(&mut *gdma, self.inner.dev_id)
527            .set_vport_serial_no(self.config.vport, serial_no)
528            .await?;
529        Ok(())
530    }
531
532    /// Gets stats. Note that these are adapter-wide and not really per-vport.
533    pub async fn query_stats(&self) -> anyhow::Result<ManaQueryStatisticsResponse> {
534        let mut gdma = self.inner.gdma.lock().await;
535        BnicDriver::new(&mut *gdma, self.inner.dev_id)
536            .query_stats(STATISTICS_FLAGS_ALL)
537            .await
538    }
539
540    /// Retrieves vport mac filter state from socamana
541    pub async fn query_filter_state(
542        &self,
543        vport: u64,
544    ) -> anyhow::Result<ManaQueryFilterStateResponse> {
545        let mut gdma = self.inner.gdma.lock().await;
546        BnicDriver::new(&mut *gdma, self.inner.dev_id)
547            .query_filter_state(vport)
548            .await
549    }
550
551    /// Destroys resources in `arena`.
552    pub async fn destroy(&self, arena: ResourceArena) {
553        let mut gdma = self.inner.gdma.lock().await;
554        arena.destroy(&mut *gdma).await;
555    }
556
557    /// Changes the target CPU for the given eq to `cpu`.
558    pub async fn retarget_interrupt(
559        &self,
560        eq_id: u32,
561        cpu: u32,
562    ) -> anyhow::Result<Option<DeviceInterrupt>> {
563        let mut gdma = self.inner.gdma.lock().await;
564        gdma.retarget_eq(self.inner.dev_id, eq_id, cpu).await
565    }
566
567    /// Registers for link status notification updates.
568    pub async fn register_link_status_notifier(&self, sender: mesh::Sender<bool>) {
569        let mut vport_link_status = self.inner.vport_link_status.lock().await;
570        let vport_index = self.id as usize;
571        let (send, connected) = match vport_link_status[vport_index] {
572            // Send any pending notifications, whatever it is.
573            LinkStatus::Pending(connected) => (true, connected),
574            // Endpoint reestablishing connection. Only send, if the link is down.
575            LinkStatus::Active { connected, .. } => (!connected, connected),
576            // Don't send anything when transitioning from the default state.
577            _ => (false, true),
578        };
579        if send {
580            sender.send(connected);
581        }
582        vport_link_status[vport_index] = LinkStatus::Active { sender, connected };
583    }
584
585    /// Returns an object that can allocate dma memory to be shared with the device.
586    pub async fn dma_client(&self) -> Arc<dyn DmaClient> {
587        self.inner.gdma.lock().await.device().dma_client()
588    }
589}
590
/// Transmit configuration.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TxConfig {
    /// The vport offset to include in tx packets.
    pub tx_vport_offset: u16,
}
596
/// An event queue.
pub struct BnicEq {
    /// Doorbell page handed to the queue accessor.
    doorbell: DoorbellPage,
    /// DMA memory backing the queue.
    mem: MemoryBlock,
    /// The event queue ID returned when the queue was created.
    id: u32,
    /// The interrupt returned when the queue was created.
    interrupt: DeviceInterrupt,
}
604
605impl BnicEq {
606    /// The event queue ID.
607    pub fn id(&self) -> u32 {
608        self.id
609    }
610
611    /// The interrupt that will be signaled when the armed event queue is ready.
612    pub fn interrupt(&self) -> DeviceInterrupt {
613        self.interrupt.clone()
614    }
615
616    /// Gets an object to access the queue's entries.
617    pub fn queue(&self) -> queues::Eq {
618        queues::Eq::new_eq(self.mem.clone(), self.doorbell.clone(), self.id)
619    }
620}
621
/// A work queue (transmit or receive).
pub struct BnicWq {
    /// Doorbell page handed to the work and completion queue accessors.
    doorbell: DoorbellPage,
    /// DMA memory backing the work queue.
    wq_mem: MemoryBlock,
    /// DMA memory backing the completion queue.
    cq_mem: MemoryBlock,
    /// The work queue ID returned when the queue was created.
    wq_id: u32,
    /// The completion queue ID returned when the queue was created.
    cq_id: u32,
    /// `true` for a send queue, `false` for a receive queue.
    is_send: bool,
    /// The work queue object ID returned when the queue was created.
    wq_obj: u64,
}
632
633impl BnicWq {
634    /// Gets the work queue for sending requests.
635    pub fn wq(&self) -> queues::Wq {
636        if self.is_send {
637            queues::Wq::new_sq(self.wq_mem.clone(), self.doorbell.clone(), self.wq_id)
638        } else {
639            queues::Wq::new_rq(self.wq_mem.clone(), self.doorbell.clone(), self.wq_id)
640        }
641    }
642
643    /// Gets the completion queue for receiving results.
644    pub fn cq(&self) -> queues::Cq {
645        queues::Cq::new_cq(self.cq_mem.clone(), self.doorbell.clone(), self.cq_id)
646    }
647
648    /// Gets the work queue object ID.
649    pub fn wq_obj(&self) -> u64 {
650        self.wq_obj
651    }
652}