Skip to content

Commit d25ad9c

Browse files
Add support for userspace device drivers with HW offload mode (#322)
* Add support for userspace drivers with HW offload mode Add support for SR-IOV VFs using both hardware offload (switchdev) and userspace device driver such as vfio-pci. Signed-off-by: Taekyung Kim <[email protected]> * Update MAC address if provided in HW offload mode If the MAC address is provided from args, update the MAC address of the VF to the provided MAC address via netlink. Signed-off-by: Taekyung Kim <[email protected]> --------- Signed-off-by: Taekyung Kim <[email protected]>
1 parent 44e6a7b commit d25ad9c

File tree

3 files changed

+185
-52
lines changed

3 files changed

+185
-52
lines changed

pkg/plugin/plugin.go

+44-19
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,15 @@ func CmdAdd(args *skel.CmdArgs) error {
291291
return err
292292
}
293293

294+
// check if the device driver is the type of userspace driver
295+
userspaceMode := false
296+
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
297+
userspaceMode, err = sriov.HasUserspaceDriver(netconf.DeviceID)
298+
if err != nil {
299+
return err
300+
}
301+
}
302+
294303
// removes all ports whose interfaces have an error
295304
if err := cleanPorts(ovsBridgeDriver); err != nil {
296305
return err
@@ -302,8 +311,9 @@ func CmdAdd(args *skel.CmdArgs) error {
302311
}
303312
defer contNetns.Close()
304313

314+
// userspace driver does not create a network interface for the VF on the host
305315
var origIfName string
306-
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
316+
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) && !userspaceMode {
307317
origIfName, err = sriov.GetVFLinkName(netconf.DeviceID)
308318
if err != nil {
309319
return err
@@ -312,13 +322,13 @@ func CmdAdd(args *skel.CmdArgs) error {
312322

313323
// Cache NetConf for CmdDel
314324
if err = utils.SaveCache(config.GetCRef(args.ContainerID, args.IfName),
315-
&types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName}); err != nil {
325+
&types.CachedNetConf{Netconf: netconf, OrigIfName: origIfName, UserspaceMode: userspaceMode}); err != nil {
316326
return fmt.Errorf("error saving NetConf %q", err)
317327
}
318328

319329
var hostIface, contIface *current.Interface
320330
if sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID) {
321-
hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, netconf.MTU, netconf.DeviceID)
331+
hostIface, contIface, err = sriov.SetupSriovInterface(contNetns, args.ContainerID, args.IfName, mac, netconf.MTU, netconf.DeviceID, userspaceMode)
322332
if err != nil {
323333
return err
324334
}
@@ -353,7 +363,9 @@ func CmdAdd(args *skel.CmdArgs) error {
353363
}
354364

355365
// run the IPAM plugin
356-
if netconf.IPAM.Type != "" {
366+
// userspace driver does not support IPAM plugin,
367+
// because there is no network interface for the VF on the host
368+
if netconf.IPAM.Type != "" && !userspaceMode {
357369
var r cnitypes.Result
358370
r, err = ipam.ExecAdd(netconf.IPAM.Type, args.StdinData)
359371
defer func() {
@@ -562,8 +574,11 @@ func CmdDel(args *skel.CmdArgs) error {
562574
// port is already deleted in a previous invocation.
563575
log.Printf("Error: %v\n", err)
564576
}
565-
if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
566-
return err
577+
// there is no network interface in case of userspace driver, so OrigIfName is empty
578+
if !cache.UserspaceMode {
579+
if err = sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
580+
return err
581+
}
567582
}
568583
} else {
569584
// In accordance with the spec we clean up as many resources as possible.
@@ -591,11 +606,14 @@ func CmdDel(args *skel.CmdArgs) error {
591606
}
592607

593608
if sriov.IsOvsHardwareOffloadEnabled(cache.Netconf.DeviceID) {
594-
err = sriov.ReleaseVF(args, cache.OrigIfName)
595-
if err != nil {
596-
// try to reset vf into original state as much as possible in case of error
597-
if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
598-
log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err)
609+
// there is no network interface in case of userspace driver, so OrigIfName is empty
610+
if !cache.UserspaceMode {
611+
err = sriov.ReleaseVF(args, cache.OrigIfName)
612+
if err != nil {
613+
// try to reset vf into original state as much as possible in case of error
614+
if err := sriov.ResetVF(args, cache.Netconf.DeviceID, cache.OrigIfName); err != nil {
615+
log.Printf("Failed best-effort cleanup of VF %s: %v", cache.OrigIfName, err)
616+
}
599617
}
600618
}
601619
} else {
@@ -633,14 +651,6 @@ func CmdCheck(args *skel.CmdArgs) error {
633651
}
634652
ovsHWOffloadEnable := sriov.IsOvsHardwareOffloadEnabled(netconf.DeviceID)
635653

636-
// run the IPAM plugin
637-
if netconf.NetConf.IPAM.Type != "" {
638-
err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData)
639-
if err != nil {
640-
return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err)
641-
}
642-
}
643-
644654
envArgs, err := getEnvArgs(args.Args)
645655
if err != nil {
646656
return err
@@ -672,6 +682,21 @@ func CmdCheck(args *skel.CmdArgs) error {
672682
return err
673683
}
674684

685+
// TODO: CmdCheck for userspace driver
686+
if cache.UserspaceMode {
687+
return nil
688+
}
689+
690+
// run the IPAM plugin
691+
// userspace driver does not support IPAM plugin,
692+
// because there is no network interface for the VF on the host
693+
if netconf.NetConf.IPAM.Type != "" && !cache.UserspaceMode {
694+
err = ipam.ExecCheck(netconf.NetConf.IPAM.Type, args.StdinData)
695+
if err != nil {
696+
return fmt.Errorf("failed to check with IPAM plugin type %q: %v", netconf.NetConf.IPAM.Type, err)
697+
}
698+
}
699+
675700
// Parse previous result.
676701
if netconf.NetConf.RawPrevResult == nil {
677702
return fmt.Errorf("Required prevResult missing")

pkg/sriov/sriov.go

+135-29
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package sriov
1919

2020
import (
2121
"fmt"
22+
"net"
2223
"os"
2324
"path/filepath"
2425

@@ -32,7 +33,8 @@ import (
3233

3334
var (
3435
// SysBusPci is sysfs pci device directory
35-
SysBusPci = "/sys/bus/pci/devices"
36+
SysBusPci = "/sys/bus/pci/devices"
37+
UserspaceDrivers = []string{"vfio-pci", "uio_pci_generic", "igb_uio"}
3638
)
3739

3840
// GetVFLinkName retrieves interface name for given pci address
@@ -66,6 +68,27 @@ func IsOvsHardwareOffloadEnabled(deviceID string) bool {
6668
return deviceID != ""
6769
}
6870

71+
// HasUserspaceDriver checks if a device is attached to userspace driver
72+
// This method is copied from https://github.com/k8snetworkplumbingwg/sriov-cni/blob/8af83a33b2cac8e2df0bd6276b76658eb7c790ab/pkg/utils/utils.go#L222
73+
func HasUserspaceDriver(pciAddr string) (bool, error) {
74+
driverLink := filepath.Join(SysBusPci, pciAddr, "driver")
75+
driverPath, err := filepath.EvalSymlinks(driverLink)
76+
if err != nil {
77+
return false, err
78+
}
79+
driverStat, err := os.Stat(driverPath)
80+
if err != nil {
81+
return false, err
82+
}
83+
driverName := driverStat.Name()
84+
for _, drv := range UserspaceDrivers {
85+
if driverName == drv {
86+
return true, nil
87+
}
88+
}
89+
return false, nil
90+
}
91+
6992
// GetBridgeUplinkNameByDeviceID tries to automatically resolve uplink interface name
7093
// for provided VF deviceID by following the sequence:
7194
// VF pci address > PF pci address > Bond (optional, if PF is part of a bond)
@@ -159,48 +182,33 @@ func GetNetRepresentor(deviceID string) (string, error) {
159182
return rep, nil
160183
}
161184

162-
// SetupSriovInterface moves smartVF into container namespace, rename it with ifName and also returns host interface with VF's representor device
163-
func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int, deviceID string) (*current.Interface, *current.Interface, error) {
164-
hostIface := &current.Interface{}
165-
contIface := &current.Interface{}
166-
185+
// setupKernelSriovContIface moves smartVF into container namespace,
186+
// configures the smartVF and also fills in the contIface fields
187+
func setupKernelSriovContIface(contNetns ns.NetNS, contIface *current.Interface, deviceID string, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr, mtu int) error {
167188
// get smart VF netdevice from PCI
168189
vfNetdevices, err := sriovnet.GetNetDevicesFromPci(deviceID)
169190
if err != nil {
170-
return nil, nil, err
191+
return err
171192
}
172193

173194
// Make sure we have 1 netdevice per pci address
174195
if len(vfNetdevices) != 1 {
175-
return nil, nil, fmt.Errorf("failed to get one netdevice interface per %s", deviceID)
196+
return fmt.Errorf("failed to get one netdevice interface per %s", deviceID)
176197
}
177198
vfNetdevice := vfNetdevices[0]
178199

179-
// network representor device for smartvf
180-
rep, err := GetNetRepresentor(deviceID)
181-
if err != nil {
182-
return nil, nil, err
183-
}
184-
185-
hostIface.Name = rep
186-
187-
link, err := netlink.LinkByName(hostIface.Name)
188-
if err != nil {
189-
return nil, nil, err
190-
}
191-
hostIface.Mac = link.Attrs().HardwareAddr.String()
192-
193-
// set MTU on smart VF representor
194-
if mtu != 0 {
195-
if err = netlink.LinkSetMTU(link, mtu); err != nil {
196-
return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err)
200+
// if MAC address is provided, set it to the VF by using PF netlink
201+
// which is accessible in the host namespace, not in the container namespace
202+
if hwaddr != nil {
203+
if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil {
204+
return err
197205
}
198206
}
199207

200208
// Move smart VF to Container namespace
201209
err = moveIfToNetns(vfNetdevice, contNetns)
202210
if err != nil {
203-
return nil, nil, err
211+
return err
204212
}
205213

206214
err = contNetns.Do(func(hostNS ns.NetNS) error {
@@ -209,10 +217,20 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int
209217
if err != nil {
210218
return err
211219
}
212-
link, err = netlink.LinkByName(contIface.Name)
220+
link, err := netlink.LinkByName(contIface.Name)
213221
if err != nil {
214222
return err
215223
}
224+
// if MAC address is provided, set it to the kernel VF netdevice
225+
// otherwise, read the MAC address from the kernel VF netdevice
226+
if hwaddr != nil {
227+
if err = netlink.LinkSetHardwareAddr(link, hwaddr); err != nil {
228+
return err
229+
}
230+
contIface.Mac = hwaddr.String()
231+
} else {
232+
contIface.Mac = link.Attrs().HardwareAddr.String()
233+
}
216234
if mtu != 0 {
217235
if err = netlink.LinkSetMTU(link, mtu); err != nil {
218236
return err
@@ -223,13 +241,101 @@ func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName string, mtu int
223241
return err
224242
}
225243
contIface.Sandbox = contNetns.Path()
226-
contIface.Mac = link.Attrs().HardwareAddr.String()
227244

228245
return nil
229246
})
247+
if err != nil {
248+
return err
249+
}
250+
251+
return nil
252+
}
253+
254+
// setupUserspaceSriovContIface configures smartVF via PF netlink and fills in the contIface fields
255+
func setupUserspaceSriovContIface(contNetns ns.NetNS, contIface *current.Interface, pfLink netlink.Link, vfIdx int, ifName string, hwaddr net.HardwareAddr) error {
256+
contIface.Name = ifName
257+
contIface.Sandbox = contNetns.Path()
258+
259+
// if MAC address is provided, set it to the VF by using PF netlink
260+
if hwaddr != nil {
261+
if err := netlink.LinkSetVfHardwareAddr(pfLink, vfIdx, hwaddr); err != nil {
262+
return err
263+
}
264+
contIface.Mac = hwaddr.String()
265+
} else {
266+
vfInfo := pfLink.Attrs().Vfs[vfIdx]
267+
contIface.Mac = vfInfo.Mac.String()
268+
}
269+
270+
return nil
271+
}
272+
273+
// SetupSriovInterface configures smartVF and returns VF's representor device as host interface and VF's netdevice as container interface
274+
func SetupSriovInterface(contNetns ns.NetNS, containerID, ifName, mac string, mtu int, deviceID string, userspaceMode bool) (*current.Interface, *current.Interface, error) {
275+
hostIface := &current.Interface{}
276+
contIface := &current.Interface{}
277+
278+
// network representor device for smartvf
279+
rep, err := GetNetRepresentor(deviceID)
280+
if err != nil {
281+
return nil, nil, err
282+
}
283+
284+
hostIface.Name = rep
285+
286+
link, err := netlink.LinkByName(hostIface.Name)
287+
if err != nil {
288+
return nil, nil, err
289+
}
290+
hostIface.Mac = link.Attrs().HardwareAddr.String()
291+
292+
// get PF netlink and VF index from PCI address
293+
pfIface, err := sriovnet.GetUplinkRepresentor(deviceID)
230294
if err != nil {
231295
return nil, nil, err
232296
}
297+
pfLink, err := netlink.LinkByName(pfIface)
298+
if err != nil {
299+
return nil, nil, err
300+
}
301+
vfIdx, err := sriovnet.GetVfIndexByPciAddress(deviceID)
302+
if err != nil {
303+
return nil, nil, err
304+
}
305+
306+
// make sure PF netlink and VF index are valid
307+
if len(pfLink.Attrs().Vfs) < vfIdx || pfLink.Attrs().Vfs[vfIdx].ID != vfIdx {
308+
return nil, nil, fmt.Errorf("failed to get vf info from %s at index %d with Vfs %v", pfIface, vfIdx, pfLink.Attrs().Vfs)
309+
}
310+
311+
// parse MAC address if provided from args as described
312+
// in the CNI spec (https://github.com/containernetworking/cni/blob/main/CONVENTIONS.md)
313+
var hwaddr net.HardwareAddr
314+
if mac != "" {
315+
hwaddr, err = net.ParseMAC(mac)
316+
if err != nil {
317+
return nil, nil, fmt.Errorf("failed to parse MAC address %q: %v", mac, err)
318+
}
319+
}
320+
321+
// set MTU on smart VF representor
322+
if mtu != 0 {
323+
if err = netlink.LinkSetMTU(link, mtu); err != nil {
324+
return nil, nil, fmt.Errorf("failed to set MTU on %s: %v", hostIface.Name, err)
325+
}
326+
}
327+
328+
if !userspaceMode {
329+
// configure the smart VF netdevice directly in the container namespace
330+
if err = setupKernelSriovContIface(contNetns, contIface, deviceID, pfLink, vfIdx, ifName, hwaddr, mtu); err != nil {
331+
return nil, nil, err
332+
}
333+
} else {
334+
// configure the smart VF netdevice via PF netlink
335+
if err = setupUserspaceSriovContIface(contNetns, contIface, pfLink, vfIdx, ifName, hwaddr); err != nil {
336+
return nil, nil, err
337+
}
338+
}
233339

234340
return hostIface, contIface, nil
235341
}

pkg/types/types.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,15 @@ type Trunk struct {
7171
ID *uint `json:"id,omitempty"`
7272
}
7373

74-
// CachedNetConf containing NetConfig and original smartnic vf interface
75-
// name (set only in case of ovs hareware offload scenario).
74+
// CachedNetConf containing NetConfig, original smartnic vf interface name
75+
// and kernel/userspace device driver mode of the smartnic vf interface
76+
// (the last two are set only in case of ovs hardware offload scenario).
7677
// this is intended to be used only for storing and retrieving config
7778
// to/from a data store (example file cache).
7879
type CachedNetConf struct {
79-
Netconf *NetConf
80-
OrigIfName string
80+
Netconf *NetConf
81+
OrigIfName string
82+
UserspaceMode bool
8183
}
8284

8385
// CachedPrevResultNetConf containing PrevResult.

0 commit comments

Comments
 (0)