From 284c581845201d89c9c2f4934ae5a338bcc23f61 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 15 Jun 2021 00:23:15 -0400 Subject: [PATCH 01/43] Ensure shutdown migrations actually time out Without this a VM that fails to respond to a shutdown will just spin forever, blocking state changes. --- node-daemon/pvcnoded/VMInstance.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py index 2d93442f..e00f967e 100644 --- a/node-daemon/pvcnoded/VMInstance.py +++ b/node-daemon/pvcnoded/VMInstance.py @@ -531,8 +531,20 @@ class VMInstance(object): self.zkhandler.write([ (('domain.state', self.domuuid), 'shutdown') ]) + + ticks = 0 while self.zkhandler.read(('domain.state', self.domuuid)) != 'stop': + ticks += 1 + if ticks > self.config['vm_shutdown_timeout'] * 2: + # We've hit the timeout, forcibly stop the VM and continue + self.zkhandler.write([ + (('domain.state', self.domuuid), 'stop') + ]) + # Wait 1/2 of a second for the state propagation + time.sleep(0.5) + break time.sleep(0.5) + return True do_migrate_shutdown = False From f540dd320b7ecd68b38b7f51a44fb5700beae115 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 15 Jun 2021 00:27:01 -0400 Subject: [PATCH 02/43] Allow VNI for "direct" type vNICs --- daemon-common/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/daemon-common/common.py b/daemon-common/common.py index 335c6afe..b9c0e38d 100644 --- a/daemon-common/common.py +++ b/daemon-common/common.py @@ -397,9 +397,13 @@ def getDomainNetworks(parsed_xml, stats_data): net_wr_packets = net_stats.get('wr_packets', 0) net_wr_errors = net_stats.get('wr_errors', 0) net_wr_drops = net_stats.get('wr_drops', 0) + if net_type in ['direct']: + net_vni = device.source.attrib.get('dev') + else: + net_vni = re_match(r'[vm]*br([0-9a-z]+)', net_bridge).group(1) net_obj = { 'type': net_type, - 'vni': re_match(r'[vm]*br([0-9a-z]+)', net_bridge).group(1), + 'vni': net_vni, 'mac': net_mac, 'source': net_bridge, 'model': net_model, From da48304d4a41a8c447360279de9f00d6db697dbd Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 15 Jun 2021 00:31:13 -0400 Subject: [PATCH 03/43] Avoid hackery in VNI list and support direct type --- client-cli/cli_lib/vm.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/client-cli/cli_lib/vm.py b/client-cli/cli_lib/vm.py index e316995e..a6c1fe14 100644 --- a/client-cli/cli_lib/vm.py +++ b/client-cli/cli_lib/vm.py @@ -1242,13 +1242,7 @@ def format_list(config, vm_list, raw): # Network list net_list = [] for net in domain_information['networks']: - # Split out just the numerical (VNI) part of the brXXXX name - net_vnis = re.findall(r'\d+', net['source']) - if net_vnis: - net_vni = net_vnis[0] - else: - net_vni = re.sub('br', '', net['source']) - net_list.append(net_vni) + net_list.append(net['vni']) return net_list # Handle raw mode since it just lists the names @@ -1348,7 +1342,7 @@ def format_list(config, vm_list, raw): for net_vni in raw_net_list: if net_vni not in valid_net_list: response = call_api(config, 'get', '/network/{net}'.format(net=net_vni)) - if 
response.status_code != 200 and net_vni not in ['cluster', 'storage', 'upstream']: + if response.status_code != 200 and net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^e.*', net_vni): vm_net_colour = ansiprint.red() else: valid_net_list.append(net_vni) From e4a65230a1b428bf81e8637a72621b75c87f1542 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 15 Jun 2021 02:32:14 -0400 Subject: [PATCH 04/43] Just do the shutdown command itself --- node-daemon/pvcnoded/VMInstance.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py index e00f967e..634f72dd 100644 --- a/node-daemon/pvcnoded/VMInstance.py +++ b/node-daemon/pvcnoded/VMInstance.py @@ -528,23 +528,7 @@ class VMInstance(object): def migrate_shutdown(): self.logger.out('Shutting down VM for offline migration', state='i', prefix='Domain {}'.format(self.domuuid)) - self.zkhandler.write([ - (('domain.state', self.domuuid), 'shutdown') - ]) - - ticks = 0 - while self.zkhandler.read(('domain.state', self.domuuid)) != 'stop': - ticks += 1 - if ticks > self.config['vm_shutdown_timeout'] * 2: - # We've hit the timeout, forcibly stop the VM and continue - self.zkhandler.write([ - (('domain.state', self.domuuid), 'stop') - ]) - # Wait 1/2 of a second for the state propagation - time.sleep(0.5) - break - time.sleep(0.5) - + self.shutdown_vm() return True do_migrate_shutdown = False From 164becd3ef9414ba3b52c284c880c77fe5b213e6 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 15 Jun 2021 02:32:34 -0400 Subject: [PATCH 05/43] Fix info and list matching --- client-cli/cli_lib/vm.py | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/client-cli/cli_lib/vm.py b/client-cli/cli_lib/vm.py index a6c1fe14..e86e5904 100644 --- a/client-cli/cli_lib/vm.py +++ b/client-cli/cli_lib/vm.py @@ -1175,17 +1175,14 @@ def format_info(config, domain_information, long_output): # Network list net_list = [] + cluster_net_list = call_api(config, 'get', '/network').json() for net in domain_information['networks']: - # Split out just the numerical (VNI) part of the brXXXX name - net_vnis = re.findall(r'\d+', net['source']) - if net_vnis: - net_vni = net_vnis[0] - else: - net_vni = re.sub('br', '', net['source']) - - response = call_api(config, 'get', '/network/{net}'.format(net=net_vni)) - if response.status_code != 200 and net_vni not in ['cluster', 'storage', 'upstream']: - net_list.append(ansiprint.red() + net_vni + ansiprint.end() + ' [invalid]') + net_vni = net['vni'] + if net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^e.*', net_vni): + if int(net_vni) not in [net['vni'] for net in cluster_net_list]: + net_list.append(ansiprint.red() + net_vni + ansiprint.end() + ' [invalid]') + else: + net_list.append(net_vni) else: net_list.append(net_vni) @@ -1318,8 +1315,6 @@ def format_list(config, vm_list, raw): ) ) - # Keep track of nets we found to be valid to cut down on duplicate API hits - valid_net_list = [] # Format the string (elements) for domain_information in vm_list: if domain_information['state'] == 'start': @@ -1336,18 +1331,13 @@ def format_list(config, vm_list, raw): vm_state_colour = ansiprint.blue() # Handle colouring for an invalid network config - raw_net_list = getNiceNetID(domain_information) - net_list = [] + net_list = getNiceNetID(domain_information) + cluster_net_list = call_api(config, 'get', '/network').json() vm_net_colour = '' - for net_vni in 
raw_net_list: - if net_vni not in valid_net_list: - response = call_api(config, 'get', '/network/{net}'.format(net=net_vni)) - if response.status_code != 200 and net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^e.*', net_vni): + for net_vni in net_list: + if net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^e.*', net_vni): + if int(net_vni) not in [net['vni'] for net in cluster_net_list]: vm_net_colour = ansiprint.red() - else: - valid_net_list.append(net_vni) - - net_list.append(net_vni) vm_list_output.append( '{bold}{vm_name: <{vm_name_length}} {vm_uuid: <{vm_uuid_length}} \ From 0ad6d55dff01406e4927ce4867a8b5843bf615dc Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 15 Jun 2021 22:42:59 -0400 Subject: [PATCH 06/43] Add initial SR-IOV support to node daemon Adds configuration values for enabled flag and SR-IOV devices to the configuration and sets up the initial SR-IOV configuration on daemon startup (inserting the module, configuring the VF count, etc.). 
--- node-daemon/pvcnoded.sample.yaml | 15 +++++++++++ node-daemon/pvcnoded/Daemon.py | 43 ++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/node-daemon/pvcnoded.sample.yaml b/node-daemon/pvcnoded.sample.yaml index 89c17603..b728c84b 100644 --- a/node-daemon/pvcnoded.sample.yaml +++ b/node-daemon/pvcnoded.sample.yaml @@ -157,6 +157,21 @@ pvc: networking: # bridge_device: Underlying device to use for bridged vLAN networks; usually the device underlying bridge_device: ens4 + # sriov_enable: Enable or disable (default if absent) SR-IOV network support + sriov_enable: False + # sriov_device: Underlying device(s) to use for SR-IOV networks; can be bridge_device or other NIC(s) + sriov_device: + # The physical device name + - phy: ens1f1 + # The preferred MTU of the physical device; OPTIONAL - defaults to the interface default if unset + mtu: 9000 + # The number of VFs to enable on this device + # NOTE: This defines the maximum number of VMs which can be provisioned on this physical device; VMs + # are allocated to these VFs manually by the administrator and thus all nodes should have the + # same number + # NOTE: This value cannot be changed at runtime on Intel(R) NICs; the node will need to be restarted + # if this value changes + vfcount: 8 # upstream: Upstream physical interface device upstream: # device: Upstream interface device name diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 4d339ebe..96b061f0 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -223,6 +223,12 @@ def readConfig(pvcnoded_config_file, myhostname): 'upstream_mtu': o_config['pvc']['system']['configuration']['networking']['upstream']['mtu'], 'upstream_dev_ip': o_config['pvc']['system']['configuration']['networking']['upstream']['address'], } + + # Check if SR-IOV is enabled and activate + config_networking['enable_sriov'] = 
o_config['pvc']['system']['configuration']['networking'].get('sriov_enable', False) + if config_networking['enable_sriov']: + config_networking['sriov_device'] = list(o_config['pvc']['system']['configuration']['networking']['sriov_device']) + except Exception as e: print('ERROR: Failed to load configuration: {}'.format(e)) exit(1) @@ -289,6 +295,7 @@ if debug: # Handle the enable values enable_hypervisor = config['enable_hypervisor'] enable_networking = config['enable_networking'] +enable_sriov = config['enable_sriov'] enable_storage = config['enable_storage'] ############################################################################### @@ -380,7 +387,39 @@ else: fmt_purple = '' ############################################################################### -# PHASE 2a - Create local IP addresses for static networks +# PHASE 2a - Activate SR-IOV support +############################################################################### + +if enable_networking and enable_sriov: + logger.out('Setting up SR-IOV device support', state='i') + # Enable unsafe interruptts for the vfio_iommu_type1 kernel module + try: + common.run_os_command('modprobe vfio_iommu_type1 allow_unsafe_interrupts=1') + with open('/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts', 'w') as mfh: + mfh.write('Y') + except Exception: + logger.out('Failed to enable kernel modules; SR-IOV may fail.', state='w') + + # Loop through our SR-IOV NICs and enable the numvfs for each + for device in config['sriov_device']: + logger.out('Preparing SR-IOV PF {} with {} VFs'.format(device['phy'], device['vfcount']), state='i') + try: + with open('/sys/class/net/{}/device/sriov_numvfs'.format(device['phy']), 'r') as vfh: + current_sriov_count = vfh.read().strip() + with open('/sys/class/net/{}/device/sriov_numvfs'.format(device['phy']), 'w') as vfh: + vfh.write(str(device['vfcount'])) + except FileNotFoundError: + logger.out('Failed to open SR-IOV configuration for PF {}; device may not support 
SR-IOV.'.format(device), state='w') + except OSError: + logger.out('Failed to set SR-IOV VF count for PF {} to {}; already set to {}.'.format(device['phy'], device['vfcount'], current_sriov_count), state='w') + + if device.get('mtu', None) is not None: + logger.out('Setting SR-IOV PF {} to MTU {}'.format(device['phy'], device['mtu']), state='i') + common.run_os_command('ip link set {} mtu {} up'.format(device['phy'], device['mtu'])) + + +############################################################################### +# PHASE 2b - Create local IP addresses for static networks ############################################################################### if enable_networking: @@ -444,7 +483,7 @@ if enable_networking: common.run_os_command('ip route add default via {} dev {}'.format(upstream_gateway, 'brupstream')) ############################################################################### -# PHASE 2b - Prepare sysctl for pvcnoded +# PHASE 2c - Prepare sysctl for pvcnoded ############################################################################### if enable_networking: From e7b6a3eac1a1442b6066e831c3b58490a5eb4413 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Thu, 17 Jun 2021 01:01:23 -0400 Subject: [PATCH 07/43] Implement SR-IOV PF and VF instances Adds support for the node daemon managing SR-IOV PF and VF instances. PFs are added to Zookeeper automatically based on the config at startup during network configuration, and are otherwise completely static. PFs are automatically removed from Zookeeper, along with all coresponding VFs, should the PF phy device be removed from the configuration. VFs are configured based on the (autocreated) VFs of each PF device, added to Zookeeper, and then a new class instance, SRIOVVFInstance, is used to watch them for configuration changes. This will enable the runtime management of VF settings by the API. The set of keys ensures that both configuration and details of the NIC can be tracked. 
Most keys are self-explanatory, especially for PFs and the basic keys for VFs. The configuration tree is also self-explanatory, being based entirely on the options available in the `ip link set {dev} vf` command. Two additional keys are also present: `used` and `used_by`, which will be able to track the (boolean) state of usage, as well as the VM that uses a given VIF. Since the VM side implementation will support both macvtap and direct "hostdev" assignments, this will ensure that this state can be tracked on both the VF and the VM side. --- daemon-common/migrations/versions/1.json | 1 + daemon-common/zkhandler.py | 39 ++++- node-daemon/pvcnoded/Daemon.py | 90 ++++++++++ node-daemon/pvcnoded/SRIOVVFInstance.py | 199 +++++++++++++++++++++++ 4 files changed, 327 insertions(+), 2 deletions(-) create mode 100644 daemon-common/migrations/versions/1.json create mode 100644 node-daemon/pvcnoded/SRIOVVFInstance.py diff --git a/daemon-common/migrations/versions/1.json b/daemon-common/migrations/versions/1.json new file mode 100644 index 00000000..70f5e23f --- /dev/null +++ b/daemon-common/migrations/versions/1.json @@ -0,0 +1 @@ +{"version": "1", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": 
"/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "migrate.sync_lock": "/migrate_sync_lock"}, "network": {"vni": "", "type": "/nettype", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", 
"ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index f201429d..5bcffa6f 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -426,7 +426,7 @@ class ZKHandler(object): # class ZKSchema(object): # Current version - _version = 0 + _version = 1 # Root for doing nested keys _schema_root = '' @@ -483,7 +483,34 @@ class ZKSchema(object): 'memory.provisioned': '/memprov', 'ipmi.hostname': '/ipmihostname', 'ipmi.username': '/ipmiusername', - 'ipmi.password': '/ipmipassword' + 'ipmi.password': '/ipmipassword', + 'sriov': '/sriov', + 'sriov.pf': '/sriov/pf', + 'sriov.vf': '/sriov/vf', + }, + # The schema of an individual SR-IOV PF entry (/nodes/{node_name}/sriov/pf/{pf}) + 'sriov_pf': { + 'phy': '', # The root key + 'mtu': '/mtu', + 'vfcount': '/vfcount' + }, + # The schema of an individual SR-IOV VF entry (/nodes/{node_name}/sriov/vf/{vf}) + 'sriov_vf': { + 'phy': '', # The root key + 'pf': '/pf', + 'mtu': '/mtu', + 'mac': '/mac', + 'config': '/config', + 'config.vlan_id': '/config/vlan_id', + 'config.vlan_qos': '/config/vlan_qos', + 'config.tx_rate_min': '/config/tx_rate_min', + 'config.tx_rate_max': '/config/tx_rate_max', + 'config.spoof_check': '/config/spoof_check', + 'config.link_state': '/config/link_state', + 'config.trust': '/config/trust', + 'config.query_rss': '/config/query_rss', + 'used': '/used', + 'used_by': 
'/used_by' }, # The schema of an individual domain entry (/domains/{domain_uuid}) 'domain': { @@ -709,6 +736,10 @@ class ZKSchema(object): if not zkhandler.zk_conn.exists(nkipath): result = False + # One might expect child keys under node (specifically, sriov.pf and sriov.vf) to be + # managed here as well, but those are created automatically every time pvcnoded starts + # and thus never need to be validated or applied. + # These two have several children layers that must be parsed through for elem in ['volume']: # First read all the subelements of the key class (pool layer) @@ -782,6 +813,10 @@ class ZKSchema(object): if not zkhandler.zk_conn.exists(nkipath): zkhandler.zk_conn.create(nkipath, ''.encode(zkhandler.encoding)) + # One might expect child keys under node (specifically, sriov.pf and sriov.vf) to be + # managed here as well, but those are created automatically every time pvcnoded starts + # and thus never need to be validated or applied. + # These two have several children layers that must be parsed through for elem in ['volume']: # First read all the subelements of the key class (pool layer) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 96b061f0..fa8562b8 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -49,6 +49,7 @@ import daemon_lib.common as common import pvcnoded.VMInstance as VMInstance import pvcnoded.NodeInstance as NodeInstance import pvcnoded.VXNetworkInstance as VXNetworkInstance +import pvcnoded.SRIOVVFInstance as SRIOVVFInstance import pvcnoded.DNSAggregatorInstance as DNSAggregatorInstance import pvcnoded.CephInstance as CephInstance import pvcnoded.MetadataAPIInstance as MetadataAPIInstance @@ -390,6 +391,7 @@ else: # PHASE 2a - Activate SR-IOV support ############################################################################### +# This happens before other networking steps to enable using VFs for cluster functions. 
if enable_networking and enable_sriov: logger.out('Setting up SR-IOV device support', state='i') # Enable unsafe interruptts for the vfio_iommu_type1 kernel module @@ -916,12 +918,15 @@ logger.out('Setting up objects', state='i') d_node = dict() d_network = dict() +d_sriov_vf = dict() d_domain = dict() d_osd = dict() d_pool = dict() d_volume = dict() # Dict of Dicts node_list = [] network_list = [] +sriov_pf_list = [] +sriov_vf_list = [] domain_list = [] osd_list = [] pool_list = [] @@ -1076,6 +1081,91 @@ if enable_networking: for node in d_node: d_node[node].update_network_list(d_network) + # Add the SR-IOV PFs and VFs to Zookeeper + # These do not behave like the objects; they are not dynamic (the API cannot change them), and they + # exist for the lifetime of this Node instance. The objects are set here in Zookeeper on a per-node + # basis, under the Node configuration tree. + # MIGRATION: The schema.schema.get ensures that the current active Schema contains the required keys + if enable_sriov and zkhandler.schema.schema.get('sriov_pf', None) is not None: + vf_list = list() + for device in config['sriov_device']: + pf = device['phy'] + vfcount = device['vfcount'] + if device.get('mtu', None) is None: + mtu = 1500 + else: + mtu = device['mtu'] + + # Create the PF device in Zookeeper + zkhandler.write([ + (('node.sriov.pf', myhostname, 'sriov_pf', pf), ''), + (('node.sriov.pf', myhostname, 'sriov_pf.mtu', pf), mtu), + (('node.sriov.pf', myhostname, 'sriov_pf.vfcount', pf), vfcount), + ]) + # Append the device to the list of PFs + sriov_pf_list.append(pf) + + # Get the list of VFs from `ip link show` + vf_list = json.loads(common.run_os_command('ip --json link show {}'.format(pf))[1])[0].get('vfinfo_list', []) + for vf in vf_list: + # { + # 'vf': 3, + # 'link_type': 'ether', + # 'address': '00:00:00:00:00:00', + # 'broadcast': 'ff:ff:ff:ff:ff:ff', + # 'vlan_list': [{'vlan': 101, 'qos': 2}], + # 'rate': {'max_tx': 0, 'min_tx': 0}, + # 'spoofchk': True, + # 
'link_state': 'auto', + # 'trust': False, + # 'query_rss_en': False + # } + vfphy = '{}v{}'.format(pf, vf['vf']) + zkhandler.write([ + (('node.sriov.vf', myhostname, 'sriov_vf', vfphy), ''), + (('node.sriov.vf', myhostname, 'sriov_vf.pf', vfphy), pf), + (('node.sriov.vf', myhostname, 'sriov_vf.mtu', vfphy), mtu), + (('node.sriov.vf', myhostname, 'sriov_vf.mac', vfphy), vf['address']), + (('node.sriov.vf', myhostname, 'sriov_vf.config', vfphy), ''), + (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '')), + (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '')), + (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_min', vfphy), vf['rate']['min_tx']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_max', vfphy), vf['rate']['max_tx']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.spoof_check', vfphy), vf['spoofchk']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.link_state', vfphy), vf['link_state']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.trust', vfphy), vf['trust']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.query_rss', vfphy), vf['query_rss_en']), + ]) + # Append the device to the list of VFs + sriov_vf_list.append(vfphy) + + # Remove any obsolete PFs from Zookeeper if they go away + for pf in zkhandler.children(('node.sriov.pf', myhostname)): + if pf not in sriov_pf_list: + zkhandler.delete([ + ('node.sriov.pf', myhostname, 'sriov_pf', pf) + ]) + # Remove any obsolete VFs from Zookeeper if their PF goes away + for vf in zkhandler.children(('node.sriov.vf', myhostname)): + vf_pf = zkhandler.read(('node.sriov.vf', myhostname, 'sriov_vf.pf', vf)) + if vf_pf not in sriov_pf_list: + zkhandler.delete([ + ('node.sriov.vf', myhostname, 'sriov_vf', vf) + ]) + + # SR-IOV VF objects + # This is a ChildrenWatch just for consistency; the list never changes at runtime + @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('node.sriov.vf', 
myhostname)) + def update_sriov_pfs(new_sriov_vf_list): + global sriov_vf_list, d_sriov_vf + + # Add VFs to the list + for vf in new_sriov_vf_list: + d_sriov_vf[vf] = SRIOVVFInstance.SRIOVVFInstance(vf, zkhandler, config, logger, this_node) + + sriov_vf_list = new_sriov_vf_list + logger.out('{}SR-IOV VF list:{} {}'.format(fmt_blue, fmt_end, ' '.join(sriov_vf_list)), state='i') + if enable_hypervisor: # VM command pipeline key @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.cmd.domain')) diff --git a/node-daemon/pvcnoded/SRIOVVFInstance.py b/node-daemon/pvcnoded/SRIOVVFInstance.py new file mode 100644 index 00000000..ea20dff8 --- /dev/null +++ b/node-daemon/pvcnoded/SRIOVVFInstance.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 + +# SRIOVVFInstance.py - Class implementing a PVC SR-IOV VF and run by pvcnoded +# Part of the Parallel Virtual Cluster (PVC) system +# +# Copyright (C) 2018-2021 Joshua M. Boniface +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# +############################################################################### + +import daemon_lib.common as common + + +def boolToOnOff(state): + if state and str(state) == 'True': + return 'on' + else: + return 'off' + + +class SRIOVVFInstance(object): + # Initialization function + def __init__(self, vf, zkhandler, config, logger, this_node): + self.vf = vf + self.zkhandler = zkhandler + self.config = config + self.logger = logger + self.this_node = this_node + self.myhostname = self.this_node.name + + self.pf = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.pf', self.vf)) + self.mtu = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.mtu', self.vf)) + + self.vfid = self.vf.replace('{}v'.format(self.pf), '') + self.mac = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.mac', self.vf)) + + self.vlan_id = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.vlan_id', self.vf)) + self.vlan_qos = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.vlan_qos', self.vf)) + self.tx_rate_min = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.tx_rate_min', self.vf)) + self.tx_rate_max = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.tx_rate_max', self.vf)) + self.spoof_check = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.spoof_check', self.vf)) + self.link_state = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.link_state', self.vf)) + self.trust = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.trust', self.vf)) + self.query_rss = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.query_rss', self.vf)) + + # Zookeeper handlers for changed configs + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.mac', self.vf)) + def watch_vf_mac(data, stat, event=''): + 
if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '00:00:00:00:00:00' + + if data != self.mac: + self.mac = data + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.vlan_id', self.vf)) + def watch_vf_vlan_id(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.vlan_id: + self.vlan_id = data + common.run_os_command('ip link set {} vf {} vlan {} qos {}'.format(self.pf, self.vfid, self.vlan_id, self.vlan_qos)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.vlan_qos', self.vf)) + def watch_vf_vlan_qos(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.vlan_qos: + self.vlan_qos = data + common.run_os_command('ip link set {} vf {} vlan {} qos {}'.format(self.pf, self.vfid, self.vlan_id, self.vlan_qos)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.tx_rate_min', self.vf)) + def watch_vf_tx_rate_min(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this 
class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.tx_rate_min: + self.tx_rate_min = data + common.run_os_command('ip link set {} vf {} min_tx_rate {}'.format(self.pf, self.vfid, self.tx_rate_min)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.tx_rate_max', self.vf)) + def watch_vf_tx_rate_max(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.tx_rate_max: + self.tx_rate_max = data + common.run_os_command('ip link set {} vf {} max_tx_rate {}'.format(self.pf, self.vfid, self.tx_rate_max)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.spoof_check', self.vf)) + def watch_vf_spoof_check(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.spoof_check: + self.spoof_check = data + common.run_os_command('ip link set {} vf {} spoofchk {}'.format(self.pf, self.vfid, boolToOnOff(self.spoof_check))) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.link_state', self.vf)) + def watch_vf_link_state(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class
instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = 'on' + + if data != self.link_state: + self.link_state = data + common.run_os_command('ip link set {} vf {} state {}'.format(self.pf, self.vfid, self.link_state)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.trust', self.vf)) + def watch_vf_trust(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = 'off' + + if data != self.trust: + self.trust = data + common.run_os_command('ip link set {} vf {} trust {}'.format(self.pf, self.vfid, boolToOnOff(self.trust))) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.query_rss', self.vf)) + def watch_vf_query_rss(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = 'off' + + if data != self.query_rss: + self.query_rss = data + common.run_os_command('ip link set {} vf {} trust {}'.format(self.pf, self.vfid, boolToOnOff(self.query_rss))) From 8f1af2a6427ba08beb7b614a1467639119effaa1 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Thu, 17 Jun 2021 01:33:11 -0400 Subject: [PATCH 08/43] Ignore hostdev interfaces in VM net stat gathering Prevents errors if a SR-IOV hostdev interface is configured until this is more defined. 
--- node-daemon/pvcnoded/Daemon.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index fa8562b8..247ea38c 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1655,6 +1655,9 @@ def collect_vm_stats(queue): logger.out("Getting network statistics for VM {}".format(domain_name), state='d', prefix='vm-thread') domain_network_stats = [] for interface in tree.findall('devices/interface'): + interface_type = interface.get('type') + if interface_type in ['hostdev']: + continue interface_name = interface.find('target').get('dev') interface_bridge = interface.find('source').get('bridge') interface_stats = domain.interfaceStats(interface_name) From 5607a6bb62acf21fbe31c74c982a0b144ed92c9e Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Thu, 17 Jun 2021 01:45:22 -0400 Subject: [PATCH 09/43] Avoid overwriting VF data Ensures that the configuration of a VF is not overwritten in Zookeeper on a node restart. The SRIOVVFInstance handlers were modified to start with None values, so that the DataWatch statements will always trigger updates to the live system interfaces on daemon startup, thus ensuring that the config stored in Zookeeper is applied to the system on startup (mostly relevant after a cold boot or if the API changes them during a daemon restart). 
--- node-daemon/pvcnoded/Daemon.py | 32 +++++++++++++------------ node-daemon/pvcnoded/SRIOVVFInstance.py | 19 ++++++++------- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 247ea38c..53a6decd 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1121,21 +1121,23 @@ if enable_networking: # 'query_rss_en': False # } vfphy = '{}v{}'.format(pf, vf['vf']) - zkhandler.write([ - (('node.sriov.vf', myhostname, 'sriov_vf', vfphy), ''), - (('node.sriov.vf', myhostname, 'sriov_vf.pf', vfphy), pf), - (('node.sriov.vf', myhostname, 'sriov_vf.mtu', vfphy), mtu), - (('node.sriov.vf', myhostname, 'sriov_vf.mac', vfphy), vf['address']), - (('node.sriov.vf', myhostname, 'sriov_vf.config', vfphy), ''), - (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '')), - (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '')), - (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_min', vfphy), vf['rate']['min_tx']), - (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_max', vfphy), vf['rate']['max_tx']), - (('node.sriov.vf', myhostname, 'sriov_vf.config.spoof_check', vfphy), vf['spoofchk']), - (('node.sriov.vf', myhostname, 'sriov_vf.config.link_state', vfphy), vf['link_state']), - (('node.sriov.vf', myhostname, 'sriov_vf.config.trust', vfphy), vf['trust']), - (('node.sriov.vf', myhostname, 'sriov_vf.config.query_rss', vfphy), vf['query_rss_en']), - ]) + # Add the VF to Zookeeper if it does not yet exist + if not zkhandler.exists(('node.sriov.vf', myhostname, 'sriov_vf', vfphy)): + zkhandler.write([ + (('node.sriov.vf', myhostname, 'sriov_vf', vfphy), ''), + (('node.sriov.vf', myhostname, 'sriov_vf.pf', vfphy), pf), + (('node.sriov.vf', myhostname, 'sriov_vf.mtu', vfphy), mtu), + (('node.sriov.vf', myhostname, 'sriov_vf.mac', vfphy), vf['address']), + (('node.sriov.vf', 
myhostname, 'sriov_vf.config', vfphy), ''), + (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '')), + (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '')), + (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_min', vfphy), vf['rate']['min_tx']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_max', vfphy), vf['rate']['max_tx']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.spoof_check', vfphy), vf['spoofchk']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.link_state', vfphy), vf['link_state']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.trust', vfphy), vf['trust']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.query_rss', vfphy), vf['query_rss_en']), + ]) # Append the device to the list of VFs sriov_vf_list.append(vfphy) diff --git a/node-daemon/pvcnoded/SRIOVVFInstance.py b/node-daemon/pvcnoded/SRIOVVFInstance.py index ea20dff8..f079ab91 100644 --- a/node-daemon/pvcnoded/SRIOVVFInstance.py +++ b/node-daemon/pvcnoded/SRIOVVFInstance.py @@ -43,16 +43,17 @@ class SRIOVVFInstance(object): self.mtu = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.mtu', self.vf)) self.vfid = self.vf.replace('{}v'.format(self.pf), '') - self.mac = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.mac', self.vf)) - self.vlan_id = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.vlan_id', self.vf)) - self.vlan_qos = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.vlan_qos', self.vf)) - self.tx_rate_min = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.tx_rate_min', self.vf)) - self.tx_rate_max = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.tx_rate_max', self.vf)) - self.spoof_check = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.spoof_check', self.vf)) - self.link_state = 
self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.link_state', self.vf)) - self.trust = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.trust', self.vf)) - self.query_rss = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.config.query_rss', self.vf)) + # These properties are set via the DataWatch functions, to ensure they are configured on the system + self.mac = None + self.vlan_id = None + self.vlan_qos = None + self.tx_rate_min = None + self.tx_rate_max = None + self.spoof_check = None + self.link_state = None + self.trust = None + self.query_rss = None # Zookeeper handlers for changed configs @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.mac', self.vf)) From 509afd4d05352ee174426308aee03e9424f29d01 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Thu, 17 Jun 2021 01:52:58 -0400 Subject: [PATCH 10/43] Add hostdev net_type to handler as well --- daemon-common/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon-common/common.py b/daemon-common/common.py index b9c0e38d..f26f1966 100644 --- a/daemon-common/common.py +++ b/daemon-common/common.py @@ -397,7 +397,7 @@ def getDomainNetworks(parsed_xml, stats_data): net_wr_packets = net_stats.get('wr_packets', 0) net_wr_errors = net_stats.get('wr_errors', 0) net_wr_drops = net_stats.get('wr_drops', 0) - if net_type in ['direct']: + if net_type in ['direct', 'hostdev']: net_vni = device.source.attrib.get('dev') else: net_vni = re_match(r'[vm]*br([0-9a-z]+)', net_bridge).group(1) From 57b041dc622ed7c2311e0e89acb3c3142c2a8faf Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Thu, 17 Jun 2021 01:54:37 -0400 Subject: [PATCH 11/43] Ensure default for vLAN and QOS is 0 not empty --- node-daemon/pvcnoded/Daemon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 53a6decd..697a449a 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1129,8 +1129,8 @@ if enable_networking: (('node.sriov.vf', myhostname, 'sriov_vf.mtu', vfphy), mtu), (('node.sriov.vf', myhostname, 'sriov_vf.mac', vfphy), vf['address']), (('node.sriov.vf', myhostname, 'sriov_vf.config', vfphy), ''), - (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '')), - (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '')), + (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '0')), + (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '0')), (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_min', vfphy), vf['rate']['min_tx']), (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_max', vfphy), vf['rate']['max_tx']), (('node.sriov.vf', myhostname, 'sriov_vf.config.spoof_check', vfphy), vf['spoofchk']), From bff6d71e18e13660410c9bcd26462388314ecf0d Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Thu, 17 Jun 2021 02:02:41 -0400 Subject: [PATCH 12/43] Add logging to SRIOVVFInstance and fix bug --- node-daemon/pvcnoded/SRIOVVFInstance.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/node-daemon/pvcnoded/SRIOVVFInstance.py b/node-daemon/pvcnoded/SRIOVVFInstance.py index f079ab91..be9ffbc7 100644 --- a/node-daemon/pvcnoded/SRIOVVFInstance.py +++ b/node-daemon/pvcnoded/SRIOVVFInstance.py @@ -85,6 +85,7 @@ class SRIOVVFInstance(object): if data != self.vlan_id: self.vlan_id = data + self.logger.out('Setting vLAN ID to {}'.format(self.vlan_id), state='i', prefix='SR-IOV VF {}'.format(self.vf)) common.run_os_command('ip link set {} vf {} vlan {} qos {}'.format(self.pf, self.vfid, self.vlan_id, self.vlan_qos)) @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.vlan_qos', self.vf)) @@ -101,6 +102,7 @@ class SRIOVVFInstance(object): if data != self.vlan_qos: self.vlan_qos = data + self.logger.out('Setting vLAN QOS to {}'.format(self.vlan_qos), state='i', prefix='SR-IOV VF {}'.format(self.vf)) common.run_os_command('ip link set {} vf {} vlan {} qos {}'.format(self.pf, self.vfid, self.vlan_id, self.vlan_qos)) @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.tx_rate_min', self.vf)) @@ -117,6 +119,7 @@ class SRIOVVFInstance(object): if data != self.tx_rate_min: self.tx_rate_min = data + self.logger.out('Setting minimum TX rate to {}'.format(self.tx_rate_min), state='i', prefix='SR-IOV VF {}'.format(self.vf)) common.run_os_command('ip link set {} vf {} min_tx_rate {}'.format(self.pf, self.vfid, self.tx_rate_min)) @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.tx_rate_max', self.vf)) @@ -133,6 +136,7 @@ class SRIOVVFInstance(object): if data != 
self.tx_rate_max: self.tx_rate_max = data + self.logger.out('Setting maximum TX rate to {}'.format(self.tx_rate_max), state='i', prefix='SR-IOV VF {}'.format(self.vf)) common.run_os_command('ip link set {} vf {} max_tx_rate {}'.format(self.pf, self.vfid, self.tx_rate_max)) @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.spoof_check', self.vf)) @@ -149,6 +153,7 @@ class SRIOVVFInstance(object): if data != self.spoof_check: self.spoof_check = data + self.logger.out('Setting spoof checking {}'.format(boolToOnOff(self.spoof_check)), state='i', prefix='SR-IOV VF {}'.format(self.vf)) common.run_os_command('ip link set {} vf {} spoofchk {}'.format(self.pf, self.vfid, boolToOnOff(self.spoof_check))) @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.link_state', self.vf)) @@ -165,6 +170,7 @@ class SRIOVVFInstance(object): if data != self.link_state: self.link_state = data + self.logger.out('Setting link state to {}'.format(boolToOnOff(self.link_state)), state='i', prefix='SR-IOV VF {}'.format(self.vf)) common.run_os_command('ip link set {} vf {} state {}'.format(self.pf, self.vfid, self.link_state)) @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.trust', self.vf)) @@ -181,6 +187,7 @@ class SRIOVVFInstance(object): if data != self.trust: self.trust = data + self.logger.out('Setting trust mode {}'.format(boolToOnOff(self.trust)), state='i', prefix='SR-IOV VF {}'.format(self.vf)) common.run_os_command('ip link set {} vf {} trust {}'.format(self.pf, self.vfid, boolToOnOff(self.trust))) @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.query_rss', self.vf)) @@ -197,4 +204,5 @@ class SRIOVVFInstance(object): if data != 
self.query_rss: self.query_rss = data - common.run_os_command('ip link set {} vf {} trust {}'.format(self.pf, self.vfid, boolToOnOff(self.query_rss))) + self.logger.out('Setting RSS query ability {}'.format(boolToOnOff(self.query_rss)), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} vf {} query_rss {}'.format(self.pf, self.vfid, boolToOnOff(self.query_rss))) From e8bd1bf2c469697acaf969fc64f8045fb01fcd31 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Mon, 21 Jun 2021 01:25:38 -0400 Subject: [PATCH 13/43] Ensure used/used_by are set on creation --- node-daemon/pvcnoded/Daemon.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 697a449a..a52cce01 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1137,6 +1137,8 @@ if enable_networking: (('node.sriov.vf', myhostname, 'sriov_vf.config.link_state', vfphy), vf['link_state']), (('node.sriov.vf', myhostname, 'sriov_vf.config.trust', vfphy), vf['trust']), (('node.sriov.vf', myhostname, 'sriov_vf.config.query_rss', vfphy), vf['query_rss_en']), + (('node.sriov.vf', myhostname, 'sriov_vf.used', vfphy), False), + (('node.sriov.vf', myhostname, 'sriov_vf.used_by', vfphy), ''), ]) # Append the device to the list of VFs sriov_vf_list.append(vfphy) From ca11dbf491ea7a2eb6a8b6ec3c9b4df2485517d6 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Mon, 21 Jun 2021 01:40:05 -0400 Subject: [PATCH 14/43] Sort the list of VFs for easier parsing --- node-daemon/pvcnoded/Daemon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index a52cce01..e9bafcc8 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1164,10 +1164,10 @@ if enable_networking: global sriov_vf_list, d_sriov_vf # Add VFs to the list - for vf in new_sriov_vf_list: + for vf in sorted(new_sriov_vf_list): d_sriov_vf[vf] = SRIOVVFInstance.SRIOVVFInstance(vf, zkhandler, config, logger, this_node) - sriov_vf_list = new_sriov_vf_list + sriov_vf_list = sorted(new_sriov_vf_list) logger.out('{}SR-IOV VF list:{} {}'.format(fmt_blue, fmt_end, ' '.join(sriov_vf_list)), state='i') if enable_hypervisor: From a697c2db2ed4a2f7a16227563a7d0a5ab31b583b Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Mon, 21 Jun 2021 01:42:55 -0400 Subject: [PATCH 15/43] Add SRIOV PF and VF listing to API --- api-daemon/pvcapid/flaskapi.py | 128 +++++++++++++++++++++++++++++++++ api-daemon/pvcapid/helper.py | 56 +++++++++++++++ daemon-common/network.py | 111 ++++++++++++++++++++++++++++ 3 files changed, 295 insertions(+) diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py index 2ce9c16b..54691368 100755 --- a/api-daemon/pvcapid/flaskapi.py +++ b/api-daemon/pvcapid/flaskapi.py @@ -2719,6 +2719,134 @@ class API_Network_ACL_Element(Resource): api.add_resource(API_Network_ACL_Element, '/network//acl/') +########################################################## +# Client API - SR-IOV +########################################################## + +# /sriov +class API_SRIOV_Root(Resource): + @Authenticator + def get(self): + pass + + +api.add_resource(API_SRIOV_Root, '/sriov') + + +# /sriov/pf +class API_SRIOV_PF_Root(Resource): + @RequestParser([ + {'name': 'node', 'required': True, 'helptext': "A valid node must be 
specified."}, + ]) + @Authenticator + def get(self, reqargs): + """ + Return a list of SR-IOV PFs on a given node + --- + tags: + - network / sriov + responses: + 200: + description: OK + schema: + type: object + id: sriov_pf + properties: + phy: + type: string + description: The name of the SR-IOV PF device + mtu: + type: string + description: The MTU of the SR-IOV PF device + vfs: + type: list + items: + type: string + description: The PHY name of a VF of this PF + """ + return api_helper.sriov_pf_list(reqargs.get('node')) + + +api.add_resource(API_SRIOV_PF_Root, '/sriov/pf') + + +# /sriov/vf +class API_SRIOV_VF_Root(Resource): + @RequestParser([ + {'name': 'node', 'required': True, 'helptext': "A valid node must be specified."}, + {'name': 'pf', 'required': False, 'helptext': "A PF parent may be specified."}, + ]) + @Authenticator + def get(self, reqargs): + """ + Return a list of SR-IOV VFs on a given node, optionally limited to those in the specified PF + --- + tags: + - network / sriov + responses: + 200: + description: OK + schema: + type: object + id: sriov_vf + properties: + phy: + type: string + description: The name of the SR-IOV VF device + pf: + type: string + description: The name of the SR-IOV PF parent of this VF device + mtu: + type: integer + description: The current MTU of the VF device + mac: + type: string + description: The current MAC address of the VF device + config: + type: object + id: sriov_vf_config + properties: + vlan_id: + type: string + description: The tagged vLAN ID of the SR-IOV VF device + vlan_qos: + type: string + description: The QOS group of the tagged vLAN + tx_rate_min: + type: string + description: The minimum TX rate of the SR-IOV VF device + tx_rate_max: + type: string + description: The maximum TX rate of the SR-IOV VF device + spoof_check: + type: boolean + description: Whether device spoof checking is enabled or disabled + link_state: + type: string + description: The current SR-IOV VF link state (either enabled, 
disabled, or auto) + trust: + type: boolean + description: Whether guest device trust is enabled or disabled + query_rss: + type: boolean + description: Whether VF RSS querying is enabled or disabled + usage: + type: object + id: sriov_vf_usage + properties: + used: + type: boolean + description: Whether the SR-IOV VF is currently used by a VM or not + domain: + type: boolean + description: The UUID of the domain the SR-IOV VF is currently used by + """ + return api_helper.sriov_vf_list(reqargs.get('node'), reqargs.get('pf', None)) + + +api.add_resource(API_SRIOV_VF_Root, '/sriov/vf') + + ########################################################## # Client API - Storage ########################################################## diff --git a/api-daemon/pvcapid/helper.py b/api-daemon/pvcapid/helper.py index 974130c6..bf6f345f 100755 --- a/api-daemon/pvcapid/helper.py +++ b/api-daemon/pvcapid/helper.py @@ -978,6 +978,62 @@ def net_acl_remove(zkhandler, network, description): return output, retcode +# +# SR-IOV functions +# +@ZKConnection(config) +def sriov_pf_list(zkhandler, node): + """ + List all PFs on a given node. + """ + retflag, retdata = pvc_network.get_list_sriov_pf(zkhandler, node) + + if retflag: + if retdata: + retcode = 200 + else: + retcode = 404 + retdata = { + 'message': 'PF not found.' + } + else: + retcode = 400 + retdata = { + 'message': retdata + } + + return retdata, retcode + + +@ZKConnection(config) +def sriov_vf_list(zkhandler, node, pf=None): + """ + List all VFs on a given node, optionally limited to PF. + """ + retflag, retdata = pvc_network.get_list_sriov_vf(zkhandler, node, pf) + + if retflag: + retcode = 200 + else: + retcode = 400 + + if retflag: + if retdata: + retcode = 200 + else: + retcode = 404 + retdata = { + 'message': 'VF not found.' 
+ } + else: + retcode = 400 + retdata = { + 'message': retdata + } + + return retdata, retcode + + # # Ceph functions # diff --git a/daemon-common/network.py b/daemon-common/network.py index 67f7bdea..d810d1dd 100644 --- a/daemon-common/network.py +++ b/daemon-common/network.py @@ -629,3 +629,114 @@ def get_list_acl(zkhandler, network, limit, direction, is_fuzzy=True): acl_list.append(acl) return True, acl_list + + +# +# SR-IOV functions +# +# These are separate since they don't work like other network types +# +def getSRIOVPFInformation(zkhandler, node, pf): + mtu = zkhandler.read(('node.sriov.pf', node, 'sriov_pf.mtu', pf)) + + retcode, vf_list = get_list_sriov_vf(zkhandler, node, pf) + if retcode: + vfs = [vf['phy'] for vf in vf_list if vf['pf'] == pf] + else: + vfs = [] + + # Construct a data structure to represent the data + pf_information = { + 'phy': pf, + 'mtu': mtu, + 'vfs': vfs, + } + return pf_information + + +def get_info_sriov_pf(zkhandler, node, pf): + pf_information = getSRIOVPFInformation(zkhandler, node, pf) + if not pf_information: + return False, 'ERROR: Could not get information about SR-IOV PF "{}" on node "{}"'.format(pf, node) + + return True, pf_information + + +def get_list_sriov_pf(zkhandler, node): + pf_list = list() + pf_phy_list = zkhandler.children(('node.sriov.pf', node)) + for phy in pf_phy_list: + retcode, pf_information = get_info_sriov_pf(zkhandler, node, phy) + if retcode: + pf_list.append(pf_information) + + return True, pf_list + + +def getSRIOVVFInformation(zkhandler, node, vf): + pf = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pf', vf)) + mtu = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.mtu', vf)) + mac = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.mac', vf)) + vlan_id = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.vlan_id', vf)) + vlan_qos = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.vlan_qos', vf)) + tx_rate_min = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.tx_rate_min', 
vf)) + tx_rate_max = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.tx_rate_max', vf)) + spoof_check = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.spoof_check', vf)) + link_state = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.link_state', vf)) + trust = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.trust', vf)) + query_rss = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.query_rss', vf)) + used = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.used', vf)) + used_by_domain = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.used_by', vf)) + + vf_information = { + 'phy': vf, + 'pf': pf, + 'mtu': mtu, + 'mac': mac, + 'config': { + 'vlan_id': vlan_id, + 'vlan_qos': vlan_qos, + 'tx_rate_min': tx_rate_min, + 'tx_rate_max': tx_rate_max, + 'spoof_check': spoof_check, + 'link_state': link_state, + 'trust': trust, + 'query_rss': query_rss, + }, + 'usage': { + 'used': used, + 'domain': used_by_domain, + } + } + return vf_information + + +def get_info_sriov_vf(zkhandler, node, vf): + vf_information = getSRIOVVFInformation(zkhandler, node, vf) + if not vf_information: + return False, 'ERROR: Could not get information about SR-IOV VF "{}" on node "{}"'.format(vf, node) + + return True, vf_information + + +def get_list_sriov_vf(zkhandler, node, pf=None): + vf_list = list() + vf_phy_list = sorted(zkhandler.children(('node.sriov.vf', node))) + for phy in vf_phy_list: + retcode, vf_information = get_info_sriov_vf(zkhandler, node, phy) + if retcode: + if pf is not None: + if vf_information['pf'] == pf: + vf_list.append(vf_information) + else: + vf_list.append(vf_information) + + return True, vf_list + + +def set_sriov_vf_config(zkhandler, node, vf, vlan_id=None, vlan_qos=None, tx_rate_min=None, tx_rate_max=None, spoof_check=None, link_state=None, trust=None, query_rss=None): + pass + + +def set_sriov_vf_vm(zkhandler, node, vf, vm_name, vm_macaddr): + pass From 33195c3c2938b75c6a1e29a9a2d11c15ebec437c Mon Sep 17 00:00:00 2001 From: 
"Joshua M. Boniface" Date: Mon, 21 Jun 2021 17:11:48 -0400 Subject: [PATCH 16/43] Ensure VF list is sorted --- daemon-common/network.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/daemon-common/network.py b/daemon-common/network.py index d810d1dd..d895408d 100644 --- a/daemon-common/network.py +++ b/daemon-common/network.py @@ -641,7 +641,8 @@ def getSRIOVPFInformation(zkhandler, node, pf): retcode, vf_list = get_list_sriov_vf(zkhandler, node, pf) if retcode: - vfs = [vf['phy'] for vf in vf_list if vf['pf'] == pf] + unsorted_vfs = [vf['phy'] for vf in vf_list if vf['pf'] == pf] + vfs = sorted(unsorted_vfs, key=lambda k: int(''.join(re.findall(r'[0-9]', k)))) else: vfs = [] From ae480d6cc1cfdeb2bac866f7c513e35d7cb3f6e3 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Mon, 21 Jun 2021 17:12:14 -0400 Subject: [PATCH 17/43] Add SR-IOV listing/info endpoints to API --- api-daemon/pvcapid/flaskapi.py | 80 ++++++++++++++ docs/manuals/swagger.json | 196 ++++++++++++++++++++++++++++++++- 2 files changed, 274 insertions(+), 2 deletions(-) diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py index 54691368..dc9cc588 100755 --- a/api-daemon/pvcapid/flaskapi.py +++ b/api-daemon/pvcapid/flaskapi.py @@ -2770,6 +2770,27 @@ class API_SRIOV_PF_Root(Resource): api.add_resource(API_SRIOV_PF_Root, '/sriov/pf') +# /sriov/pf/ +class API_SRIOV_PF_Node(Resource): + @Authenticator + def get(self, node): + """ + Return a list of SR-IOV PFs on node {node} + --- + tags: + - network / sriov + responses: + 200: + description: OK + schema: + $ref: '#/definitions/sriov_pf' + """ + return api_helper.sriov_pf_list(node) + + +api.add_resource(API_SRIOV_PF_Node, '/sriov/pf/') + + # /sriov/vf class API_SRIOV_VF_Root(Resource): @RequestParser([ @@ -2847,6 +2868,65 @@ class API_SRIOV_VF_Root(Resource): api.add_resource(API_SRIOV_VF_Root, '/sriov/vf') +# /sriov/vf/ +class API_SRIOV_VF_Node(Resource): + @RequestParser([ + {'name': 'pf', 'required': 
False, 'helptext': "A PF parent may be specified."}, + ]) + @Authenticator + def get(self, node, reqargs): + """ + Return a list of SR-IOV VFs on node {node}, optionally limited to those in the specified PF + --- + tags: + - network / sriov + responses: + 200: + description: OK + schema: + $ref: '#/definitions/sriov_vf' + """ + return api_helper.sriov_vf_list(node, reqargs.get('pf', None)) + + +api.add_resource(API_SRIOV_VF_Node, '/sriov/vf/') + + +# /sriov/vf// +class API_SRIOV_VF_Element(Resource): + @Authenticator + def get(self, node, vf): + """ + Return information about {vf} on {node} + --- + tags: + - network / sriov + responses: + 200: + description: OK + schema: + $ref: '#/definitions/sriov_vf' + 404: + description: Not found + schema: + type: object + id: Message + """ + vf_list = list() + full_vf_list, _ = api_helper.sriov_vf_list(node) + for vf_element in full_vf_list: + if vf_element['phy'] == vf: + vf_list.append(vf_element) + + if len(vf_list) == 1: + return vf_list, 200 + else: + return {'message': "No VF '{}' found on node '{}'".format(vf, node)}, 404 + + +api.add_resource(API_SRIOV_VF_Element, '/sriov/vf//') + + ########################################################## # Client API - Storage ########################################################## diff --git a/docs/manuals/swagger.json b/docs/manuals/swagger.json index c52cfc92..af4eafaa 100644 --- a/docs/manuals/swagger.json +++ b/docs/manuals/swagger.json @@ -764,6 +764,99 @@ }, "type": "object" }, + "sriov_pf": { + "properties": { + "mtu": { + "description": "The MTU of the SR-IOV PF device", + "type": "string" + }, + "phy": { + "description": "The name of the SR-IOV PF device", + "type": "string" + }, + "vfs": { + "items": { + "description": "The PHY name of a VF of this PF", + "type": "string" + }, + "type": "list" + } + }, + "type": "object" + }, + "sriov_vf": { + "properties": { + "config": { + "id": "sriov_vf_config", + "properties": { + "link_state": { + "description": "The current 
SR-IOV VF link state (either enabled, disabled, or auto)", + "type": "string" + }, + "query_rss": { + "description": "Whether VF RSS querying is enabled or disabled", + "type": "boolean" + }, + "spoof_check": { + "description": "Whether device spoof checking is enabled or disabled", + "type": "boolean" + }, + "trust": { + "description": "Whether guest device trust is enabled or disabled", + "type": "boolean" + }, + "tx_rate_max": { + "description": "The maximum TX rate of the SR-IOV VF device", + "type": "string" + }, + "tx_rate_min": { + "description": "The minimum TX rate of the SR-IOV VF device", + "type": "string" + }, + "vlan_id": { + "description": "The tagged vLAN ID of the SR-IOV VF device", + "type": "string" + }, + "vlan_qos": { + "description": "The QOS group of the tagged vLAN", + "type": "string" + } + }, + "type": "object" + }, + "mac": { + "description": "The current MAC address of the VF device", + "type": "string" + }, + "mtu": { + "description": "The current MTU of the VF device", + "type": "integer" + }, + "pf": { + "description": "The name of the SR-IOV PF parent of this VF device", + "type": "string" + }, + "phy": { + "description": "The name of the SR-IOV VF device", + "type": "string" + }, + "usage": { + "id": "sriov_vf_usage", + "properties": { + "domain": { + "description": "The UUID of the domain the SR-IOV VF is currently used by", + "type": "boolean" + }, + "used": { + "description": "Whether the SR-IOV VF is currently used by a VM or not", + "type": "boolean" + } + }, + "type": "object" + } + }, + "type": "object" + }, "storage-template": { "properties": { "disks": { @@ -1459,8 +1552,15 @@ }, "/api/v1/initialize": { "post": { - "description": "Note: Normally used only once during cluster bootstrap; checks for the existence of the \"/primary_node\" key before proceeding and returns 400 if found", + "description": "
If the 'overwrite' option is not True, the cluster will return 400 if the `/config/primary_node` key is found. If 'overwrite' is True, the existing cluster
data will be erased and new, empty data written in its place.

All node daemons should be stopped before running this command, and the API daemon started manually to avoid undefined behavior.", "parameters": [ + { + "description": "A flag to enable or disable (default) overwriting existing data", + "in": "query", + "name": "overwrite", + "required": false, + "type": "bool" + }, { "description": "A confirmation string to ensure that the API consumer really means it", "in": "query", @@ -4453,6 +4553,97 @@ ] } }, + "/api/v1/sriov/pf": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_pf" + } + } + }, + "summary": "Return a list of SR-IOV PFs on a given node", + "tags": [ + "network / sriov" + ] + } + }, + "/api/v1/sriov/pf/{node}": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_pf" + } + } + }, + "summary": "Return a list of SR-IOV PFs on node {node}", + "tags": [ + "network / sriov" + ] + } + }, + "/api/v1/sriov/vf": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_vf" + } + } + }, + "summary": "Return a list of SR-IOV VFs on a given node, optionally limited to those in the specified PF", + "tags": [ + "network / sriov" + ] + } + }, + "/api/v1/sriov/vf/{node}": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_vf" + } + } + }, + "summary": "Return a list of SR-IOV VFs on node {node}, optionally limited to those in the specified PF", + "tags": [ + "network / sriov" + ] + } + }, + "/api/v1/sriov/vf/{node}/{vf}": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_vf" + } + }, + "404": { + "description": "Not found", + "schema": { + "$ref": "#/definitions/Message" + } + } + }, + "summary": "Return information about {vf} on {node}", + 
"tags": [ + "network / sriov" + ] + } + }, "/api/v1/status": { "get": { "description": "", @@ -5721,7 +5912,8 @@ "mem", "vcpus", "load", - "vms" + "vms", + "none (cluster default)" ], "in": "query", "name": "selector", From e13baf8bd36f62b4ba39fe380c166c809ae9a2ba Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Mon, 21 Jun 2021 17:12:53 -0400 Subject: [PATCH 18/43] Add initial SR-IOV list/info to CLI --- client-cli/cli_lib/network.py | 331 ++++++++++++++++++++++++++++++++-- client-cli/pvc.py | 124 +++++++++++++ 2 files changed, 438 insertions(+), 17 deletions(-) diff --git a/client-cli/cli_lib/network.py b/client-cli/cli_lib/network.py index fa006e92..2a410872 100644 --- a/client-cli/cli_lib/network.py +++ b/client-cli/cli_lib/network.py @@ -360,7 +360,6 @@ def net_acl_add(config, net, direction, description, rule, order): def net_acl_remove(config, net, description): - """ Remove a network ACL @@ -378,27 +377,88 @@ def net_acl_remove(config, net, description): return retstatus, response.json().get('message', '') +# +# SR-IOV functions +# +def net_sriov_pf_list(config, node): + """ + List all PFs on NODE + + API endpoint: GET /api/v1/sriov/pf/ + API arguments: node=node + API schema: [{json_data_object},{json_data_object},etc.] + """ + response = call_api(config, 'get', '/sriov/pf/{}'.format(node)) + + if response.status_code == 200: + return True, response.json() + else: + return False, response.json().get('message', '') + + +def net_sriov_vf_list(config, node, pf=None): + """ + List all VFs on NODE, optionally limited by PF + + API endpoint: GET /api/v1/sriov/vf/ + API arguments: node=node, pf=pf + API schema: [{json_data_object},{json_data_object},etc.] 
+ """ + params = dict() + params['pf'] = pf + + response = call_api(config, 'get', '/sriov/vf/{}'.format(node), params=params) + + if response.status_code == 200: + return True, response.json() + else: + return False, response.json().get('message', '') + + +def net_sriov_vf_info(config, node, vf): + """ + Get info about VF on NODE + + API endpoint: GET /api/v1/sriov/vf// + API arguments: + API schema: [{json_data_object}] + """ + response = call_api(config, 'get', '/sriov/vf/{}/{}'.format(node, vf)) + + if response.status_code == 200: + if isinstance(response.json(), list) and len(response.json()) != 1: + # No exact match; return not found + return False, "VF not found." + else: + # Return a single instance if the response is a list + if isinstance(response.json(), list): + return True, response.json()[0] + # This shouldn't happen, but is here just in case + else: + return True, response.json() + else: + return False, response.json().get('message', '') + + # # Output display functions # -def getOutputColours(network_information): - if network_information['ip6']['network'] != "None": - v6_flag_colour = ansiprint.green() +def getColour(value): + if value in ['True', "start"]: + return ansiprint.green() + elif value in ["restart", "shutdown"]: + return ansiprint.yellow() + elif value in ["stop", "fail"]: + return ansiprint.red() else: - v6_flag_colour = ansiprint.blue() - if network_information['ip4']['network'] != "None": - v4_flag_colour = ansiprint.green() - else: - v4_flag_colour = ansiprint.blue() + return ansiprint.blue() - if network_information['ip6']['dhcp_flag'] == "True": - dhcp6_flag_colour = ansiprint.green() - else: - dhcp6_flag_colour = ansiprint.blue() - if network_information['ip4']['dhcp_flag'] == "True": - dhcp4_flag_colour = ansiprint.green() - else: - dhcp4_flag_colour = ansiprint.blue() + +def getOutputColours(network_information): + v6_flag_colour = getColour(network_information['ip6']['network']) + v4_flag_colour = 
getColour(network_information['ip4']['network']) + dhcp6_flag_colour = getColour(network_information['ip6']['dhcp_flag']) + dhcp4_flag_colour = getColour(network_information['ip4']['dhcp_flag']) return v6_flag_colour, v4_flag_colour, dhcp6_flag_colour, dhcp4_flag_colour @@ -700,3 +760,240 @@ def format_list_acl(acl_list): ) return '\n'.join(sorted(acl_list_output)) + + +def format_list_sriov_pf(pf_list): + # The maximum column width of the VFs column + max_vfs_length = 70 + + # Handle when we get an empty entry + if not pf_list: + pf_list = list() + + pf_list_output = [] + + # Determine optimal column widths + pf_phy_length = 6 + pf_mtu_length = 4 + pf_vfs_length = 4 + + for pf_information in pf_list: + # phy column + _pf_phy_length = len(str(pf_information['phy'])) + 1 + if _pf_phy_length > pf_phy_length: + pf_phy_length = _pf_phy_length + # mtu column + _pf_mtu_length = len(str(pf_information['mtu'])) + 1 + if _pf_mtu_length > pf_mtu_length: + pf_mtu_length = _pf_mtu_length + # vfs column + _pf_vfs_length = len(str(', '.join(pf_information['vfs']))) + 1 + if _pf_vfs_length > pf_vfs_length: + pf_vfs_length = _pf_vfs_length + + # We handle columnizing very long lists later + if pf_vfs_length > max_vfs_length: + pf_vfs_length = max_vfs_length + + # Format the string (header) + pf_list_output.append('{bold}\ +{pf_phy: <{pf_phy_length}} \ +{pf_mtu: <{pf_mtu_length}} \ +{pf_vfs: <{pf_vfs_length}} \ +{end_bold}'.format( + bold=ansiprint.bold(), + end_bold=ansiprint.end(), + pf_phy_length=pf_phy_length, + pf_mtu_length=pf_mtu_length, + pf_vfs_length=pf_vfs_length, + pf_phy='Device', + pf_mtu='MTU', + pf_vfs='VFs') + ) + + for pf_information in pf_list: + # Figure out how to nicely columnize our list + nice_vfs_list = [list()] + vfs_lines = 0 + cur_vfs_length = 0 + for vfs in pf_information['vfs']: + vfs_len = len(vfs) + cur_vfs_length += vfs_len + 2 # for the comma and space + if cur_vfs_length > max_vfs_length: + cur_vfs_length = 0 + vfs_lines += 1 + 
nice_vfs_list.append(list()) + nice_vfs_list[vfs_lines].append(vfs) + + # Append the lines + pf_list_output.append('{bold}\ +{pf_phy: <{pf_phy_length}} \ +{pf_mtu: <{pf_mtu_length}} \ +{pf_vfs: <{pf_vfs_length}} \ +{end_bold}'.format( + bold='', + end_bold='', + pf_phy_length=pf_phy_length, + pf_mtu_length=pf_mtu_length, + pf_vfs_length=pf_vfs_length, + pf_phy=pf_information['phy'], + pf_mtu=pf_information['mtu'], + pf_vfs=', '.join(nice_vfs_list[0])) + ) + + if len(nice_vfs_list) > 1: + for idx in range(1, len(nice_vfs_list)): + print(idx) + pf_list_output.append('{bold}\ +{pf_phy: <{pf_phy_length}} \ +{pf_mtu: <{pf_mtu_length}} \ +{pf_vfs: <{pf_vfs_length}} \ +{end_bold}'.format( + bold='', + end_bold='', + pf_phy_length=pf_phy_length, + pf_mtu_length=pf_mtu_length, + pf_vfs_length=pf_vfs_length, + pf_phy='', + pf_mtu='', + pf_vfs=', '.join(nice_vfs_list[idx])) + ) + + return '\n'.join(pf_list_output) + + +def format_list_sriov_vf(vf_list): + # Handle when we get an empty entry + if not vf_list: + vf_list = list() + + vf_list_output = [] + + # Determine optimal column widths + vf_phy_length = 4 + vf_pf_length = 3 + vf_mtu_length = 4 + vf_mac_length = 11 + vf_used_length = 5 + vf_domain_length = 5 + + for vf_information in vf_list: + # phy column + _vf_phy_length = len(str(vf_information['phy'])) + 1 + if _vf_phy_length > vf_phy_length: + vf_phy_length = _vf_phy_length + # pf column + _vf_pf_length = len(str(vf_information['pf'])) + 1 + if _vf_pf_length > vf_pf_length: + vf_pf_length = _vf_pf_length + # mtu column + _vf_mtu_length = len(str(vf_information['mtu'])) + 1 + if _vf_mtu_length > vf_mtu_length: + vf_mtu_length = _vf_mtu_length + # mac column + _vf_mac_length = len(str(vf_information['mac'])) + 1 + if _vf_mac_length > vf_mac_length: + vf_mac_length = _vf_mac_length + # used column + _vf_used_length = len(str(vf_information['usage']['used'])) + 1 + if _vf_used_length > vf_used_length: + vf_used_length = _vf_used_length + # domain column + _vf_domain_length 
= len(str(vf_information['usage']['domain'])) + 1 + if _vf_domain_length > vf_domain_length: + vf_domain_length = _vf_domain_length + + # Format the string (header) + vf_list_output.append('{bold}\ +{vf_phy: <{vf_phy_length}} \ +{vf_pf: <{vf_pf_length}} \ +{vf_mtu: <{vf_mtu_length}} \ +{vf_mac: <{vf_mac_length}} \ +{vf_used: <{vf_used_length}} \ +{vf_domain: <{vf_domain_length}} \ +{end_bold}'.format( + bold=ansiprint.bold(), + end_bold=ansiprint.end(), + vf_phy_length=vf_phy_length, + vf_pf_length=vf_pf_length, + vf_mtu_length=vf_mtu_length, + vf_mac_length=vf_mac_length, + vf_used_length=vf_used_length, + vf_domain_length=vf_domain_length, + vf_phy='Device', + vf_pf='PF', + vf_mtu='MTU', + vf_mac='MAC Address', + vf_used='Used', + vf_domain='Domain') + ) + + for vf_information in vf_list: + vf_list_output.append('{bold}\ +{vf_phy: <{vf_phy_length}} \ +{vf_pf: <{vf_pf_length}} \ +{vf_mtu: <{vf_mtu_length}} \ +{vf_mac: <{vf_mac_length}} \ +{vf_used: <{vf_used_length}} \ +{vf_domain: <{vf_domain_length}} \ +{end_bold}'.format( + bold=ansiprint.bold(), + end_bold=ansiprint.end(), + vf_phy_length=vf_phy_length, + vf_pf_length=vf_pf_length, + vf_mtu_length=vf_mtu_length, + vf_mac_length=vf_mac_length, + vf_used_length=vf_used_length, + vf_domain_length=vf_domain_length, + vf_phy=vf_information['phy'], + vf_pf=vf_information['pf'], + vf_mtu=vf_information['mtu'], + vf_mac=vf_information['mac'], + vf_used=vf_information['usage']['used'], + vf_domain=vf_information['usage']['domain']) + ) + + return '\n'.join(vf_list_output) + + +def format_info_sriov_vf(config, vf_information, node): + if not vf_information: + return "No VF found" + + # Get information on the using VM if applicable + if vf_information['usage']['used'] == 'True' and vf_information['usage']['domain']: + vm_information = call_api(config, 'get', '/vm/{vm}'.format(vm=vf_information['usage']['domain'])).json() + if isinstance(vm_information, list) and len(vm_information) > 0: + vm_information = 
vm_information[0] + else: + vm_information = None + + # Format a nice output: do this line-by-line then concat the elements at the end + ainformation = [] + ainformation.append('{}SR-IOV VF information:{}'.format(ansiprint.bold(), ansiprint.end())) + ainformation.append('') + # Basic information + ainformation.append('{}PHY:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['phy'])) + ainformation.append('{}PF:{} {} @ {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pf'], node)) + ainformation.append('{}MTU:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['mtu'])) + ainformation.append('{}MAC Address:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['mac'])) + ainformation.append('') + # Configuration information + ainformation.append('{}vLAN ID:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['vlan_id'])) + ainformation.append('{}vLAN QOS priority:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['vlan_qos'])) + ainformation.append('{}Minimum TX Rate:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['tx_rate_min'])) + ainformation.append('{}Maximum TX Rate:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['tx_rate_max'])) + ainformation.append('{}Link State:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['link_state'])) + ainformation.append('{}Spoof Checking:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['config']['spoof_check']), vf_information['config']['spoof_check'], ansiprint.end())) + ainformation.append('{}VF User Trust:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['config']['trust']), vf_information['config']['trust'], ansiprint.end())) + ainformation.append('{}Query RSS Config:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['config']['query_rss']), 
vf_information['config']['query_rss'], ansiprint.end())) + ainformation.append('') + # Usage information + ainformation.append('{}VF Used:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['usage']['used']), vf_information['usage']['used'], ansiprint.end())) + if vf_information['usage']['used'] == 'True' and vm_information is not None: + ainformation.append('{}Using Domain:{} {} ({}) ({}{}{})'.format(ansiprint.purple(), ansiprint.end(), vf_information['usage']['domain'], vm_information['name'], getColour(vm_information['state']), vm_information['state'], ansiprint.end())) + else: + ainformation.append('{}Using Domain:{} N/A'.format(ansiprint.purple(), ansiprint.end())) + + # Join it all together + return '\n'.join(ainformation) diff --git a/client-cli/pvc.py b/client-cli/pvc.py index 3a04b5ac..7495530b 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -2101,6 +2101,120 @@ def net_acl_list(net, limit, direction): cleanup(retcode, retdata) +############################################################################### +# pvc network sriov +############################################################################### +@click.group(name='sriov', short_help='Manage SR-IOV network resources.', context_settings=CONTEXT_SETTINGS) +def net_sriov(): + """ + Manage SR-IOV network resources on nodes (PFs and VFs). + """ + pass + + +############################################################################### +# pvc network sriov pf +############################################################################### +@click.group(name='pf', short_help='Manage PF devices.', context_settings=CONTEXT_SETTINGS) +def net_sriov_pf(): + """ + Manage SR-IOV PF devices on nodes. 
+ """ + pass + + +############################################################################### +# pvc network sriov pf list +############################################################################### +@click.command(name='list', short_help='List PF devices.') +@click.argument( + 'node' +) +@cluster_req +def net_sriov_pf_list(node): + """ + List all SR-IOV PFs on NODE. + """ + retcode, retdata = pvc_network.net_sriov_pf_list(config, node) + if retcode: + retdata = pvc_network.format_list_sriov_pf(retdata) + cleanup(retcode, retdata) + + +############################################################################### +# pvc network sriov vf +############################################################################### +@click.group(name='vf', short_help='Manage VF devices.', context_settings=CONTEXT_SETTINGS) +def net_sriov_vf(): + """ + Manage SR-IOV VF devices on nodes. + """ + pass + + +############################################################################### +# pvc network sriov vf set +############################################################################### +@click.command(name='set', short_help='Set VF device properties.') +@click.argument( + 'node' +) +@click.argument( + 'vf' +) +@cluster_req +def net_sriov_vf_set(node, pf): + """ + Set a property of SR-IOV VF on NODE. + """ + retcode, retdata = pvc_network.net_sriov_vf_list(config, node, pf) + if retcode: + retdata = pvc_network.format_list_sriov_vf(retdata) + cleanup(retcode, retdata) + + +############################################################################### +# pvc network sriov vf list +############################################################################### +@click.command(name='list', short_help='List VF devices.') +@click.argument( + 'node' +) +@click.argument( + 'pf', default=None, required=False +) +@cluster_req +def net_sriov_vf_list(node, pf): + """ + List all SR-IOV VFs on NODE, optionally limited to device PF. 
+ """ + retcode, retdata = pvc_network.net_sriov_vf_list(config, node, pf) + if retcode: + retdata = pvc_network.format_list_sriov_vf(retdata) + cleanup(retcode, retdata) + + +############################################################################### +# pvc network sriov vf info +############################################################################### +@click.command(name='info', short_help='List VF devices.') +@click.argument( + 'node' +) +@click.argument( + 'vf' +) +@cluster_req +def net_sriov_vf_info(node, vf): + """ + Show details of the SR-IOV VF on NODE. + """ + retcode, retdata = pvc_network.net_sriov_vf_info(config, node, vf) + if retcode: + retdata = pvc_network.format_info_sriov_vf(config, retdata, node) + cleanup(retcode, retdata) + + ############################################################################### # pvc storage ############################################################################### @@ -4475,6 +4589,7 @@ cli_network.add_command(net_info) cli_network.add_command(net_list) cli_network.add_command(net_dhcp) cli_network.add_command(net_acl) +cli_network.add_command(net_sriov) net_dhcp.add_command(net_dhcp_list) net_dhcp.add_command(net_dhcp_add) @@ -4484,6 +4599,15 @@ net_acl.add_command(net_acl_add) net_acl.add_command(net_acl_remove) net_acl.add_command(net_acl_list) +net_sriov.add_command(net_sriov_pf) +net_sriov.add_command(net_sriov_vf) + +net_sriov_pf.add_command(net_sriov_pf_list) + +net_sriov_vf.add_command(net_sriov_vf_list) +net_sriov_vf.add_command(net_sriov_vf_info) +net_sriov_vf.add_command(net_sriov_vf_set) + ceph_benchmark.add_command(ceph_benchmark_run) ceph_benchmark.add_command(ceph_benchmark_info) ceph_benchmark.add_command(ceph_benchmark_list) From 13cc0f986f2e16e7bfa5f3226ea2b5d6954bafbc Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Mon, 21 Jun 2021 18:40:11 -0400 Subject: [PATCH 19/43] Implement SR-IOV VF config set Also fixes some random bugs, adds proper interface sorting, and assorted tweaks. --- api-daemon/pvcapid/flaskapi.py | 87 +++++++++++++ api-daemon/pvcapid/helper.py | 18 +++ client-cli/cli_lib/network.py | 52 +++++++- client-cli/pvc.py | 44 ++++++- daemon-common/common.py | 23 ++++ daemon-common/network.py | 110 ++++++++++++++-- debian/changelog | 227 +-------------------------------- node-daemon/pvcnoded/Daemon.py | 4 +- 8 files changed, 322 insertions(+), 243 deletions(-) diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py index dc9cc588..2bca5f8c 100755 --- a/api-daemon/pvcapid/flaskapi.py +++ b/api-daemon/pvcapid/flaskapi.py @@ -2923,6 +2923,93 @@ class API_SRIOV_VF_Element(Resource): else: return {'message': "No VF '{}' found on node '{}'".format(vf, node)}, 404 + @RequestParser([ + {'name': 'vlan_id'}, + {'name': 'vlan_qos'}, + {'name': 'tx_rate_min'}, + {'name': 'tx_rate_max'}, + {'name': 'link_state', 'choices': ('auto', 'enable', 'disable'), 'helptext': "A valid state must be specified"}, + {'name': 'spoof_check'}, + {'name': 'trust'}, + {'name': 'query_rss'}, + ]) + @Authenticator + def put(self, node, vf, reqargs): + """ + Set the configuration of {vf} on {node} + --- + tags: + - network / sriov + parameters: + - in: query + name: vlan_id + type: integer + required: false + description: The vLAN ID for vLAN tagging (0 is disabled) + - in: query + name: vlan_qos + type: integer + required: false + description: The vLAN QOS priority (0 is disabled) + - in: query + name: tx_rate_min + type: integer + required: false + description: The minimum TX rate (0 is disabled) + - in: query + name: tx_rate_max + type: integer + required: false + description: The maximum TX rate (0 is disabled) + - in: query + name: link_state + type: string + required: false + description: The administrative link state + enum: + - auto + - enable + - disable + 
- in: query + name: spoof_check + type: boolean + required: false + description: Enable or disable spoof checking + - in: query + name: trust + type: boolean + required: false + description: Enable or disable VF user trust + - in: query + name: query_rss + type: boolean + required: false + description: Enable or disable query RSS support + responses: + 200: + description: OK + schema: + type: object + id: Message + 400: + description: Bad request + schema: + type: object + id: Message + """ + return api_helper.update_sriov_vf_config( + node, + vf, + reqargs.get('vlan_id', None), + reqargs.get('vlan_qos', None), + reqargs.get('tx_rate_min', None), + reqargs.get('tx_rate_max', None), + reqargs.get('link_state', None), + reqargs.get('spoof_check', None), + reqargs.get('trust', None), + reqargs.get('query_rss', None), + ) + api.add_resource(API_SRIOV_VF_Element, '/sriov/vf//') diff --git a/api-daemon/pvcapid/helper.py b/api-daemon/pvcapid/helper.py index bf6f345f..81ce3093 100755 --- a/api-daemon/pvcapid/helper.py +++ b/api-daemon/pvcapid/helper.py @@ -1034,6 +1034,24 @@ def sriov_vf_list(zkhandler, node, pf=None): return retdata, retcode +@ZKConnection(config) +def update_sriov_vf_config(zkhandler, node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss): + """ + Update configuration of a VF on NODE. 
+ """ + retflag, retdata = pvc_network.set_sriov_vf_config(zkhandler, node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss) + + if retflag: + retcode = 200 + else: + retcode = 400 + + output = { + 'message': retdata.replace('\"', '\'') + } + return output, retcode + + # # Ceph functions # diff --git a/client-cli/cli_lib/network.py b/client-cli/cli_lib/network.py index 2a410872..4ae9cdd6 100644 --- a/client-cli/cli_lib/network.py +++ b/client-cli/cli_lib/network.py @@ -385,7 +385,7 @@ def net_sriov_pf_list(config, node): List all PFs on NODE API endpoint: GET /api/v1/sriov/pf/ - API arguments: node=node + API arguments: node={node} API schema: [{json_data_object},{json_data_object},etc.] """ response = call_api(config, 'get', '/sriov/pf/{}'.format(node)) @@ -396,12 +396,59 @@ def net_sriov_pf_list(config, node): return False, response.json().get('message', '') +def net_sriov_vf_set(config, node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss): + """ + Mdoify configuration of a SR-IOV VF + + API endpoint: PUT /api/v1/sriov/vf// + API arguments: vlan_id={vlan_id}, vlan_qos={vlan_qos}, tx_rate_min={tx_rate_min}, tx_rate_max={tx_rate_max}, + link_state={link_state}, spoof_check={spoof_check}, trust={trust}, query_rss={query_rss} + API schema: {"message": "{data}"} + """ + params = dict() + + # Update any params that we've sent + if vlan_id is not None: + params['vlan_id'] = vlan_id + + if vlan_qos is not None: + params['vlan_qos'] = vlan_qos + + if tx_rate_min is not None: + params['tx_rate_min'] = tx_rate_min + + if tx_rate_max is not None: + params['tx_rate_max'] = tx_rate_max + + if link_state is not None: + params['link_state'] = link_state + + if spoof_check is not None: + params['spoof_check'] = spoof_check + + if trust is not None: + params['trust'] = trust + + if query_rss is not None: + params['query_rss'] = query_rss + + # Write the new configuration to the API + response = 
call_api(config, 'put', '/sriov/vf/{node}/{vf}'.format(node=node, vf=vf), params=params) + + if response.status_code == 200: + retstatus = True + else: + retstatus = False + + return retstatus, response.json().get('message', '') + + def net_sriov_vf_list(config, node, pf=None): """ List all VFs on NODE, optionally limited by PF API endpoint: GET /api/v1/sriov/vf/ - API arguments: node=node, pf=pf + API arguments: node={node}, pf={pf} API schema: [{json_data_object},{json_data_object},etc.] """ params = dict() @@ -843,7 +890,6 @@ def format_list_sriov_pf(pf_list): if len(nice_vfs_list) > 1: for idx in range(1, len(nice_vfs_list)): - print(idx) pf_list_output.append('{bold}\ {pf_phy: <{pf_phy_length}} \ {pf_mtu: <{pf_mtu_length}} \ diff --git a/client-cli/pvc.py b/client-cli/pvc.py index 7495530b..cfc57c51 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -2156,6 +2156,39 @@ def net_sriov_vf(): # pvc network sriov vf set ############################################################################### @click.command(name='set', short_help='Set VF device properties.') +@click.option( + '--vlan-id', 'vlan_id', default=None, show_default=False, + help='The vLAN ID for vLAN tagging.' +) +@click.option( + '--qos-prio', 'vlan_qos', default=None, show_default=False, + help='The vLAN QOS priority.' +) +@click.option( + '--tx-min', 'tx_rate_min', default=None, show_default=False, + help='The minimum TX rate.' +) +@click.option( + '--tx-max', 'tx_rate_max', default=None, show_default=False, + help='The maximum TX rate.' +) +@click.option( + '--link-state', 'link_state', default=None, show_default=False, + type=click.Choice(['auto', 'enable', 'disable']), + help='The administrative link state.' +) +@click.option( + '--spoof-check/--no-spoof-check', 'spoof_check', is_flag=True, default=None, show_default=False, + help='Enable or disable spoof checking.' 
+) +@click.option( + '--trust/--no-trust', 'trust', is_flag=True, default=None, show_default=False, + help='Enable or disable VF user trust.' +) +@click.option( + '--query-rss/--no-query-rss', 'query_rss', is_flag=True, default=None, show_default=False, + help='Enable or disable query RSS support.' +) @click.argument( 'node' ) @@ -2163,14 +2196,15 @@ def net_sriov_vf(): 'vf' ) @cluster_req -def net_sriov_vf_set(node, pf): +def net_sriov_vf_set(node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss): """ Set a property of SR-IOV VF on NODE. """ - retcode, retdata = pvc_network.net_sriov_vf_list(config, node, pf) - if retcode: - retdata = pvc_network.format_list_sriov_vf(retdata) - cleanup(retcode, retdata) + if vlan_id is None and vlan_qos is None and tx_rate_min is None and tx_rate_max is None and link_state is None and spoof_check is None and trust is None and query_rss is None: + cleanup(False, 'At least one configuration property must be specified to update.') + + retcode, retmsg = pvc_network.net_sriov_vf_set(config, node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss) + cleanup(retcode, retmsg) ############################################################################### diff --git a/daemon-common/common.py b/daemon-common/common.py index f26f1966..6271cc85 100644 --- a/daemon-common/common.py +++ b/daemon-common/common.py @@ -26,6 +26,7 @@ import subprocess import signal from json import loads from re import match as re_match +from re import split as re_split from distutils.util import strtobool from threading import Thread from shlex import split as shlex_split @@ -685,3 +686,25 @@ def removeIPAddress(ipaddr, cidrnetmask, dev): dev ) ) + + +# +# Sort a set of interface names (e.g. 
ens1f1v10) +# +def sortInterfaceNames(interface_names): + # We can't handle non-list inputs + if not isinstance(interface_names, list): + return interface_names + + def atoi(text): + return int(text) if text.isdigit() else text + + def natural_keys(text): + """ + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + """ + return [atoi(c) for c in re_split(r'(\d+)', text)] + + return sorted(interface_names, key=natural_keys) diff --git a/daemon-common/network.py b/daemon-common/network.py index d895408d..aedadb6a 100644 --- a/daemon-common/network.py +++ b/daemon-common/network.py @@ -21,6 +21,8 @@ import re +import daemon_lib.common as common + # # Cluster search functions @@ -641,8 +643,7 @@ def getSRIOVPFInformation(zkhandler, node, pf): retcode, vf_list = get_list_sriov_vf(zkhandler, node, pf) if retcode: - unsorted_vfs = [vf['phy'] for vf in vf_list if vf['pf'] == pf] - vfs = sorted(unsorted_vfs, key=lambda k: int(''.join(re.findall(r'[0-9]', k)))) + vfs = common.sortInterfaceNames([vf['phy'] for vf in vf_list if vf['pf'] == pf]) else: vfs = [] @@ -675,6 +676,9 @@ def get_list_sriov_pf(zkhandler, node): def getSRIOVVFInformation(zkhandler, node, vf): + if not zkhandler.exists(('node.sriov.vf', node, 'sriov_vf', vf)): + return [] + pf = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pf', vf)) mtu = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.mtu', vf)) mac = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.mac', vf)) @@ -682,8 +686,8 @@ def getSRIOVVFInformation(zkhandler, node, vf): vlan_qos = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.vlan_qos', vf)) tx_rate_min = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.tx_rate_min', vf)) tx_rate_max = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.tx_rate_max', vf)) - spoof_check = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.spoof_check', vf)) link_state = 
zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.link_state', vf)) + spoof_check = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.spoof_check', vf)) trust = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.trust', vf)) query_rss = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.query_rss', vf)) used = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.used', vf)) @@ -699,8 +703,8 @@ def getSRIOVVFInformation(zkhandler, node, vf): 'vlan_qos': vlan_qos, 'tx_rate_min': tx_rate_min, 'tx_rate_max': tx_rate_max, - 'spoof_check': spoof_check, 'link_state': link_state, + 'spoof_check': spoof_check, 'trust': trust, 'query_rss': query_rss, }, @@ -713,16 +717,26 @@ def getSRIOVVFInformation(zkhandler, node, vf): def get_info_sriov_vf(zkhandler, node, vf): + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + if not valid_node: + return False, 'ERROR: Specified node "{}" is invalid.'.format(node) + vf_information = getSRIOVVFInformation(zkhandler, node, vf) if not vf_information: - return False, 'ERROR: Could not get information about SR-IOV VF "{}" on node "{}"'.format(vf, node) + return False, 'ERROR: Could not find SR-IOV VF "{}" on node "{}"'.format(vf, node) return True, vf_information def get_list_sriov_vf(zkhandler, node, pf=None): + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + if not valid_node: + return False, 'ERROR: Specified node "{}" is invalid.'.format(node) + vf_list = list() - vf_phy_list = sorted(zkhandler.children(('node.sriov.vf', node))) + vf_phy_list = common.sortInterfaceNames(zkhandler.children(('node.sriov.vf', node))) for phy in vf_phy_list: retcode, vf_information = get_info_sriov_vf(zkhandler, node, phy) if retcode: @@ -735,9 +749,87 @@ def get_list_sriov_vf(zkhandler, node, pf=None): return True, vf_list -def set_sriov_vf_config(zkhandler, node, vf, vlan_id=None, vlan_qos=None, tx_rate_min=None, tx_rate_max=None, spoof_check=None, link_state=None, trust=None, 
query_rss=None): - pass +def set_sriov_vf_config(zkhandler, node, vf, vlan_id=None, vlan_qos=None, tx_rate_min=None, tx_rate_max=None, link_state=None, spoof_check=None, trust=None, query_rss=None): + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + if not valid_node: + return False, 'ERROR: Specified node "{}" is invalid.'.format(node) + + # Verify VF is valid + vf_information = getSRIOVVFInformation(zkhandler, node, vf) + if not vf_information: + return False, 'ERROR: Could not find SR-IOV VF "{}" on node "{}".'.format(vf, node) + + update_list = list() + + if vlan_id is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.vlan_id', vf), vlan_id)) + + if vlan_qos is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.vlan_qos', vf), vlan_qos)) + + if tx_rate_min is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.tx_rate_min', vf), tx_rate_min)) + + if tx_rate_max is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.tx_rate_max', vf), tx_rate_max)) + + if link_state is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.link_state', vf), link_state)) + + if spoof_check is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.spoof_check', vf), spoof_check)) + + if trust is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.trust', vf), trust)) + + if query_rss is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.query_rss', vf), query_rss)) + + if len(update_list) < 1: + return False, 'ERROR: No changes to apply.' 
+ + result = zkhandler.write(update_list) + if result: + return True, 'Successfully modified configuration of SR-IOV VF "{}" on node "{}".'.format(vf, node) + else: + return False, 'Failed to modify configuration of SR-IOV VF "{}" on node "{}".'.format(vf, node) def set_sriov_vf_vm(zkhandler, node, vf, vm_name, vm_macaddr): - pass + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + if not valid_node: + return False + + # Verify VF is valid + vf_information = getSRIOVVFInformation(zkhandler, node, vf) + if not vf_information: + return False + + zkhandler.write([ + (('node.sriov.vf', node, 'sriov_vf.used', vf), 'True'), + (('node.sriov.vf', node, 'sriov_vf.used_by', vf), vm_name), + (('node.sriov.vf', node, 'sriov_vf.mac', vf), vm_macaddr), + ]) + + return True + + +def unset_sriov_vf_vm(zkhandler, node, vf): + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + if not valid_node: + return False + + # Verify VF is valid + vf_information = getSRIOVVFInformation(zkhandler, node, vf) + if not vf_information: + return False + + zkhandler.write([ + (('node.sriov.vf', node, 'sriov_vf.used', vf), 'False'), + (('node.sriov.vf', node, 'sriov_vf.used_by', vf), ''), + ]) + + return True diff --git a/debian/changelog b/debian/changelog index 47d24aca..f50f9601 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,226 +1,5 @@ -pvc (0.9.20-0) unstable; urgency=high +pvc (0.9.20~git~git-e13baf8) unstable; urgency=medium - * [Daemons] Implemented a Zookeeper schema handler and version 0 schema - * [Daemons] Completes major refactoring of codebase to make use of the schema handler - * [Daemons] Adds support for dynamic chema changges and "hot reloading" of pvcnoded processes - * [Daemons] Adds a functional testing script for verifying operation against a test cluster - * [Daemons, CLI] Fixes several minor bugs found by the above script - * [Daemons, CLI] Add support for Debian 11 "Bullseye" + * Unstable revision for commit e13baf8 
- -- Joshua M. Boniface Mon, 14 Jun 2021 18:06:27 -0400 - -pvc (0.9.19-0) unstable; urgency=high - - * [CLI] Corrects some flawed conditionals - * [API] Disables SQLAlchemy modification tracking functionality (not used by us) - * [Daemons] Implements new zkhandler module for improved reliability and reusability - * [Daemons] Refactors some code to use new zkhandler module - * [API, CLI] Adds support for "none" migration selector (uses cluster default instead) - * [Daemons] Moves some configuration keys to new /config tree - * [Node Daemon] Increases initial lock timeout for VM migrations to avoid out-of-sync potential - * [Provisioner] Support storing and using textual cluster network labels ("upstream", "storage", "cluster") in templates - * [API] Avoid duplicating existing node states - - -- Joshua M. Boniface Sun, 06 Jun 2021 01:47:41 -0400 - -pvc (0.9.18-0) unstable; urgency=high - - * Adds VM rename functionality to API and CLI client - - -- Joshua M. Boniface Sun, 23 May 2021 17:23:10 -0400 - -pvc (0.9.17-0) unstable; urgency=high - - * [CLI] Fixes bugs in log follow output - - -- Joshua M. Boniface Wed, 19 May 2021 17:06:29 -0400 - -pvc (0.9.16-0) unstable; urgency=high - - * Improves some CLI help messages - * Skips empty local cluster in CLI - * Adjusts how confirmations happen during VM modify restarts - * Fixes bug around corrupted VM log files - * Fixes bug around subprocess pipe exceptions - - -- Joshua M. Boniface Mon, 10 May 2021 01:13:21 -0400 - -pvc (0.9.15-0) unstable; urgency=high - - * [CLI] Adds additional verification (--yes) to several VM management commands - * [CLI] Adds a method to override --yes/confirmation requirements via envvar (PVC_UNSAFE) - * [CLI] Adds description fields to PVC clusters in CLI - - -- Joshua M. 
Boniface Thu, 08 Apr 2021 13:37:47 -0400 - -pvc (0.9.14-0) unstable; urgency=high - - * Fixes bugs around cloned volume provisioning - * Fixes some minor visual bugs - * Minor license update (from GPL3+ to GPL3) - * Adds qemu-guest-agent support to provisioner-created VMs by default - - -- Joshua M. Boniface Tue, 30 Mar 2021 10:27:37 -0400 - -pvc (0.9.13-0) unstable; urgency=high - - * Adds nicer startup messages for daemons - * Adds additional API field for stored_bytes to pool stats - * Fixes sorting issues with snapshot lists - * Fixes missing increment/decrement of snapshot_count on volumes - * Fixes bad calls in pool element API endpoints - * Fixes inconsistent bytes_tohuman behaviour in daemons - * Adds validation and maximum volume size on creation (must be smaller than the pool free space) - - -- Joshua M. Boniface Wed, 17 Feb 2021 11:33:28 -0500 - -pvc (0.9.12-0) unstable; urgency=high - - * Fixes a bug in the pvcnoded service unit file causing a Zookeeper startup race condition - - -- Joshua M. Boniface Thu, 28 Jan 2021 16:29:58 -0500 - -pvc (0.9.11-0) unstable; urgency=high - - * Documentation updates - * Adds VNC information to VM info - * Goes back to external Ceph commands for disk usage - - -- Joshua M. Boniface Tue, 05 Jan 2021 15:58:26 -0500 - -pvc (0.9.10-0) unstable; urgency=high - - * Moves OSD stats uploading to primary, eliminating reporting failures while hosts are down - * Documentation updates - * Significantly improves RBD locking behaviour in several situations, eliminating cold-cluster start issues and failed VM boot-ups after crashes - * Fixes some timeout delays with fencing - * Fixes bug in validating YAML provisioner userdata - - -- Joshua M. 
Boniface Tue, 15 Dec 2020 10:45:15 -0500 - -pvc (0.9.9-0) unstable; urgency=high - - * Adds documentation updates - * Removes single-element list stripping and fixes surrounding bugs - * Adds additional fields to some API endpoints for ease of parsing by clients - * Fixes bugs with network configuration - - -- Joshua M. Boniface Wed, 09 Dec 2020 02:20:20 -0500 - -pvc (0.9.8-0) unstable; urgency=high - - * Adds support for cluster backup/restore - * Moves location of `init` command in CLI to make room for the above - * Cleans up some invalid help messages from the API - - -- Joshua M. Boniface Tue, 24 Nov 2020 12:26:57 -0500 - -pvc (0.9.7-0) unstable; urgency=high - - * Fixes bug with provisioner system template modifications - - -- Joshua M. Boniface Thu, 19 Nov 2020 10:48:28 -0500 - -pvc (0.9.6-0) unstable; urgency=high - - * Fixes bug with migrations - - -- Joshua M. Boniface Tue, 17 Nov 2020 13:01:54 -0500 - -pvc (0.9.5-0) unstable; urgency=high - - * Fixes bug with line count in log follow - * Fixes bug with disk stat output being None - * Adds short pretty health output - * Documentation updates - - -- Joshua M. Boniface Tue, 17 Nov 2020 12:34:04 -0500 - -pvc (0.9.4-0) unstable; urgency=high - - * Fixes major bug in OVA parser - - -- Joshua M. Boniface Tue, 10 Nov 2020 15:33:50 -0500 - -pvc (0.9.3-0) unstable; urgency=high - - * Fixes bugs with image & OVA upload parsing - - -- Joshua M. Boniface Mon, 09 Nov 2020 10:28:15 -0500 - -pvc (0.9.2-0) unstable; urgency=high - - * Major linting of the codebase with flake8; adds linting tools - * Implements CLI-based modification of VM vCPUs, memory, networks, and disks without directly editing XML - * Fixes bug where `pvc vm log -f` would show all 1000 lines before starting - * Fixes bug in default provisioner libvirt schema (`drive` -> `driver` typo) - - -- Joshua M. 
Boniface Sun, 08 Nov 2020 02:03:29 -0500 - -pvc (0.9.1-0) unstable; urgency=high - - * Added per-VM migration method feature - * Fixed bug with provisioner system template listing - - -- Joshua Boniface Thu, 29 Oct 2020 12:15:28 -0400 - -pvc (0.9.0-0) unstable; urgency=high - - * Numerous bugfixes and improvements - - -- Joshua Boniface Sun, 18 Oct 2020 14:31:00 -0400 - -pvc (0.8-1) unstable; urgency=high - - * Fix bug with IPv6 being enabled on bridged interfaces - - -- Joshua Boniface Thu, 15 Oct 2020 11:02:24 -0400 - -pvc (0.8-0) unstable; urgency=medium - - * Numerous bugfixes and improvements - - -- Joshua Boniface Tue, 11 Aug 2020 12:12:07 -0400 - -pvc (0.7-0) unstable; urgency=medium - - * Numerous bugfixes and improvements - - -- Joshua Boniface Sat, 15 Feb 2020 23:24:17 -0500 - -pvc (0.6-0) unstable; urgency=medium - - * Numerous improvements, implementation of provisioner and API client - - -- Joshua Boniface Sat, 08 Feb 2020 18:26:58 -0500 - -pvc (0.5-0) unstable; urgency=medium - - * First public release - - -- Joshua Boniface Thu, 08 Aug 2019 20:55:51 -0400 - -pvc (0.4-0) unstable; urgency=medium - - * Unification of all daemons into node daemon - * Numerous client tweaks - - -- Joshua Boniface Sat, 13 Oct 2018 10:40:14 -0400 - -pvc (0.3-1) unstable; urgency=medium - - * Code and package reorganization pending additional daemons - - -- Joshua Boniface Wed, 12 Sep 2018 12:08:28 -0400 - -pvc (0.2-1) unstable; urgency=medium - - * Minor version bump with stability fixes - - -- Joshua Boniface Wed, 18 Jul 2018 02:18:25 -0400 - -pvc (0.1-1) unstable; urgency=medium - - * Initial packaging release - - -- Joshua Boniface Sun, 17 Jun 2018 02:40:39 -0400 + -- Joshua Boniface Mon, 21 Jun 2021 18:22:08 -0400 diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index e9bafcc8..14a45791 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1160,11 +1160,11 @@ if enable_networking: # SR-IOV VF objects # This is 
a ChildrenWatch just for consistency; the list never changes at runtime @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('node.sriov.vf', myhostname)) - def update_sriov_pfs(new_sriov_vf_list): + def update_sriov_vfs(new_sriov_vf_list): global sriov_vf_list, d_sriov_vf # Add VFs to the list - for vf in sorted(new_sriov_vf_list): + for vf in common.sortInterfaceNames(new_sriov_vf_list): d_sriov_vf[vf] = SRIOVVFInstance.SRIOVVFInstance(vf, zkhandler, config, logger, this_node) sriov_vf_list = sorted(new_sriov_vf_list) From 64d1a37b3cc3be62a94d310776ee978d3f0fe1fd Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Mon, 21 Jun 2021 20:49:45 -0400 Subject: [PATCH 20/43] Add PCIe device paths to SR-IOV VF information This will be used when adding VM network interfaces of type hostdev. --- daemon-common/migrations/versions/1.json | 2 +- daemon-common/network.py | 10 ++++++++ daemon-common/zkhandler.py | 5 ++++ node-daemon/pvcnoded/Daemon.py | 30 +++++++++++++++++++++++- 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/daemon-common/migrations/versions/1.json b/daemon-common/migrations/versions/1.json index 70f5e23f..5ccc1bf0 100644 --- a/daemon-common/migrations/versions/1.json +++ b/daemon-common/migrations/versions/1.json @@ -1 +1 @@ -{"version": "1", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, 
"node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "migrate.sync_lock": "/migrate_sync_lock"}, "network": {"vni": "", "type": "/nettype", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": 
"/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file +{"version": "1", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": 
"/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "migrate.sync_lock": "/migrate_sync_lock"}, "network": {"vni": "", "type": "/nettype", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": 
"/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/network.py b/daemon-common/network.py index aedadb6a..7a4e48d9 100644 --- a/daemon-common/network.py +++ b/daemon-common/network.py @@ -690,6 +690,10 @@ def getSRIOVVFInformation(zkhandler, node, vf): spoof_check = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.spoof_check', vf)) trust = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.trust', vf)) query_rss = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.query_rss', vf)) + pci_domain = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pci.domain', vf)) + pci_bus = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pci.bus', vf)) + pci_slot = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pci.slot', vf)) + pci_function = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pci.function', vf)) used = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.used', vf)) used_by_domain = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.used_by', vf)) @@ -708,6 +712,12 @@ def getSRIOVVFInformation(zkhandler, node, vf): 'trust': trust, 'query_rss': query_rss, }, + 'pci': { + 'domain': pci_domain, + 'bus': pci_bus, + 'slot': pci_slot, + 'function': pci_function, + }, 'usage': { 'used': used, 'domain': used_by_domain, diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index 5bcffa6f..5ecb1cbf 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -509,6 +509,11 @@ class ZKSchema(object): 'config.link_state': '/config/link_state', 'config.trust': '/config/trust', 'config.query_rss': '/config/query_rss', + 'pci': '/pci', + 'pci.domain': '/pci/domain', + 'pci.bus': '/pci/bus', + 'pci.slot': '/pci/slot', 
+ 'pci.function': '/pci/function', 'used': '/used', 'used_by': '/used_by' }, diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 14a45791..57c6ab24 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1121,8 +1121,30 @@ if enable_networking: # 'query_rss_en': False # } vfphy = '{}v{}'.format(pf, vf['vf']) + + # Get the PCIe bus information + dev_pcie_path = None + try: + with open('/sys/class/net/{}/device/uevent'.format(vfphy)) as vfh: + dev_uevent = vfh.readlines() + for line in dev_uevent: + if re.match(r'^PCI_SLOT_NAME=.*', line): + dev_pcie_path = line.split('=')[-1] + except FileNotFoundError: + # Something must already be using the PCIe device + pass + # Add the VF to Zookeeper if it does not yet exist if not zkhandler.exists(('node.sriov.vf', myhostname, 'sriov_vf', vfphy)): + if dev_pcie_path is not None: + pcie_domain, pcie_bus, pcie_slot, pcie_function = re.split(r':|\.', dev_pcie_path) + else: + # We can't add the device - for some reason we can't get any information on its PCIe bus path, + # so just ignore this one, and continue. + # This shouldn't happen under any real circumstances, unless the admin tries to attach a non-existent + # VF to a VM manually, then goes ahead and adds that VF to the system with the VM running. 
+ continue + zkhandler.write([ (('node.sriov.vf', myhostname, 'sriov_vf', vfphy), ''), (('node.sriov.vf', myhostname, 'sriov_vf.pf', vfphy), pf), @@ -1137,9 +1159,15 @@ if enable_networking: (('node.sriov.vf', myhostname, 'sriov_vf.config.link_state', vfphy), vf['link_state']), (('node.sriov.vf', myhostname, 'sriov_vf.config.trust', vfphy), vf['trust']), (('node.sriov.vf', myhostname, 'sriov_vf.config.query_rss', vfphy), vf['query_rss_en']), + (('node.sriov.vf', myhostname, 'sriov_vf.pci', vfphy), ''), + (('node.sriov.vf', myhostname, 'sriov_vf.pci.domain', vfphy), pcie_domain), + (('node.sriov.vf', myhostname, 'sriov_vf.pci.bus', vfphy), pcie_bus), + (('node.sriov.vf', myhostname, 'sriov_vf.pci.slot', vfphy), pcie_slot), + (('node.sriov.vf', myhostname, 'sriov_vf.pci.function', vfphy), pcie_function), (('node.sriov.vf', myhostname, 'sriov_vf.used', vfphy), False), (('node.sriov.vf', myhostname, 'sriov_vf.used_by', vfphy), ''), ]) + # Append the device to the list of VFs sriov_vf_list.append(vfphy) @@ -1660,7 +1688,7 @@ def collect_vm_stats(queue): domain_network_stats = [] for interface in tree.findall('devices/interface'): interface_type = interface.get('type') - if interface_type in ['hostdev']: + if interface_type not in ['bridge']: continue interface_name = interface.find('target').get('dev') interface_bridge = interface.find('source').get('bridge') From 904337b67770cf5d59d8f540cf9b17d1f4534dda Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Mon, 21 Jun 2021 21:19:06 -0400 Subject: [PATCH 21/43] Fix busted changelog from previous commit --- debian/changelog | 227 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 224 insertions(+), 3 deletions(-) diff --git a/debian/changelog b/debian/changelog index f50f9601..47d24aca 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,226 @@ -pvc (0.9.20~git~git-e13baf8) unstable; urgency=medium +pvc (0.9.20-0) unstable; urgency=high - * Unstable revision for commit e13baf8 + * [Daemons] Implemented a Zookeeper schema handler and version 0 schema + * [Daemons] Completes major refactoring of codebase to make use of the schema handler + * [Daemons] Adds support for dynamic chema changges and "hot reloading" of pvcnoded processes + * [Daemons] Adds a functional testing script for verifying operation against a test cluster + * [Daemons, CLI] Fixes several minor bugs found by the above script + * [Daemons, CLI] Add support for Debian 11 "Bullseye" - -- Joshua Boniface Mon, 21 Jun 2021 18:22:08 -0400 + -- Joshua M. Boniface Mon, 14 Jun 2021 18:06:27 -0400 + +pvc (0.9.19-0) unstable; urgency=high + + * [CLI] Corrects some flawed conditionals + * [API] Disables SQLAlchemy modification tracking functionality (not used by us) + * [Daemons] Implements new zkhandler module for improved reliability and reusability + * [Daemons] Refactors some code to use new zkhandler module + * [API, CLI] Adds support for "none" migration selector (uses cluster default instead) + * [Daemons] Moves some configuration keys to new /config tree + * [Node Daemon] Increases initial lock timeout for VM migrations to avoid out-of-sync potential + * [Provisioner] Support storing and using textual cluster network labels ("upstream", "storage", "cluster") in templates + * [API] Avoid duplicating existing node states + + -- Joshua M. 
Boniface Sun, 06 Jun 2021 01:47:41 -0400 + +pvc (0.9.18-0) unstable; urgency=high + + * Adds VM rename functionality to API and CLI client + + -- Joshua M. Boniface Sun, 23 May 2021 17:23:10 -0400 + +pvc (0.9.17-0) unstable; urgency=high + + * [CLI] Fixes bugs in log follow output + + -- Joshua M. Boniface Wed, 19 May 2021 17:06:29 -0400 + +pvc (0.9.16-0) unstable; urgency=high + + * Improves some CLI help messages + * Skips empty local cluster in CLI + * Adjusts how confirmations happen during VM modify restarts + * Fixes bug around corrupted VM log files + * Fixes bug around subprocess pipe exceptions + + -- Joshua M. Boniface Mon, 10 May 2021 01:13:21 -0400 + +pvc (0.9.15-0) unstable; urgency=high + + * [CLI] Adds additional verification (--yes) to several VM management commands + * [CLI] Adds a method to override --yes/confirmation requirements via envvar (PVC_UNSAFE) + * [CLI] Adds description fields to PVC clusters in CLI + + -- Joshua M. Boniface Thu, 08 Apr 2021 13:37:47 -0400 + +pvc (0.9.14-0) unstable; urgency=high + + * Fixes bugs around cloned volume provisioning + * Fixes some minor visual bugs + * Minor license update (from GPL3+ to GPL3) + * Adds qemu-guest-agent support to provisioner-created VMs by default + + -- Joshua M. Boniface Tue, 30 Mar 2021 10:27:37 -0400 + +pvc (0.9.13-0) unstable; urgency=high + + * Adds nicer startup messages for daemons + * Adds additional API field for stored_bytes to pool stats + * Fixes sorting issues with snapshot lists + * Fixes missing increment/decrement of snapshot_count on volumes + * Fixes bad calls in pool element API endpoints + * Fixes inconsistent bytes_tohuman behaviour in daemons + * Adds validation and maximum volume size on creation (must be smaller than the pool free space) + + -- Joshua M. Boniface Wed, 17 Feb 2021 11:33:28 -0500 + +pvc (0.9.12-0) unstable; urgency=high + + * Fixes a bug in the pvcnoded service unit file causing a Zookeeper startup race condition + + -- Joshua M. 
Boniface Thu, 28 Jan 2021 16:29:58 -0500 + +pvc (0.9.11-0) unstable; urgency=high + + * Documentation updates + * Adds VNC information to VM info + * Goes back to external Ceph commands for disk usage + + -- Joshua M. Boniface Tue, 05 Jan 2021 15:58:26 -0500 + +pvc (0.9.10-0) unstable; urgency=high + + * Moves OSD stats uploading to primary, eliminating reporting failures while hosts are down + * Documentation updates + * Significantly improves RBD locking behaviour in several situations, eliminating cold-cluster start issues and failed VM boot-ups after crashes + * Fixes some timeout delays with fencing + * Fixes bug in validating YAML provisioner userdata + + -- Joshua M. Boniface Tue, 15 Dec 2020 10:45:15 -0500 + +pvc (0.9.9-0) unstable; urgency=high + + * Adds documentation updates + * Removes single-element list stripping and fixes surrounding bugs + * Adds additional fields to some API endpoints for ease of parsing by clients + * Fixes bugs with network configuration + + -- Joshua M. Boniface Wed, 09 Dec 2020 02:20:20 -0500 + +pvc (0.9.8-0) unstable; urgency=high + + * Adds support for cluster backup/restore + * Moves location of `init` command in CLI to make room for the above + * Cleans up some invalid help messages from the API + + -- Joshua M. Boniface Tue, 24 Nov 2020 12:26:57 -0500 + +pvc (0.9.7-0) unstable; urgency=high + + * Fixes bug with provisioner system template modifications + + -- Joshua M. Boniface Thu, 19 Nov 2020 10:48:28 -0500 + +pvc (0.9.6-0) unstable; urgency=high + + * Fixes bug with migrations + + -- Joshua M. Boniface Tue, 17 Nov 2020 13:01:54 -0500 + +pvc (0.9.5-0) unstable; urgency=high + + * Fixes bug with line count in log follow + * Fixes bug with disk stat output being None + * Adds short pretty health output + * Documentation updates + + -- Joshua M. Boniface Tue, 17 Nov 2020 12:34:04 -0500 + +pvc (0.9.4-0) unstable; urgency=high + + * Fixes major bug in OVA parser + + -- Joshua M. 
Boniface Tue, 10 Nov 2020 15:33:50 -0500 + +pvc (0.9.3-0) unstable; urgency=high + + * Fixes bugs with image & OVA upload parsing + + -- Joshua M. Boniface Mon, 09 Nov 2020 10:28:15 -0500 + +pvc (0.9.2-0) unstable; urgency=high + + * Major linting of the codebase with flake8; adds linting tools + * Implements CLI-based modification of VM vCPUs, memory, networks, and disks without directly editing XML + * Fixes bug where `pvc vm log -f` would show all 1000 lines before starting + * Fixes bug in default provisioner libvirt schema (`drive` -> `driver` typo) + + -- Joshua M. Boniface Sun, 08 Nov 2020 02:03:29 -0500 + +pvc (0.9.1-0) unstable; urgency=high + + * Added per-VM migration method feature + * Fixed bug with provisioner system template listing + + -- Joshua Boniface Thu, 29 Oct 2020 12:15:28 -0400 + +pvc (0.9.0-0) unstable; urgency=high + + * Numerous bugfixes and improvements + + -- Joshua Boniface Sun, 18 Oct 2020 14:31:00 -0400 + +pvc (0.8-1) unstable; urgency=high + + * Fix bug with IPv6 being enabled on bridged interfaces + + -- Joshua Boniface Thu, 15 Oct 2020 11:02:24 -0400 + +pvc (0.8-0) unstable; urgency=medium + + * Numerous bugfixes and improvements + + -- Joshua Boniface Tue, 11 Aug 2020 12:12:07 -0400 + +pvc (0.7-0) unstable; urgency=medium + + * Numerous bugfixes and improvements + + -- Joshua Boniface Sat, 15 Feb 2020 23:24:17 -0500 + +pvc (0.6-0) unstable; urgency=medium + + * Numerous improvements, implementation of provisioner and API client + + -- Joshua Boniface Sat, 08 Feb 2020 18:26:58 -0500 + +pvc (0.5-0) unstable; urgency=medium + + * First public release + + -- Joshua Boniface Thu, 08 Aug 2019 20:55:51 -0400 + +pvc (0.4-0) unstable; urgency=medium + + * Unification of all daemons into node daemon + * Numerous client tweaks + + -- Joshua Boniface Sat, 13 Oct 2018 10:40:14 -0400 + +pvc (0.3-1) unstable; urgency=medium + + * Code and package reorganization pending additional daemons + + -- Joshua Boniface Wed, 12 Sep 2018 12:08:28 -0400 + 
+pvc (0.2-1) unstable; urgency=medium + + * Minor version bump with stability fixes + + -- Joshua Boniface Wed, 18 Jul 2018 02:18:25 -0400 + +pvc (0.1-1) unstable; urgency=medium + + * Initial packaging release + + -- Joshua Boniface Sun, 17 Jun 2018 02:40:39 -0400 From 93c2fdec9388a7b3c591efb0ed48718ec5034a90 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Mon, 21 Jun 2021 21:59:57 -0400 Subject: [PATCH 22/43] Swap order of networks and disks in provisioner Done to make the resulting config match the expectations when using "vm network add", which is that networks are below disks, not above. Not a functional change, just ensures the VM XML is consistent after many changes. --- api-daemon/pvcapid/provisioner.py | 48 +++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/api-daemon/pvcapid/provisioner.py b/api-daemon/pvcapid/provisioner.py index 00dc781d..88a0f69e 100755 --- a/api-daemon/pvcapid/provisioner.py +++ b/api-daemon/pvcapid/provisioner.py @@ -1323,6 +1323,30 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r vm_architecture=system_architecture ) + # Add disk devices + monitor_list = list() + coordinator_names = config['storage_hosts'] + for coordinator in coordinator_names: + monitor_list.append("{}.{}".format(coordinator, config['storage_domain'])) + + ceph_storage_secret = config['ceph_storage_secret_uuid'] + + for volume in vm_data['volumes']: + vm_schema += libvirt_schema.devices_disk_header.format( + ceph_storage_secret=ceph_storage_secret, + disk_pool=volume['pool'], + vm_name=vm_name, + disk_id=volume['disk_id'] + ) + for monitor in monitor_list: + vm_schema += libvirt_schema.devices_disk_coordinator.format( + coordinator_name=monitor, + coordinator_ceph_mon_port=config['ceph_monitor_port'] + ) + vm_schema += libvirt_schema.devices_disk_footer + + vm_schema += libvirt_schema.devices_vhostmd + # Add network devices network_id = 0 for network in vm_data['networks']: @@ 
-1364,30 +1388,6 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r network_id += 1 - # Add disk devices - monitor_list = list() - coordinator_names = config['storage_hosts'] - for coordinator in coordinator_names: - monitor_list.append("{}.{}".format(coordinator, config['storage_domain'])) - - ceph_storage_secret = config['ceph_storage_secret_uuid'] - - for volume in vm_data['volumes']: - vm_schema += libvirt_schema.devices_disk_header.format( - ceph_storage_secret=ceph_storage_secret, - disk_pool=volume['pool'], - vm_name=vm_name, - disk_id=volume['disk_id'] - ) - for monitor in monitor_list: - vm_schema += libvirt_schema.devices_disk_coordinator.format( - coordinator_name=monitor, - coordinator_ceph_mon_port=config['ceph_monitor_port'] - ) - vm_schema += libvirt_schema.devices_disk_footer - - vm_schema += libvirt_schema.devices_vhostmd - # Add default devices vm_schema += libvirt_schema.devices_default From eeb83da97dac314cc50fe93a2a84d25f4e0e3066 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Mon, 21 Jun 2021 22:21:54 -0400 Subject: [PATCH 23/43] Add support for SR-IOV NICs to VMs --- client-cli/cli_lib/network.py | 6 ++ client-cli/cli_lib/vm.py | 171 ++++++++++++++++++++++++--------- client-cli/pvc.py | 44 +++++++-- daemon-common/common.py | 15 ++- daemon-common/network.py | 24 +++-- daemon-common/vm.py | 54 +++++++++-- node-daemon/pvcnoded/Daemon.py | 2 +- 7 files changed, 245 insertions(+), 71 deletions(-) diff --git a/client-cli/cli_lib/network.py b/client-cli/cli_lib/network.py index 4ae9cdd6..52fd704b 100644 --- a/client-cli/cli_lib/network.py +++ b/client-cli/cli_lib/network.py @@ -1034,6 +1034,12 @@ def format_info_sriov_vf(config, vf_information, node): ainformation.append('{}VF User Trust:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['config']['trust']), vf_information['config']['trust'], ansiprint.end())) ainformation.append('{}Query RSS Config:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['config']['query_rss']), vf_information['config']['query_rss'], ansiprint.end())) ainformation.append('') + # PCIe bus information + ainformation.append('{}PCIe domain:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pci']['domain'])) + ainformation.append('{}PCIe bus:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pci']['bus'])) + ainformation.append('{}PCIe slot:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pci']['slot'])) + ainformation.append('{}PCIe function:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pci']['function'])) + ainformation.append('') # Usage information ainformation.append('{}VF Used:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['usage']['used']), vf_information['usage']['used'], ansiprint.end())) if vf_information['usage']['used'] == 'True' and vm_information is not None: diff --git a/client-cli/cli_lib/vm.py b/client-cli/cli_lib/vm.py index 
e86e5904..d2bb4484 100644 --- a/client-cli/cli_lib/vm.py +++ b/client-cli/cli_lib/vm.py @@ -501,7 +501,7 @@ def format_vm_memory(config, name, memory): return '\n'.join(output_list) -def vm_networks_add(config, vm, network, macaddr, model, restart): +def vm_networks_add(config, vm, network, macaddr, model, sriov, sriov_mode, restart): """ Add a new network to the VM @@ -514,17 +514,19 @@ def vm_networks_add(config, vm, network, macaddr, model, restart): from random import randint import cli_lib.network as pvc_network - # Verify that the provided network is valid - retcode, retdata = pvc_network.net_info(config, network) - if not retcode: - # Ignore the three special networks - if network not in ['upstream', 'cluster', 'storage']: - return False, "Network {} is not present in the cluster.".format(network) + # Verify that the provided network is valid (not in SR-IOV mode) + if not sriov: + retcode, retdata = pvc_network.net_info(config, network) + if not retcode: + # Ignore the three special networks + if network not in ['upstream', 'cluster', 'storage']: + return False, "Network {} is not present in the cluster.".format(network) - if network in ['upstream', 'cluster', 'storage']: - br_prefix = 'br' - else: - br_prefix = 'vmbr' + # Set the bridge prefix + if network in ['upstream', 'cluster', 'storage']: + br_prefix = 'br' + else: + br_prefix = 'vmbr' status, domain_information = vm_info(config, vm) if not status: @@ -551,24 +553,73 @@ def vm_networks_add(config, vm, network, macaddr, model, restart): octetC=random_octet_C ) - device_string = ''.format( - macaddr=macaddr, - bridge="{}{}".format(br_prefix, network), - model=model - ) + # Add an SR-IOV network + if sriov: + valid, sriov_vf_information = pvc_network.net_sriov_vf_info(config, domain_information['node'], network) + if not valid: + return False, 'Specified SR-IOV VF "{}" does not exist on VM node "{}".'.format(network, domain_information['node']) + + # Add a hostdev (direct PCIe) SR-IOV network + if 
sriov_mode == 'hostdev': + bus_address = 'domain="0x{pci_domain}" bus="0x{pci_bus}" slot="0x{pci_slot}" function="0x{pci_function}"'.format( + pci_domain=sriov_vf_information['pci']['domain'], + pci_bus=sriov_vf_information['pci']['bus'], + pci_slot=sriov_vf_information['pci']['slot'], + pci_function=sriov_vf_information['pci']['function'], + ) + device_string = '
{network}'.format( + macaddr=macaddr, + bus_address=bus_address, + network=network + ) + # Add a macvtap SR-IOV network + elif sriov_mode == 'macvtap': + device_string = ''.format( + macaddr=macaddr, + network=network + ) + else: + return False, "ERROR: Invalid SR-IOV mode specified." + # Add a normal bridged PVC network + else: + device_string = ''.format( + macaddr=macaddr, + bridge="{}{}".format(br_prefix, network), + model=model + ) + device_xml = fromstring(device_string) - last_interface = None all_interfaces = parsed_xml.devices.find('interface') if all_interfaces is None: all_interfaces = [] for interface in all_interfaces: - last_interface = re.match(r'[vm]*br([0-9a-z]+)', interface.source.attrib.get('bridge')).group(1) - if last_interface == network: - return False, 'Network {} is already configured for VM {}.'.format(network, vm) - if last_interface is not None: - for interface in parsed_xml.devices.find('interface'): - if last_interface == re.match(r'[vm]*br([0-9a-z]+)', interface.source.attrib.get('bridge')).group(1): + if sriov: + if sriov_mode == 'hostdev': + if interface.attrib.get('type') == 'hostdev': + interface_address = 'domain="{pci_domain}" bus="{pci_bus}" slot="{pci_slot}" function="{pci_function}"'.format( + interface.source.address.attrib.get('domain'), + interface.source.address.attrib.get('bus'), + interface.source.address.attrib.get('slot'), + interface.source.address.attrib.get('function') + ) + if interface_address == bus_address: + return False, 'Network "{}" is already configured for VM "{}".'.format(network, vm) + elif sriov_mode == 'macvtap': + if interface.attrib.get('type') == 'direct': + interface_dev = interface.source.attrib.get('dev') + if interface_dev == network: + return False, 'Network "{}" is already configured for VM "{}".'.format(network, vm) + else: + if interface.attrib.get('type') == 'bridge': + interface_vni = re.match(r'[vm]*br([0-9a-z]+)', interface.source.attrib.get('bridge')).group(1) + if interface_vni == 
network: + return False, 'Network "{}" is already configured for VM "{}".'.format(network, vm) + + # Add the interface at the end of the list (or, right above emulator) + if len(all_interfaces) > 0: + for idx, interface in enumerate(parsed_xml.devices.find('interface')): + if idx == len(all_interfaces) - 1: interface.addnext(device_xml) else: parsed_xml.devices.find('emulator').addprevious(device_xml) @@ -581,7 +632,7 @@ def vm_networks_add(config, vm, network, macaddr, model, restart): return vm_modify(config, vm, new_xml, restart) -def vm_networks_remove(config, vm, network, restart): +def vm_networks_remove(config, vm, network, sriov, restart): """ Remove a network to the VM @@ -605,17 +656,33 @@ def vm_networks_remove(config, vm, network, restart): except Exception: return False, 'ERROR: Failed to parse XML data.' + changed = False for interface in parsed_xml.devices.find('interface'): - if_vni = re.match(r'[vm]*br([0-9a-z]+)', interface.source.attrib.get('bridge')).group(1) - if network == if_vni: - interface.getparent().remove(interface) + if sriov: + if interface.attrib.get('type') == 'hostdev': + if_dev = str(interface.sriov_device) + if network == if_dev: + interface.getparent().remove(interface) + changed = True + elif interface.attrib.get('type') == 'direct': + if_dev = str(interface.source.attrib.get('dev')) + if network == if_dev: + interface.getparent().remove(interface) + changed = True + else: + if_vni = re.match(r'[vm]*br([0-9a-z]+)', interface.source.attrib.get('bridge')).group(1) + if network == if_vni: + interface.getparent().remove(interface) + changed = True + if changed: + try: + new_xml = tostring(parsed_xml, pretty_print=True) + except Exception: + return False, 'ERROR: Failed to dump XML data.' - try: - new_xml = tostring(parsed_xml, pretty_print=True) - except Exception: - return False, 'ERROR: Failed to dump XML data.' 
- - return vm_modify(config, vm, new_xml, restart) + return vm_modify(config, vm, new_xml, restart) + else: + return False, 'ERROR: Network "{}" does not exist on VM.'.format(network) def vm_networks_get(config, vm): @@ -1178,7 +1245,7 @@ def format_info(config, domain_information, long_output): cluster_net_list = call_api(config, 'get', '/network').json() for net in domain_information['networks']: net_vni = net['vni'] - if net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^e.*', net_vni): + if net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^macvtap:.*', net_vni) and not re.match(r'^hostdev:.*', net_vni): if int(net_vni) not in [net['vni'] for net in cluster_net_list]: net_list.append(ansiprint.red() + net_vni + ansiprint.end() + ' [invalid]') else: @@ -1210,17 +1277,31 @@ def format_info(config, domain_information, long_output): width=name_length )) ainformation.append('') - ainformation.append('{}Interfaces:{} {}ID Type Source Model MAC Data (r/w) Packets (r/w) Errors (r/w){}'.format(ansiprint.purple(), ansiprint.end(), ansiprint.bold(), ansiprint.end())) + ainformation.append('{}Interfaces:{} {}ID Type Source Model MAC Data (r/w) Packets (r/w) Errors (r/w){}'.format(ansiprint.purple(), ansiprint.end(), ansiprint.bold(), ansiprint.end())) for net in domain_information['networks']: - ainformation.append(' {0: <3} {1: <7} {2: <10} {3: <8} {4: <18} {5: <12} {6: <15} {7: <12}'.format( + net_type = net['type'] + net_source = net['source'] + net_mac = net['mac'] + if net_type in ['direct', 'hostdev']: + net_model = 'N/A' + net_bytes = 'N/A' + net_packets = 'N/A' + net_errors = 'N/A' + elif net_type in ['bridge']: + net_model = net['model'] + net_bytes = '/'.join([str(format_bytes(net.get('rd_bytes', 0))), str(format_bytes(net.get('wr_bytes', 0)))]) + net_packets = '/'.join([str(format_metric(net.get('rd_packets', 0))), str(format_metric(net.get('wr_packets', 0)))]) + net_errors = '/'.join([str(format_metric(net.get('rd_errors', 
0))), str(format_metric(net.get('wr_errors', 0)))]) + + ainformation.append(' {0: <3} {1: <8} {2: <12} {3: <8} {4: <18} {5: <12} {6: <15} {7: <12}'.format( domain_information['networks'].index(net), - net['type'], - net['source'], - net['model'], - net['mac'], - '/'.join([str(format_bytes(net.get('rd_bytes', 0))), str(format_bytes(net.get('wr_bytes', 0)))]), - '/'.join([str(format_metric(net.get('rd_packets', 0))), str(format_metric(net.get('wr_packets', 0)))]), - '/'.join([str(format_metric(net.get('rd_errors', 0))), str(format_metric(net.get('wr_errors', 0)))]), + net_type, + net_source, + net_model, + net_mac, + net_bytes, + net_packets, + net_errors )) # Controller list ainformation.append('') @@ -1259,7 +1340,7 @@ def format_list(config, vm_list, raw): vm_nets_length = 9 vm_ram_length = 8 vm_vcpu_length = 6 - vm_node_length = 8 + vm_node_length = 5 vm_migrated_length = 10 for domain_information in vm_list: net_list = getNiceNetID(domain_information) @@ -1335,7 +1416,7 @@ def format_list(config, vm_list, raw): cluster_net_list = call_api(config, 'get', '/network').json() vm_net_colour = '' for net_vni in net_list: - if net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^e.*', net_vni): + if net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^macvtap:.*', net_vni) and not re.match(r'^hostdev:.*', net_vni): if int(net_vni) not in [net['vni'] for net in cluster_net_list]: vm_net_colour = ansiprint.red() diff --git a/client-cli/pvc.py b/client-cli/pvc.py index cfc57c51..92ac27b8 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -1309,15 +1309,24 @@ def vm_network_get(domain, raw): 'domain' ) @click.argument( - 'vni' + 'net' ) @click.option( '-a', '--macaddr', 'macaddr', default=None, - help='Use this MAC address instead of random generation; must be a valid MAC address in colon-deliniated format.' + help='Use this MAC address instead of random generation; must be a valid MAC address in colon-delimited format.' 
) @click.option( '-m', '--model', 'model', default='virtio', - help='The model for the interface; must be a valid libvirt model.' + help='The model for the interface; must be a valid libvirt model. Not used for SR-IOV NETs.' +) +@click.option( + '-s', '--sriov', 'sriov', is_flag=True, default=False, + help='Identify that NET is an SR-IOV device name and not a VNI. Required for adding SR-IOV NETs.' +) +@click.option( + '-d', '--sriov-mode', 'sriov_mode', default='hostdev', + type=click.Choice(['hostdev', 'macvtap']), + help='For SR-IOV NETs, the SR-IOV network device mode.' ) @click.option( '-r', '--restart', 'restart', is_flag=True, default=False, @@ -1329,9 +1338,18 @@ def vm_network_get(domain, raw): help='Confirm the restart' ) @cluster_req -def vm_network_add(domain, vni, macaddr, model, restart, confirm_flag): +def vm_network_add(domain, net, macaddr, model, sriov, sriov_mode, restart, confirm_flag): """ - Add the network VNI to the virtual machine DOMAIN. Networks are always addded to the end of the current list of networks in the virtual machine. + Add the network NET to the virtual machine DOMAIN. Networks are always addded to the end of the current list of networks in the virtual machine. + + NET may be a PVC network VNI, which is added as a bridged device, or a SR-IOV VF device connected in the given mode. + + NOTE: Adding a SR-IOV network device in the "hostdev" mode has the following caveats: + + 1. The VM will not be able to be live migrated; it must be shut down to migrate between nodes. The VM metadata will be updated to force this. + + 2. If an identical SR-IOV VF device is not present on the target node, post-migration startup will fail. It may be prudent to use a node limit here. 
+ """ if restart and not confirm_flag and not config['unsafe']: try: @@ -1339,7 +1357,7 @@ def vm_network_add(domain, vni, macaddr, model, restart, confirm_flag): except Exception: restart = False - retcode, retmsg = pvc_vm.vm_networks_add(config, domain, vni, macaddr, model, restart) + retcode, retmsg = pvc_vm.vm_networks_add(config, domain, net, macaddr, model, sriov, sriov_mode, restart) if retcode and not restart: retmsg = retmsg + " Changes will be applied on next VM start/restart." cleanup(retcode, retmsg) @@ -1353,7 +1371,11 @@ def vm_network_add(domain, vni, macaddr, model, restart, confirm_flag): 'domain' ) @click.argument( - 'vni' + 'net' +) +@click.option( + '-s', '--sriov', 'sriov', is_flag=True, default=False, + help='Identify that NET is an SR-IOV device name and not a VNI. Required for removing SR-IOV NETs.' ) @click.option( '-r', '--restart', 'restart', is_flag=True, default=False, @@ -1365,9 +1387,11 @@ def vm_network_add(domain, vni, macaddr, model, restart, confirm_flag): help='Confirm the restart' ) @cluster_req -def vm_network_remove(domain, vni, restart, confirm_flag): +def vm_network_remove(domain, net, sriov, restart, confirm_flag): """ - Remove the network VNI to the virtual machine DOMAIN. + Remove the network NET from the virtual machine DOMAIN. + + NET may be a PVC network VNI, which is added as a bridged device, or a SR-IOV VF device connected in the given mode. """ if restart and not confirm_flag and not config['unsafe']: try: @@ -1375,7 +1399,7 @@ def vm_network_remove(domain, vni, restart, confirm_flag): except Exception: restart = False - retcode, retmsg = pvc_vm.vm_networks_remove(config, domain, vni, restart) + retcode, retmsg = pvc_vm.vm_networks_remove(config, domain, net, sriov, restart) if retcode and not restart: retmsg = retmsg + " Changes will be applied on next VM start/restart." 
cleanup(retcode, retmsg) diff --git a/daemon-common/common.py b/daemon-common/common.py index 6271cc85..493cd139 100644 --- a/daemon-common/common.py +++ b/daemon-common/common.py @@ -373,23 +373,28 @@ def getDomainNetworks(parsed_xml, stats_data): net_type = device.attrib.get('type') except Exception: net_type = None + try: net_mac = device.mac.attrib.get('address') except Exception: net_mac = None + try: net_bridge = device.source.attrib.get(net_type) except Exception: net_bridge = None + try: net_model = device.model.attrib.get('type') except Exception: net_model = None + try: net_stats_list = [x for x in stats_data.get('net_stats', []) if x.get('bridge') == net_bridge] net_stats = net_stats_list[0] except Exception: net_stats = {} + net_rd_bytes = net_stats.get('rd_bytes', 0) net_rd_packets = net_stats.get('rd_packets', 0) net_rd_errors = net_stats.get('rd_errors', 0) @@ -398,10 +403,16 @@ def getDomainNetworks(parsed_xml, stats_data): net_wr_packets = net_stats.get('wr_packets', 0) net_wr_errors = net_stats.get('wr_errors', 0) net_wr_drops = net_stats.get('wr_drops', 0) - if net_type in ['direct', 'hostdev']: - net_vni = device.source.attrib.get('dev') + + if net_type == 'direct': + net_vni = 'macvtap:' + device.source.attrib.get('dev') + net_bridge = device.source.attrib.get('dev') + elif net_type == 'hostdev': + net_vni = 'hostdev:' + str(device.sriov_device) + net_bridge = str(device.sriov_device) else: net_vni = re_match(r'[vm]*br([0-9a-z]+)', net_bridge).group(1) + net_obj = { 'type': net_type, 'vni': net_vni, diff --git a/daemon-common/network.py b/daemon-common/network.py index 7a4e48d9..c3504712 100644 --- a/daemon-common/network.py +++ b/daemon-common/network.py @@ -806,7 +806,7 @@ def set_sriov_vf_config(zkhandler, node, vf, vlan_id=None, vlan_qos=None, tx_rat return False, 'Failed to modify configuration of SR-IOV VF "{}" on node "{}".'.format(vf, node) -def set_sriov_vf_vm(zkhandler, node, vf, vm_name, vm_macaddr): +def set_sriov_vf_vm(zkhandler, 
vm_uuid, node, vf, vf_macaddr, vf_type): # Verify node is valid valid_node = common.verifyNode(zkhandler, node) if not valid_node: @@ -817,11 +817,19 @@ def set_sriov_vf_vm(zkhandler, node, vf, vm_name, vm_macaddr): if not vf_information: return False - zkhandler.write([ + update_list = [ (('node.sriov.vf', node, 'sriov_vf.used', vf), 'True'), - (('node.sriov.vf', node, 'sriov_vf.used_by', vf), vm_name), - (('node.sriov.vf', node, 'sriov_vf.mac', vf), vm_macaddr), - ]) + (('node.sriov.vf', node, 'sriov_vf.used_by', vf), vm_uuid), + (('node.sriov.vf', node, 'sriov_vf.mac', vf), vf_macaddr), + ] + + # Hostdev type SR-IOV prevents the guest from live migrating + if vf_type == 'hostdev': + update_list.append( + (('domain.meta.migrate_method', vm_uuid), 'shutdown') + ) + + zkhandler.write(update_list) return True @@ -837,9 +845,11 @@ def unset_sriov_vf_vm(zkhandler, node, vf): if not vf_information: return False - zkhandler.write([ + update_list = [ (('node.sriov.vf', node, 'sriov_vf.used', vf), 'False'), (('node.sriov.vf', node, 'sriov_vf.used_by', vf), ''), - ]) + ] + + zkhandler.write(update_list) return True diff --git a/daemon-common/vm.py b/daemon-common/vm.py index ed572676..6bba03e0 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -27,6 +27,7 @@ import lxml.etree import daemon_lib.common as common import daemon_lib.ceph as ceph +from daemon_lib.network import set_sriov_vf_vm, unset_sriov_vf_vm # @@ -191,6 +192,21 @@ def define_vm(zkhandler, config_data, target_node, node_limit, node_selector, no if not valid_node: return False, 'ERROR: Specified node "{}" is invalid.'.format(target_node) + # If a SR-IOV network device is being added, set its used state + dnetworks = common.getDomainNetworks(parsed_xml, {}) + for network in dnetworks: + if network['type'] in ['direct', 'hostdev']: + dom_node = zkhandler.read(('domain.node', dom_uuid)) + + # Check if the network is already in use + is_used = zkhandler.read(('node.sriov.vf', dom_node, 'sriov_vf.used', 
network['source'])) + if is_used == 'True': + used_by_name = searchClusterByUUID(zkhandler, zkhandler.read(('node.sriov.vf', dom_node, 'sriov_vf.used_by', network['source']))) + return False, 'ERROR: Attempted to use SR-IOV network "{}" which is already used by VM "{}" on node "{}".'.format(network['source'], used_by_name, dom_node) + + # We must update the "used" section + set_sriov_vf_vm(zkhandler, dom_uuid, dom_node, network['source'], network['mac'], network['type']) + # Obtain the RBD disk list using the common functions ddisks = common.getDomainDisks(parsed_xml, {}) rbd_list = [] @@ -211,7 +227,7 @@ def define_vm(zkhandler, config_data, target_node, node_limit, node_selector, no formatted_rbd_list = '' # Add the new domain to Zookeeper - result = zkhandler.write([ + zkhandler.write([ (('domain', dom_uuid), dom_name), (('domain.xml', dom_uuid), config_data), (('domain.state', dom_uuid), initial_state), @@ -230,10 +246,7 @@ def define_vm(zkhandler, config_data, target_node, node_limit, node_selector, no (('domain.migrate.sync_lock', dom_uuid), ''), ]) - if result: - return True, 'Added new VM with Name "{}" and UUID "{}" to database.'.format(dom_name, dom_uuid) - else: - return False, 'ERROR: Failed to add new VM.' + return True, 'Added new VM with Name "{}" and UUID "{}" to database.'.format(dom_name, dom_uuid) def modify_vm_metadata(zkhandler, domain, node_limit, node_selector, node_autostart, provisioner_profile, migration_method): @@ -276,7 +289,36 @@ def modify_vm(zkhandler, domain, restart, new_vm_config): try: parsed_xml = lxml.objectify.fromstring(new_vm_config) except Exception: - return False, 'ERROR: Failed to parse XML data.' + return False, 'ERROR: Failed to parse new XML data.' 
+ + # If a SR-IOV network device is being added, set its used state + dnetworks = common.getDomainNetworks(parsed_xml, {}) + for network in dnetworks: + if network['type'] in ['direct', 'hostdev']: + dom_node = zkhandler.read(('domain.node', dom_uuid)) + + # Check if the network is already in use + is_used = zkhandler.read(('node.sriov.vf', dom_node, 'sriov_vf.used', network['source'])) + if is_used == 'True': + used_by_name = searchClusterByUUID(zkhandler, zkhandler.read(('node.sriov.vf', dom_node, 'sriov_vf.used_by', network['source']))) + return False, 'ERROR: Attempted to use SR-IOV network "{}" which is already used by VM "{}" on node "{}".'.format(network['source'], used_by_name, dom_node) + + # We must update the "used" section + set_sriov_vf_vm(zkhandler, dom_uuid, dom_node, network['source'], network['mac'], network['type']) + + # If a SR-IOV network device is being removed, unset its used state + old_vm_config = zkhandler.read(('domain.xml', dom_uuid)) + try: + old_parsed_xml = lxml.objectify.fromstring(old_vm_config) + except Exception: + return False, 'ERROR: Failed to parse old XML data.' 
+ old_dnetworks = common.getDomainNetworks(old_parsed_xml, {}) + for network in old_dnetworks: + if network['type'] in ['direct', 'hostdev']: + if network['mac'] not in [n['mac'] for n in dnetworks]: + dom_node = zkhandler.read(('domain.node', dom_uuid)) + # We must update the "used" section + unset_sriov_vf_vm(zkhandler, dom_node, network['source']) # Obtain the RBD disk list using the common functions ddisks = common.getDomainDisks(parsed_xml, {}) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 57c6ab24..f841ca32 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1129,7 +1129,7 @@ if enable_networking: dev_uevent = vfh.readlines() for line in dev_uevent: if re.match(r'^PCI_SLOT_NAME=.*', line): - dev_pcie_path = line.split('=')[-1] + dev_pcie_path = line.rstrip().split('=')[-1] except FileNotFoundError: # Something must already be using the PCIe device pass From 24ce361a04aa931d59ffc013c900066d74e3bcd2 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Mon, 21 Jun 2021 23:18:34 -0400 Subject: [PATCH 24/43] Ensure SR-IOV NIC states are updated on migration --- daemon-common/vm.py | 27 +++++++++++++++++++++++++++ node-daemon/pvcnoded/VMInstance.py | 7 +++++++ 2 files changed, 34 insertions(+) diff --git a/daemon-common/vm.py b/daemon-common/vm.py index 6bba03e0..7ab78a7f 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -555,6 +555,33 @@ def disable_vm(zkhandler, domain): return True, 'Marked VM "{}" as disable.'.format(domain) +def update_vm_sriov_nics(zkhandler, dom_uuid, source_node, target_node): + # Update all the SR-IOV device states on both nodes, used during migrations but called by the node-side + vm_config = zkhandler.read(('domain.xml', dom_uuid)) + parsed_xml = lxml.objectify.fromstring(vm_config) + dnetworks = common.getDomainNetworks(parsed_xml, {}) + retcode = True + retmsg = '' + for network in dnetworks: + if network['type'] in ['direct', 'hostdev']: + # Check if the network is already in use + is_used = zkhandler.read(('node.sriov.vf', target_node, 'sriov_vf.used', network['source'])) + if is_used == 'True': + used_by_name = searchClusterByUUID(zkhandler, zkhandler.read(('node.sriov.vf', target_node, 'sriov_vf.used_by', network['source']))) + if retcode: + retcode = False + retmsg = 'Attempting to use SR-IOV network "{}" which is already used by VM "{}"'.format(network['source'], used_by_name) + + # We must update the "used" section + if retcode: + # This conditional ensure that if we failed the is_used check, we don't try to overwrite the information of a VF that belongs to another VM + set_sriov_vf_vm(zkhandler, dom_uuid, target_node, network['source'], network['mac'], network['type']) + # ... 
but we still want to free the old node in an case + unset_sriov_vf_vm(zkhandler, source_node, network['source']) + + return retcode, retmsg + + def move_vm(zkhandler, domain, target_node, wait=False, force_live=False): # Validate that VM exists in cluster dom_uuid = getDomainUUID(zkhandler, domain) diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py index 634f72dd..3fdd72b6 100644 --- a/node-daemon/pvcnoded/VMInstance.py +++ b/node-daemon/pvcnoded/VMInstance.py @@ -34,6 +34,8 @@ import pvcnoded.VMConsoleWatcherInstance as VMConsoleWatcherInstance import daemon_lib.common as daemon_common +from daemon_lib.vm import update_vm_sriov_nics + def flush_locks(zkhandler, logger, dom_uuid, this_node=None): logger.out('Flushing RBD locks for VM "{}"'.format(dom_uuid), state='i') @@ -672,6 +674,11 @@ class VMInstance(object): self.logger.out('Acquired write lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid)) time.sleep(0.5) # Time for reader to acquire the lock + # Update any SR-IOV NIC states now + sriov_update_result, sriov_update_error = update_vm_sriov_nics(self.zkhandler, self.domuuid, self.last_currentnode, self.node) + if not sriov_update_result: + self.logger.out('{}; VM will likely fail to start.'.format(sriov_update_error), state='w', prefix='Domain {}'.format(self.domuuid)) + self.state = self.zkhandler.read(('domain.state', self.domuuid)) self.dom = self.lookupByUUID(self.domuuid) if self.dom: From b532bc91046a133ca3ece0a6af00d5cf090ec723 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Mon, 21 Jun 2021 23:22:36 -0400 Subject: [PATCH 25/43] Add missing managed flag for hostdev --- client-cli/cli_lib/vm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client-cli/cli_lib/vm.py b/client-cli/cli_lib/vm.py index d2bb4484..9192d28a 100644 --- a/client-cli/cli_lib/vm.py +++ b/client-cli/cli_lib/vm.py @@ -567,7 +567,7 @@ def vm_networks_add(config, vm, network, macaddr, model, sriov, sriov_mode, rest pci_slot=sriov_vf_information['pci']['slot'], pci_function=sriov_vf_information['pci']['function'], ) - device_string = '
{network}'.format( + device_string = '
{network}'.format( macaddr=macaddr, bus_address=bus_address, network=network From 7d42fba3730650c308ca99f289ddb6f7eeab93d1 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Mon, 21 Jun 2021 23:28:53 -0400 Subject: [PATCH 26/43] Ensure being in migrate doesn't abort shutdown --- node-daemon/pvcnoded/VMInstance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py index 3fdd72b6..759aaa66 100644 --- a/node-daemon/pvcnoded/VMInstance.py +++ b/node-daemon/pvcnoded/VMInstance.py @@ -382,7 +382,7 @@ class VMInstance(object): # Abort shutdown if the state changes to start current_state = self.zkhandler.read(('domain.state', self.domuuid)) - if current_state not in ['shutdown', 'restart']: + if current_state not in ['shutdown', 'restart', 'migrate']: self.logger.out('Aborting VM shutdown due to state change', state='i', prefix='Domain {}'.format(self.domuuid)) is_aborted = True break From 68c7481aa26245b48de467972f03ac5336e42bab Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Mon, 21 Jun 2021 23:35:52 -0400 Subject: [PATCH 27/43] Ensure offline migrations update SR-IOV NIC states --- daemon-common/vm.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/daemon-common/vm.py b/daemon-common/vm.py index 7ab78a7f..3357794e 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -630,6 +630,10 @@ def move_vm(zkhandler, domain, target_node, wait=False, force_live=False): retmsg = 'Permanently migrating VM "{}" to node "{}".'.format(domain, target_node) + if target_state not in ['migrate', 'migrate-live']: + # Update any SR-IOV NICs - with online migrations this is done by pvcnoded, but offline we must do it here + update_vm_sriov_nics(zkhandler, dom_uuid, zkhandler.read(('domain.node', dom_uuid)), target_node) + lock = zkhandler.exclusivelock(('domain.state', dom_uuid)) with lock: zkhandler.write([ @@ -698,6 +702,10 @@ def migrate_vm(zkhandler, domain, target_node, force_migrate, wait=False, force_ retmsg = 'Migrating VM "{}" to node "{}".'.format(domain, target_node) + if target_state not in ['migrate', 'migrate-live']: + # Update any SR-IOV NICs - with online migrations this is done by pvcnoded, but offline we must do it here + update_vm_sriov_nics(zkhandler, dom_uuid, zkhandler.read(('domain.node', dom_uuid)), target_node) + lock = zkhandler.exclusivelock(('domain.state', dom_uuid)) with lock: zkhandler.write([ @@ -741,6 +749,10 @@ def unmigrate_vm(zkhandler, domain, wait=False, force_live=False): retmsg = 'Unmigrating VM "{}" back to node "{}".'.format(domain, target_node) + if target_state not in ['migrate', 'migrate-live']: + # Update any SR-IOV NICs - with online migrations this is done by pvcnoded, but offline we must do it here + update_vm_sriov_nics(zkhandler, dom_uuid, zkhandler.read(('domain.node', dom_uuid)), target_node) + lock = zkhandler.exclusivelock(('domain.state', dom_uuid)) with lock: zkhandler.write([ From dc560c1dcb9d204c284441ddbe25a82005c2bcea Mon Sep 17 00:00:00 2001 From: "Joshua 
M. Boniface" Date: Mon, 21 Jun 2021 23:46:47 -0400 Subject: [PATCH 28/43] Better handle retcodes in migrate update --- daemon-common/vm.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/daemon-common/vm.py b/daemon-common/vm.py index 3357794e..4b69b2c1 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -569,16 +569,21 @@ def update_vm_sriov_nics(zkhandler, dom_uuid, source_node, target_node): if is_used == 'True': used_by_name = searchClusterByUUID(zkhandler, zkhandler.read(('node.sriov.vf', target_node, 'sriov_vf.used_by', network['source']))) if retcode: - retcode = False + retcode_this = False retmsg = 'Attempting to use SR-IOV network "{}" which is already used by VM "{}"'.format(network['source'], used_by_name) + else: + retcode_this = True # We must update the "used" section - if retcode: + if retcode_this: # This conditional ensure that if we failed the is_used check, we don't try to overwrite the information of a VF that belongs to another VM set_sriov_vf_vm(zkhandler, dom_uuid, target_node, network['source'], network['mac'], network['type']) # ... but we still want to free the old node in an case unset_sriov_vf_vm(zkhandler, source_node, network['source']) + if not retcode_this: + retcode = retcode_this + return retcode, retmsg From 60e1da09dd793466f33c9da206edd6cc3b2b4746 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 22 Jun 2021 00:00:50 -0400 Subject: [PATCH 29/43] Don't try any shenanigans when updating NICs Trying to do this on the VMInstance side had problems because we can't differentiate the 3 types of migration there. So, just update this in the API side and hope everything goes well. This introduces an edge bug: if a VM is using a macvtap SR-IOV device, and then tries to migrate, and the migrate is aborted, the NIC lists will be inconsistent. When I revamp the VMInstance in the future, I should be able to correct this, but for now we'll have to live with that edgecase.
--- daemon-common/vm.py | 23 +++++++++++------------ node-daemon/pvcnoded/VMInstance.py | 7 ------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/daemon-common/vm.py b/daemon-common/vm.py index 4b69b2c1..aaba3758 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -635,10 +635,6 @@ def move_vm(zkhandler, domain, target_node, wait=False, force_live=False): retmsg = 'Permanently migrating VM "{}" to node "{}".'.format(domain, target_node) - if target_state not in ['migrate', 'migrate-live']: - # Update any SR-IOV NICs - with online migrations this is done by pvcnoded, but offline we must do it here - update_vm_sriov_nics(zkhandler, dom_uuid, zkhandler.read(('domain.node', dom_uuid)), target_node) - lock = zkhandler.exclusivelock(('domain.state', dom_uuid)) with lock: zkhandler.write([ @@ -650,6 +646,9 @@ def move_vm(zkhandler, domain, target_node, wait=False, force_live=False): # Wait for 1/2 second for migration to start time.sleep(0.5) + # Update any SR-IOV NICs + update_vm_sriov_nics(zkhandler, dom_uuid, current_node, target_node) + if wait: while zkhandler.read(('domain.state', dom_uuid)) == target_state: time.sleep(0.5) @@ -702,15 +701,12 @@ def migrate_vm(zkhandler, domain, target_node, force_migrate, wait=False, force_ return False, 'ERROR: Could not find a valid migration target for VM "{}".'.format(domain) # Don't overwrite an existing last_node when using force_migrate + real_current_node = current_node # Used for the SR-IOV update if last_node and force_migrate: current_node = last_node retmsg = 'Migrating VM "{}" to node "{}".'.format(domain, target_node) - if target_state not in ['migrate', 'migrate-live']: - # Update any SR-IOV NICs - with online migrations this is done by pvcnoded, but offline we must do it here - update_vm_sriov_nics(zkhandler, dom_uuid, zkhandler.read(('domain.node', dom_uuid)), target_node) - lock = zkhandler.exclusivelock(('domain.state', dom_uuid)) with lock: zkhandler.write([ @@ -722,6 +718,9 @@ def 
migrate_vm(zkhandler, domain, target_node, force_migrate, wait=False, force_ # Wait for 1/2 second for migration to start time.sleep(0.5) + # Update any SR-IOV NICs + update_vm_sriov_nics(zkhandler, dom_uuid, real_current_node, target_node) + if wait: while zkhandler.read(('domain.state', dom_uuid)) == target_state: time.sleep(0.5) @@ -747,6 +746,7 @@ def unmigrate_vm(zkhandler, domain, wait=False, force_live=False): else: target_state = 'migrate' + current_node = zkhandler.read(('domain.node', dom_uuid)) target_node = zkhandler.read(('domain.last_node', dom_uuid)) if target_node == '': @@ -754,10 +754,6 @@ def unmigrate_vm(zkhandler, domain, wait=False, force_live=False): retmsg = 'Unmigrating VM "{}" back to node "{}".'.format(domain, target_node) - if target_state not in ['migrate', 'migrate-live']: - # Update any SR-IOV NICs - with online migrations this is done by pvcnoded, but offline we must do it here - update_vm_sriov_nics(zkhandler, dom_uuid, zkhandler.read(('domain.node', dom_uuid)), target_node) - lock = zkhandler.exclusivelock(('domain.state', dom_uuid)) with lock: zkhandler.write([ @@ -769,6 +765,9 @@ def unmigrate_vm(zkhandler, domain, wait=False, force_live=False): # Wait for 1/2 second for migration to start time.sleep(0.5) + # Update any SR-IOV NICs + update_vm_sriov_nics(zkhandler, dom_uuid, current_node, target_node) + if wait: while zkhandler.read(('domain.state', dom_uuid)) == target_state: time.sleep(0.5) diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py index 759aaa66..dd0f3d0e 100644 --- a/node-daemon/pvcnoded/VMInstance.py +++ b/node-daemon/pvcnoded/VMInstance.py @@ -34,8 +34,6 @@ import pvcnoded.VMConsoleWatcherInstance as VMConsoleWatcherInstance import daemon_lib.common as daemon_common -from daemon_lib.vm import update_vm_sriov_nics - def flush_locks(zkhandler, logger, dom_uuid, this_node=None): logger.out('Flushing RBD locks for VM "{}"'.format(dom_uuid), state='i') @@ -674,11 +672,6 @@ class 
VMInstance(object): self.logger.out('Acquired write lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid)) time.sleep(0.5) # Time for reader to acquire the lock - # Update any SR-IOV NIC states now - sriov_update_result, sriov_update_error = update_vm_sriov_nics(self.zkhandler, self.domuuid, self.last_currentnode, self.node) - if not sriov_update_result: - self.logger.out('{}; VM will likely fail to start.'.format(sriov_update_error), state='w', prefix='Domain {}'.format(self.domuuid)) - self.state = self.zkhandler.read(('domain.state', self.domuuid)) self.dom = self.lookupByUUID(self.domuuid) if self.dom: From e623909a433fda6ff9e658477fc9066ba236abac Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 22 Jun 2021 00:54:32 -0400 Subject: [PATCH 30/43] Store PHY MAC for VFs and restore after free --- daemon-common/migrations/versions/1.json | 2 +- daemon-common/network.py | 1 + daemon-common/zkhandler.py | 1 + node-daemon/pvcnoded/Daemon.py | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/daemon-common/migrations/versions/1.json b/daemon-common/migrations/versions/1.json index 5ccc1bf0..f0fc66a5 100644 --- a/daemon-common/migrations/versions/1.json +++ b/daemon-common/migrations/versions/1.json @@ -1 +1 @@ -{"version": "1", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": 
"/ceph/snapshots"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "migrate.sync_lock": "/migrate_sync_lock"}, "network": {"vni": "", "type": "/nettype", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", 
"rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file +{"version": "1", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": 
"/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "migrate.sync_lock": "/migrate_sync_lock"}, "network": {"vni": "", "type": "/nettype", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", 
"ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/network.py b/daemon-common/network.py index c3504712..3e034cf8 100644 --- a/daemon-common/network.py +++ b/daemon-common/network.py @@ -848,6 +848,7 @@ def unset_sriov_vf_vm(zkhandler, node, vf): update_list = [ (('node.sriov.vf', node, 'sriov_vf.used', vf), 'False'), (('node.sriov.vf', node, 'sriov_vf.used_by', vf), ''), + (('node.sriov.vf', node, 'sriov_vf.mac', vf), zkhandler.read(('node.sriov.vf', node, 'sriov_vf.phy_mac', vf))) ] zkhandler.write(update_list) diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index 5ecb1cbf..ded85876 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -500,6 +500,7 @@ class ZKSchema(object): 'pf': '/pf', 'mtu': '/mtu', 'mac': '/mac', + 'phy_mac': '/phy_mac', 'config': '/config', 'config.vlan_id': '/config/vlan_id', 'config.vlan_qos': '/config/vlan_qos', diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index f841ca32..8f52f754 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1150,6 +1150,7 @@ if enable_networking: (('node.sriov.vf', myhostname, 'sriov_vf.pf', vfphy), pf), (('node.sriov.vf', myhostname, 'sriov_vf.mtu', vfphy), mtu), (('node.sriov.vf', myhostname, 'sriov_vf.mac', vfphy), vf['address']), + (('node.sriov.vf', myhostname, 'sriov_vf.phy_mac', vfphy), vf['address']), (('node.sriov.vf', myhostname, 'sriov_vf.config', vfphy), ''), (('node.sriov.vf', myhostname, 
'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '0')), (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '0')), From 1787a970abd554472c319556753e7ef24278ceeb Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 22 Jun 2021 02:21:32 -0400 Subject: [PATCH 31/43] Fix bug in address check format string --- client-cli/cli_lib/vm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/client-cli/cli_lib/vm.py b/client-cli/cli_lib/vm.py index 9192d28a..f84eec05 100644 --- a/client-cli/cli_lib/vm.py +++ b/client-cli/cli_lib/vm.py @@ -598,10 +598,10 @@ def vm_networks_add(config, vm, network, macaddr, model, sriov, sriov_mode, rest if sriov_mode == 'hostdev': if interface.attrib.get('type') == 'hostdev': interface_address = 'domain="{pci_domain}" bus="{pci_bus}" slot="{pci_slot}" function="{pci_function}"'.format( - interface.source.address.attrib.get('domain'), - interface.source.address.attrib.get('bus'), - interface.source.address.attrib.get('slot'), - interface.source.address.attrib.get('function') + pci_domain=interface.source.address.attrib.get('domain'), + pci_bus=interface.source.address.attrib.get('bus'), + pci_slot=interface.source.address.attrib.get('slot'), + pci_function=interface.source.address.attrib.get('function') ) if interface_address == bus_address: return False, 'Network "{}" is already configured for VM "{}".'.format(network, vm) From 6cd0ccf0ad4d31e6dbf354065a6d6d308822238b Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 22 Jun 2021 02:21:55 -0400 Subject: [PATCH 32/43] Fix network check on VM config modification --- daemon-common/vm.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/daemon-common/vm.py b/daemon-common/vm.py index aaba3758..a8585799 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -291,9 +291,18 @@ def modify_vm(zkhandler, domain, restart, new_vm_config): except Exception: return False, 'ERROR: Failed to parse new XML data.' + # Get our old network list for comparison purposes + old_vm_config = zkhandler.read(('domain.xml', dom_uuid)) + old_parsed_xml = lxml.objectify.fromstring(old_vm_config) + old_dnetworks = common.getDomainNetworks(old_parsed_xml, {}) + # If a SR-IOV network device is being added, set its used state dnetworks = common.getDomainNetworks(parsed_xml, {}) for network in dnetworks: + # Ignore networks that are already there + if network in old_dnetworks: + continue + if network['type'] in ['direct', 'hostdev']: dom_node = zkhandler.read(('domain.node', dom_uuid)) @@ -307,12 +316,6 @@ def modify_vm(zkhandler, domain, restart, new_vm_config): set_sriov_vf_vm(zkhandler, dom_uuid, dom_node, network['source'], network['mac'], network['type']) # If a SR-IOV network device is being removed, unset its used state - old_vm_config = zkhandler.read(('domain.xml', dom_uuid)) - try: - old_parsed_xml = lxml.objectify.fromstring(old_vm_config) - except Exception: - return False, 'ERROR: Failed to parse old XML data.' - old_dnetworks = common.getDomainNetworks(old_parsed_xml, {}) for network in old_dnetworks: if network['type'] in ['direct', 'hostdev']: if network['mac'] not in [n['mac'] for n in dnetworks]: From 26dd24e3f5352f789e5dda13735082ddddf60617 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 22 Jun 2021 02:22:15 -0400 Subject: [PATCH 33/43] Ensure MTU is set on VF when starting up --- node-daemon/pvcnoded/SRIOVVFInstance.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/node-daemon/pvcnoded/SRIOVVFInstance.py b/node-daemon/pvcnoded/SRIOVVFInstance.py index be9ffbc7..ddc1abca 100644 --- a/node-daemon/pvcnoded/SRIOVVFInstance.py +++ b/node-daemon/pvcnoded/SRIOVVFInstance.py @@ -41,9 +41,11 @@ class SRIOVVFInstance(object): self.pf = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.pf', self.vf)) self.mtu = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.mtu', self.vf)) - self.vfid = self.vf.replace('{}v'.format(self.pf), '') + self.logger.out('Setting MTU to {}'.format(self.mtu), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} mtu {}'.format(self.vf, self.mtu)) + # These properties are set via the DataWatch functions, to ensure they are configured on the system self.mac = None self.vlan_id = None From 07dbd55f033ae4e84361024f6d7270b0fbbb00bf Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 22 Jun 2021 02:31:14 -0400 Subject: [PATCH 34/43] Use list comprehension to compare against source --- daemon-common/vm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon-common/vm.py b/daemon-common/vm.py index a8585799..7b8dc2f9 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -300,7 +300,7 @@ def modify_vm(zkhandler, domain, restart, new_vm_config): dnetworks = common.getDomainNetworks(parsed_xml, {}) for network in dnetworks: # Ignore networks that are already there - if network in old_dnetworks: + if network['source'] in [net['source'] for net in old_dnetworks]: continue if network['type'] in ['direct', 'hostdev']: From 7d2a3b53610812fa78db7cd8f63f4fd7eed53353 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 22 Jun 2021 02:38:16 -0400 Subject: [PATCH 35/43] Ensure Macvtap NICs can use a model Defaults to virtio like a bridged NIC. Otherwise performance is abysmal. --- client-cli/cli_lib/vm.py | 5 +++-- client-cli/pvc.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/client-cli/cli_lib/vm.py b/client-cli/cli_lib/vm.py index f84eec05..41406895 100644 --- a/client-cli/cli_lib/vm.py +++ b/client-cli/cli_lib/vm.py @@ -574,9 +574,10 @@ def vm_networks_add(config, vm, network, macaddr, model, sriov, sriov_mode, rest ) # Add a macvtap SR-IOV network elif sriov_mode == 'macvtap': - device_string = ''.format( + device_string = ''.format( macaddr=macaddr, - network=network + network=network, + model=model ) else: return False, "ERROR: Invalid SR-IOV mode specified." diff --git a/client-cli/pvc.py b/client-cli/pvc.py index 92ac27b8..3be352ee 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -1317,7 +1317,7 @@ def vm_network_get(domain, raw): ) @click.option( '-m', '--model', 'model', default='virtio', - help='The model for the interface; must be a valid libvirt model. Not used for SR-IOV NETs.' + help='The model for the interface; must be a valid libvirt model. Not used for "netdev" SR-IOV NETs.' ) @click.option( '-s', '--sriov', 'sriov', is_flag=True, default=False, From 2928d695c992317d7625a414110ca71487061735 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 22 Jun 2021 03:20:15 -0400 Subject: [PATCH 36/43] Ensure migration method is updated on state changes --- node-daemon/pvcnoded/VMInstance.py | 1 + 1 file changed, 1 insertion(+) diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py index dd0f3d0e..e9758e5c 100644 --- a/node-daemon/pvcnoded/VMInstance.py +++ b/node-daemon/pvcnoded/VMInstance.py @@ -722,6 +722,7 @@ class VMInstance(object): self.state = self.zkhandler.read(('domain.state', self.domuuid)) self.node = self.zkhandler.read(('domain.node', self.domuuid)) self.lastnode = self.zkhandler.read(('domain.last_node', self.domuuid)) + self.migration_method = self.zkhandler.read(('domain.meta.migrate_method', self.domuuid)) # Check the current state of the VM try: From 3490ecbb595b57df5054f727019bc48eda1a42ba Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 22 Jun 2021 03:31:06 -0400 Subject: [PATCH 37/43] Remove explicit ZK address from Patronictl command --- node-daemon/pvcnoded/NodeInstance.py | 1 - 1 file changed, 1 deletion(-) diff --git a/node-daemon/pvcnoded/NodeInstance.py b/node-daemon/pvcnoded/NodeInstance.py index 9cc0c897..d92259fe 100644 --- a/node-daemon/pvcnoded/NodeInstance.py +++ b/node-daemon/pvcnoded/NodeInstance.py @@ -466,7 +466,6 @@ class NodeInstance(object): """ patronictl -c /etc/patroni/config.yml - -d zookeeper://localhost:2181 switchover --candidate {} --force From e6b26745cec61908a24396c629c16101db4a8d38 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 22 Jun 2021 03:40:21 -0400 Subject: [PATCH 38/43] Adjust some help messages in pvc.py --- client-cli/pvc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client-cli/pvc.py b/client-cli/pvc.py index 3be352ee..966ccb45 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -672,7 +672,7 @@ def vm_define(vmconfig, target_node, node_limit, node_selector, node_autostart, @click.option( '-m', '--method', 'migration_method', default='none', show_default=True, type=click.Choice(['none', 'live', 'shutdown']), - help='The preferred migration method of the VM between nodes; saved with VM.' + help='The preferred migration method of the VM between nodes.' ) @click.option( '-p', '--profile', 'provisioner_profile', default=None, show_default=False, @@ -1316,7 +1316,7 @@ def vm_network_get(domain, raw): help='Use this MAC address instead of random generation; must be a valid MAC address in colon-delimited format.' ) @click.option( - '-m', '--model', 'model', default='virtio', + '-m', '--model', 'model', default='virtio', show_default=True, help='The model for the interface; must be a valid libvirt model. Not used for "netdev" SR-IOV NETs.' ) @click.option( @@ -1324,7 +1324,7 @@ def vm_network_get(domain, raw): help='Identify that NET is an SR-IOV device name and not a VNI. Required for adding SR-IOV NETs.' ) @click.option( - '-d', '--sriov-mode', 'sriov_mode', default='hostdev', + '-d', '--sriov-mode', 'sriov_mode', default='macvtap', show_default=True, type=click.Choice(['hostdev', 'macvtap']), help='For SR-IOV NETs, the SR-IOV network device mode.' ) From 5ec198bf98137da3c11a6b0b7ae983574918f777 Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 22 Jun 2021 03:47:27 -0400 Subject: [PATCH 39/43] Update API doc with remaining items --- docs/manuals/swagger.json | 84 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/docs/manuals/swagger.json b/docs/manuals/swagger.json index af4eafaa..41cdc1a9 100644 --- a/docs/manuals/swagger.json +++ b/docs/manuals/swagger.json @@ -4642,6 +4642,90 @@ "tags": [ "network / sriov" ] + }, + "put": { + "description": "", + "parameters": [ + { + "description": "The vLAN ID for vLAN tagging (0 is disabled)", + "in": "query", + "name": "vlan_id", + "required": false, + "type": "integer" + }, + { + "description": "The vLAN QOS priority (0 is disabled)", + "in": "query", + "name": "vlan_qos", + "required": false, + "type": "integer" + }, + { + "description": "The minimum TX rate (0 is disabled)", + "in": "query", + "name": "tx_rate_min", + "required": false, + "type": "integer" + }, + { + "description": "The maximum TX rate (0 is disabled)", + "in": "query", + "name": "tx_rate_max", + "required": false, + "type": "integer" + }, + { + "description": "The administrative link state", + "enum": [ + "auto", + "enable", + "disable" + ], + "in": "query", + "name": "link_state", + "required": false, + "type": "string" + }, + { + "description": "Enable or disable spoof checking", + "in": "query", + "name": "spoof_check", + "required": false, + "type": "boolean" + }, + { + "description": "Enable or disable VF user trust", + "in": "query", + "name": "trust", + "required": false, + "type": "boolean" + }, + { + "description": "Enable or disable query RSS support", + "in": "query", + "name": "query_rss", + "required": false, + "type": "boolean" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/Message" + } + }, + "400": { + "description": "Bad request", + "schema": { + "$ref": "#/definitions/Message" + } + } + }, + "summary": "Set the configuration of {vf} on {node}", + "tags": [ + "network / 
sriov" + ] } }, "/api/v1/status": { From 7d2b7441c2a54f9cf3f3b36f32a7f7cac39ff370 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 22 Jun 2021 03:52:53 -0400 Subject: [PATCH 40/43] Mention SR-IOV in the Daemon and Ansible manuals --- docs/manuals/ansible.md | 18 ++++++++++++++++++ docs/manuals/daemon.md | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/docs/manuals/ansible.md b/docs/manuals/ansible.md index b89b0f94..e3b163d0 100644 --- a/docs/manuals/ansible.md +++ b/docs/manuals/ansible.md @@ -451,6 +451,12 @@ pvc_nodes: pvc_bridge_device: bondU +pvc_sriov_enable: True +pvc_sriov_device: + - phy: ens1f0 + mtu: 9000 + vfcount: 6 + pvc_upstream_device: "{{ networks['upstream']['device'] }}" pvc_upstream_mtu: "{{ networks['upstream']['mtu'] }}" pvc_upstream_domain: "{{ networks['upstream']['domain'] }}" @@ -901,6 +907,18 @@ The IPMI password for the node management controller. Unless a per-host override The device name of the underlying network interface to be used for "bridged"-type client networks. For each "bridged"-type network, an IEEE 802.3q vLAN and bridge will be created on top of this device to pass these networks. In most cases, using the reflexive `networks['cluster']['raw_device']` or `networks['upstream']['raw_device']` from the Base role is sufficient. +#### `pvc_sriov_enable` + +* *optional* + +Whether to enable or disable SR-IOV functionality. + +#### `pvc_sriov_device` + +* *optional* + +A list of SR-IOV devices. See the Daemon manual for details. + #### `pvc__*` The next set of entries is hard-coded to use the values from the global `networks` list. It should not need to be changed under most circumstances. Refer to the previous sections for specific notes about each entry. 
diff --git a/docs/manuals/daemon.md b/docs/manuals/daemon.md index e651727c..de46dbe6 100644 --- a/docs/manuals/daemon.md +++ b/docs/manuals/daemon.md @@ -146,6 +146,11 @@ pvc: console_log_lines: 1000 networking: bridge_device: ens4 + sriov_enable: True + sriov_device: + - phy: ens1f0 + mtu: 9000 + vfcount: 7 upstream: device: ens4 mtu: 1500 @@ -422,6 +427,34 @@ How many lines of VM console logs to keep in the Zookeeper database for each VM. The network interface device used to create Bridged client network vLANs on. For most clusters, should match the underlying device of the various static networks (e.g. `ens4` or `bond0`), though may also use a separate network interface. +#### `system` → `configuration` → `networking` → `sriov_enable` + +* *optional*, defaults to `False` +* *requires* `functions` → `enable_networking` + +Enables (or disables) SR-IOV functionality in PVC. If enabled, at least one `sriov_device` entry should be specified. + +#### `system` → `configuration` → `networking` → `sriov_device` + +* *optional* +* *requires* `functions` → `enable_networking` + +Contains a list of SR-IOV PF (physical function) devices and their basic configuration. Each element contains the following entries: + +##### `phy`: + +* *required* + +The raw Linux network device with SR-IOV PF functionality. + +##### `mtu` + +The MTU of the PF device, set on daemon startup. + +##### `vfcount` + +The number of VF devices to create on this PF. VF devices are then managed via PVC on a per-node basis. + #### `system` → `configuration` → `networking` * *optional* From 75f256021776004d01cd2aaead65512adc00c90c Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 22 Jun 2021 04:20:38 -0400 Subject: [PATCH 41/43] Add documentation on SR-IOV client networks --- docs/cluster-architecture.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/cluster-architecture.md b/docs/cluster-architecture.md index 3b9b5396..edc264d3 100644 --- a/docs/cluster-architecture.md +++ b/docs/cluster-architecture.md @@ -12,6 +12,7 @@ + [PVC client networks](#pvc-client-networks) - [Bridged (unmanaged) Client Networks](#bridged--unmanaged--client-networks) - [VXLAN (managed) Client Networks](#vxlan--managed--client-networks) + - [SR-IOV Client Networks](#sriov-client-networks) - [Other Client Networks](#other-client-networks) * [Node Layout: Considering how nodes are laid out](#node-layout--considering-how-nodes-are-laid-out) + [Node Functions: Coordinators versus Hypervisors](#node-functions--coordinators-versus-hypervisors) @@ -184,6 +185,26 @@ With this client network type, PVC is in full control of the network. No vLAN co NOTE: These networks may introduce a bottleneck and tromboning if there is a large amount of external and/or inter-network traffic on the cluster. The administrator should consider this carefully when deciding whether to use managed or bridged networks and properly evaluate the inter-network traffic requirements. +#### SR-IOV Client Networks + +The third type of client network is the SR-IOV network. SR-IOV (Single-Root I/O Virtualization) is a technique and feature enabled on modern high-performance NICs (for instance, those from Intel or nVidia) which allows a single physical Ethernet port (a "PF" in SR-IOV terminology) to be split, at a hardware level, into multiple virtual Ethernet ports ("VF"s), which can then be managed separately. Starting with version 0.9.21, PVC supports SR-IOV PF and VF configuration at the node level, and these VFs can be passed into VMs in two ways. 
+ +SR-IOV's main benefit is to offload bridging and network functions from the hypervisor layer, and direct them onto the hardware itself. This can increase network throughput in some situations, as well as provide near-complete isolation of guest networks from the hypervisors (in contrast with bridges which *can* expose client traffic to the hypervisors, and VXLANs which *do* expose client traffic to the hypervisors). For instance, a VF can have a vLAN specified, and the tagging/untagging of packets is then carried out at the hardware layer. + +There are however caveats to working with SR-IOV. At the most basic level, the biggest difference with SR-IOV compared to the other two network types is that SR-IOV must be configured on a per-node basis. That is, each node must have SR-IOV explicitly enabled, its specific PF devices defined, and a set of VFs created at PVC startup. Generally, with identical PVC nodes, this will not be a problem but is something to consider, especially if the servers are mismatched in any way. It is thus also possible to set some nodes with SR-IOV functionality, and others without, though care must be taken in this situation to set node limits in the VM metadata of any VMs which use SR-IOV VFs to prevent failed migrations. + +PFs are defined in the `pvcnoded.yml` configuration of each node, via the `sriov_device` list. Each PF can have an arbitrary number of VFs (`vfcount`) allocated, though each NIC vendor and model has specific limits. Once configured, specifically with Intel NICs, PFs (and specifically, the `vfcount` attribute in the driver) are immutable and cannot be changed easily without completely flushing the node and rebooting it, so care should be taken to select the desired settings as early in the cluster configuration as possible. + +Once created, VFs are also managed on a per-node basis. That is, each VF, on each host, even if they have the exact same device names, is managed separately. 
For instance, the PF `ens1f0` creating a VF `ens1f0v0` on "`hv1`", can have a different configuration from the identically-named VF `ens1f0v0` on "`hv2`". The administrator is responsible for ensuring consistency here, and for ensuring that devices do not overlap (e.g. assigning the same VF name to VMs on two separate nodes which might migrate to each other). PVC will however explicitly prevent two VMs from being assigned to the same VF on the same node, even if this may be technically possible in some cases. + +When attaching VFs to VMs, there are two supported modes: `macvtap`, and `hostdev`. + +`macvtap`, as the name suggests, uses the Linux `macvtap` driver to connect the VF to the VM. Once attached, the vNIC behaves just like a "bridged" network connection above, and like "bridged" connections, the "mode" of the NIC can be specified, defaulting to "virtio" but supporting various emulated devices instead. Note that in this mode, vLANs cannot be configured on the guest side; they must be specified in the VF configuration (`pvc network sriov vf set`) with one vLAN per VF. VMs with `macvtap` interfaces can be live migrated between nodes without issue, assuming there is a corresponding free VF on the destination node, and the SR-IOV functionality is transparent to the VM. + +`hostdev` is a direct PCIe passthrough method. With a VF attached to a VM in `hostdev` mode, the virtual PCIe NIC device itself becomes hidden from the node, and is visible only to the guest, where it appears as a discrete PCIe device. In this mode, vLANs and other attributes can be set on the guest side at will, though setting vLANs and other properties in the VF configuration is still supported. The main caveat to this mode is that VMs with connected `hostdev` SR-IOV VFs *cannot be live migrated between nodes*. Only a `shutdown` migration is supported, and, like `macvtap`, an identical PCIe device at the same bus address must be present on the target node. 
To prevent unexpected failures, PVC will explicitly set the VM metadata for the "migration method" to "shutdown" the first time that a `hostdev` VF is attached to it; if this changes later, the administrator must change this back explicitly. + +Generally speaking, SR-IOV connections are not recommended unless there is a good usecase for them. On modern hardware, software bridges are extremely performant, and are much simpler to manage. The functionality is provided for those rare usecases where SR-IOV is absolutely required by the administrator, but care must be taken to understand all the requirements and caveats of SR-IOV before using it in production. + +#### Other Client Networks + +Future PVC versions may support other client network types, such as direct-routing between VMs. From 1ae34c19608c0c8be890871d40756b867b68ae92 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 22 Jun 2021 04:31:02 -0400 Subject: [PATCH 42/43] Fix bad messages in volume remove --- client-cli/cli_lib/vm.py | 2 +- client-cli/pvc.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/client-cli/cli_lib/vm.py b/client-cli/cli_lib/vm.py index 41406895..d06e4821 100644 --- a/client-cli/cli_lib/vm.py +++ b/client-cli/cli_lib/vm.py @@ -912,7 +912,7 @@ def vm_volumes_remove(config, vm, volume, restart): xml = domain_information.get('xml', None) if xml is None: - return False, "VM does not have a valid XML doccument." + return False, "VM does not have a valid XML document." 
try: parsed_xml = fromstring(xml) diff --git a/client-cli/pvc.py b/client-cli/pvc.py index 966ccb45..cd573666 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -1506,7 +1506,7 @@ def vm_volume_add(domain, volume, disk_id, bus, disk_type, restart, confirm_flag 'domain' ) @click.argument( - 'vni' + 'volume' ) @click.option( '-r', '--restart', 'restart', is_flag=True, default=False, @@ -1518,9 +1518,9 @@ def vm_volume_add(domain, volume, disk_id, bus, disk_type, restart, confirm_flag help='Confirm the restart' ) @cluster_req -def vm_volume_remove(domain, vni, restart, confirm_flag): +def vm_volume_remove(domain, volume, restart, confirm_flag): """ - Remove the volume VNI to the virtual machine DOMAIN. + Remove VOLUME from the virtual machine DOMAIN; VOLUME must be a file path or RBD path in 'pool/volume' format. """ if restart and not confirm_flag and not config['unsafe']: try: @@ -1528,7 +1528,7 @@ def vm_volume_remove(domain, vni, restart, confirm_flag): except Exception: restart = False - retcode, retmsg = pvc_vm.vm_volumes_remove(config, domain, vni, restart) + retcode, retmsg = pvc_vm.vm_volumes_remove(config, domain, volume, restart) if retcode and not restart: retmsg = retmsg + " Changes will be applied on next VM start/restart." cleanup(retcode, retmsg) From 8d21da904134e61ff96d114efd41b5d1eaaf16ad Mon Sep 17 00:00:00 2001 From: "Joshua M. 
Boniface" Date: Tue, 22 Jun 2021 04:33:12 -0400 Subject: [PATCH 43/43] Add some additional interaction tests --- test-cluster.sh | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/test-cluster.sh b/test-cluster.sh index 794c897b..2b01e60f 100755 --- a/test-cluster.sh +++ b/test-cluster.sh @@ -22,10 +22,11 @@ _pvc maintenance off backup_tmp=$(mktemp) _pvc task backup --file ${backup_tmp} _pvc task restore --yes --file ${backup_tmp} -rm ${backup_tmp} +rm ${backup_tmp} || true # Provisioner tests _pvc provisioner profile list test +_pvc vm network get testX _pvc provisioner create --wait testX test sleep 30 @@ -50,9 +51,16 @@ sleep 5 _pvc vm move --wait --target hv1 testX sleep 5 _pvc vm meta testX --limit hv1 --selector vms --method live --profile test --no-autostart +_pvc vm vcpu set testX 4 +_pvc vm vcpu get testX +_pvc vm memory set testX 4096 +_pvc vm memory get testX +_pvc vm vcpu set testX 2 +_pvc vm memory set testX 2048 --restart --yes +sleep 5 _pvc vm list testX _pvc vm info --long testX -rm ${vm_tmp} +rm ${vm_tmp} || true # Node tests _pvc node primary --wait hv1 @@ -84,6 +92,14 @@ _pvc network dhcp remove --yes 10001 12:34:56:78:90:ab _pvc network modify --domain test10001.local 10001 _pvc network list _pvc network info --long 10001 + +# Network-VM interaction tests +_pvc vm network add testX 10001 --model virtio --restart --yes +sleep 30 +_pvc vm network get testX +_pvc vm network remove testX 10001 --restart --yes +sleep 5 + _pvc network remove --yes 10001 # Storage tests @@ -106,6 +122,14 @@ _pvc storage volume snapshot add testing testerX asnapshotX _pvc storage volume snapshot rename testing testerX asnapshotX asnapshotY _pvc storage volume snapshot list _pvc storage volume snapshot remove --yes testing testerX asnapshotY + +# Storage-VM interaction tests +_pvc vm volume add testX --type rbd --disk-id sdh --bus scsi testing/testerY --restart --yes +sleep 30 +_pvc vm volume get testX +_pvc vm volume remove 
testX testing/testerY --restart --yes +sleep 5 + _pvc storage volume remove --yes testing testerY _pvc storage volume remove --yes testing testerX _pvc storage pool remove --yes testing