diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py index 2ce9c16b..2bca5f8c 100755 --- a/api-daemon/pvcapid/flaskapi.py +++ b/api-daemon/pvcapid/flaskapi.py @@ -2719,6 +2719,301 @@ class API_Network_ACL_Element(Resource): api.add_resource(API_Network_ACL_Element, '/network//acl/') +########################################################## +# Client API - SR-IOV +########################################################## + +# /sriov +class API_SRIOV_Root(Resource): + @Authenticator + def get(self): + pass + + +api.add_resource(API_SRIOV_Root, '/sriov') + + +# /sriov/pf +class API_SRIOV_PF_Root(Resource): + @RequestParser([ + {'name': 'node', 'required': True, 'helptext': "A valid node must be specified."}, + ]) + @Authenticator + def get(self, reqargs): + """ + Return a list of SR-IOV PFs on a given node + --- + tags: + - network / sriov + responses: + 200: + description: OK + schema: + type: object + id: sriov_pf + properties: + phy: + type: string + description: The name of the SR-IOV PF device + mtu: + type: string + description: The MTU of the SR-IOV PF device + vfs: + type: list + items: + type: string + description: The PHY name of a VF of this PF + """ + return api_helper.sriov_pf_list(reqargs.get('node')) + + +api.add_resource(API_SRIOV_PF_Root, '/sriov/pf') + + +# /sriov/pf/ +class API_SRIOV_PF_Node(Resource): + @Authenticator + def get(self, node): + """ + Return a list of SR-IOV PFs on node {node} + --- + tags: + - network / sriov + responses: + 200: + description: OK + schema: + $ref: '#/definitions/sriov_pf' + """ + return api_helper.sriov_pf_list(node) + + +api.add_resource(API_SRIOV_PF_Node, '/sriov/pf/') + + +# /sriov/vf +class API_SRIOV_VF_Root(Resource): + @RequestParser([ + {'name': 'node', 'required': True, 'helptext': "A valid node must be specified."}, + {'name': 'pf', 'required': False, 'helptext': "A PF parent may be specified."}, + ]) + @Authenticator + def get(self, reqargs): + """ + Return a list of SR-IOV VFs on 
a given node, optionally limited to those in the specified PF + --- + tags: + - network / sriov + responses: + 200: + description: OK + schema: + type: object + id: sriov_vf + properties: + phy: + type: string + description: The name of the SR-IOV VF device + pf: + type: string + description: The name of the SR-IOV PF parent of this VF device + mtu: + type: integer + description: The current MTU of the VF device + mac: + type: string + description: The current MAC address of the VF device + config: + type: object + id: sriov_vf_config + properties: + vlan_id: + type: string + description: The tagged vLAN ID of the SR-IOV VF device + vlan_qos: + type: string + description: The QOS group of the tagged vLAN + tx_rate_min: + type: string + description: The minimum TX rate of the SR-IOV VF device + tx_rate_max: + type: string + description: The maximum TX rate of the SR-IOV VF device + spoof_check: + type: boolean + description: Whether device spoof checking is enabled or disabled + link_state: + type: string + description: The current SR-IOV VF link state (either enabled, disabled, or auto) + trust: + type: boolean + description: Whether guest device trust is enabled or disabled + query_rss: + type: boolean + description: Whether VF RSS querying is enabled or disabled + usage: + type: object + id: sriov_vf_usage + properties: + used: + type: boolean + description: Whether the SR-IOV VF is currently used by a VM or not + domain: + type: boolean + description: The UUID of the domain the SR-IOV VF is currently used by + """ + return api_helper.sriov_vf_list(reqargs.get('node'), reqargs.get('pf', None)) + + +api.add_resource(API_SRIOV_VF_Root, '/sriov/vf') + + +# /sriov/vf/ +class API_SRIOV_VF_Node(Resource): + @RequestParser([ + {'name': 'pf', 'required': False, 'helptext': "A PF parent may be specified."}, + ]) + @Authenticator + def get(self, node, reqargs): + """ + Return a list of SR-IOV VFs on node {node}, optionally limited to those in the specified PF + --- + tags: 
+ - network / sriov + responses: + 200: + description: OK + schema: + $ref: '#/definitions/sriov_vf' + """ + return api_helper.sriov_vf_list(node, reqargs.get('pf', None)) + + +api.add_resource(API_SRIOV_VF_Node, '/sriov/vf/') + + +# /sriov/vf// +class API_SRIOV_VF_Element(Resource): + @Authenticator + def get(self, node, vf): + """ + Return information about {vf} on {node} + --- + tags: + - network / sriov + responses: + 200: + description: OK + schema: + $ref: '#/definitions/sriov_vf' + 404: + description: Not found + schema: + type: object + id: Message + """ + vf_list = list() + full_vf_list, _ = api_helper.sriov_vf_list(node) + for vf_element in full_vf_list: + if vf_element['phy'] == vf: + vf_list.append(vf_element) + + if len(vf_list) == 1: + return vf_list, 200 + else: + return {'message': "No VF '{}' found on node '{}'".format(vf, node)}, 404 + + @RequestParser([ + {'name': 'vlan_id'}, + {'name': 'vlan_qos'}, + {'name': 'tx_rate_min'}, + {'name': 'tx_rate_max'}, + {'name': 'link_state', 'choices': ('auto', 'enable', 'disable'), 'helptext': "A valid state must be specified"}, + {'name': 'spoof_check'}, + {'name': 'trust'}, + {'name': 'query_rss'}, + ]) + @Authenticator + def put(self, node, vf, reqargs): + """ + Set the configuration of {vf} on {node} + --- + tags: + - network / sriov + parameters: + - in: query + name: vlan_id + type: integer + required: false + description: The vLAN ID for vLAN tagging (0 is disabled) + - in: query + name: vlan_qos + type: integer + required: false + description: The vLAN QOS priority (0 is disabled) + - in: query + name: tx_rate_min + type: integer + required: false + description: The minimum TX rate (0 is disabled) + - in: query + name: tx_rate_max + type: integer + required: false + description: The maximum TX rate (0 is disabled) + - in: query + name: link_state + type: string + required: false + description: The administrative link state + enum: + - auto + - enable + - disable + - in: query + name: spoof_check + 
type: boolean + required: false + description: Enable or disable spoof checking + - in: query + name: trust + type: boolean + required: false + description: Enable or disable VF user trust + - in: query + name: query_rss + type: boolean + required: false + description: Enable or disable query RSS support + responses: + 200: + description: OK + schema: + type: object + id: Message + 400: + description: Bad request + schema: + type: object + id: Message + """ + return api_helper.update_sriov_vf_config( + node, + vf, + reqargs.get('vlan_id', None), + reqargs.get('vlan_qos', None), + reqargs.get('tx_rate_min', None), + reqargs.get('tx_rate_max', None), + reqargs.get('link_state', None), + reqargs.get('spoof_check', None), + reqargs.get('trust', None), + reqargs.get('query_rss', None), + ) + + +api.add_resource(API_SRIOV_VF_Element, '/sriov/vf//') + + ########################################################## # Client API - Storage ########################################################## diff --git a/api-daemon/pvcapid/helper.py b/api-daemon/pvcapid/helper.py index 974130c6..81ce3093 100755 --- a/api-daemon/pvcapid/helper.py +++ b/api-daemon/pvcapid/helper.py @@ -978,6 +978,80 @@ def net_acl_remove(zkhandler, network, description): return output, retcode +# +# SR-IOV functions +# +@ZKConnection(config) +def sriov_pf_list(zkhandler, node): + """ + List all PFs on a given node. + """ + retflag, retdata = pvc_network.get_list_sriov_pf(zkhandler, node) + + if retflag: + if retdata: + retcode = 200 + else: + retcode = 404 + retdata = { + 'message': 'PF not found.' + } + else: + retcode = 400 + retdata = { + 'message': retdata + } + + return retdata, retcode + + +@ZKConnection(config) +def sriov_vf_list(zkhandler, node, pf=None): + """ + List all VFs on a given node, optionally limited to PF. 
+ """ + retflag, retdata = pvc_network.get_list_sriov_vf(zkhandler, node, pf) + + if retflag: + retcode = 200 + else: + retcode = 400 + + if retflag: + if retdata: + retcode = 200 + else: + retcode = 404 + retdata = { + 'message': 'VF not found.' + } + else: + retcode = 400 + retdata = { + 'message': retdata + } + + return retdata, retcode + + +@ZKConnection(config) +def update_sriov_vf_config(zkhandler, node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss): + """ + Update configuration of a VF on NODE. + """ + retflag, retdata = pvc_network.set_sriov_vf_config(zkhandler, node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss) + + if retflag: + retcode = 200 + else: + retcode = 400 + + output = { + 'message': retdata.replace('\"', '\'') + } + return output, retcode + + # # Ceph functions # diff --git a/api-daemon/pvcapid/provisioner.py b/api-daemon/pvcapid/provisioner.py index 00dc781d..88a0f69e 100755 --- a/api-daemon/pvcapid/provisioner.py +++ b/api-daemon/pvcapid/provisioner.py @@ -1323,6 +1323,30 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r vm_architecture=system_architecture ) + # Add disk devices + monitor_list = list() + coordinator_names = config['storage_hosts'] + for coordinator in coordinator_names: + monitor_list.append("{}.{}".format(coordinator, config['storage_domain'])) + + ceph_storage_secret = config['ceph_storage_secret_uuid'] + + for volume in vm_data['volumes']: + vm_schema += libvirt_schema.devices_disk_header.format( + ceph_storage_secret=ceph_storage_secret, + disk_pool=volume['pool'], + vm_name=vm_name, + disk_id=volume['disk_id'] + ) + for monitor in monitor_list: + vm_schema += libvirt_schema.devices_disk_coordinator.format( + coordinator_name=monitor, + coordinator_ceph_mon_port=config['ceph_monitor_port'] + ) + vm_schema += libvirt_schema.devices_disk_footer + + vm_schema += libvirt_schema.devices_vhostmd + # Add 
network devices network_id = 0 for network in vm_data['networks']: @@ -1364,30 +1388,6 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r network_id += 1 - # Add disk devices - monitor_list = list() - coordinator_names = config['storage_hosts'] - for coordinator in coordinator_names: - monitor_list.append("{}.{}".format(coordinator, config['storage_domain'])) - - ceph_storage_secret = config['ceph_storage_secret_uuid'] - - for volume in vm_data['volumes']: - vm_schema += libvirt_schema.devices_disk_header.format( - ceph_storage_secret=ceph_storage_secret, - disk_pool=volume['pool'], - vm_name=vm_name, - disk_id=volume['disk_id'] - ) - for monitor in monitor_list: - vm_schema += libvirt_schema.devices_disk_coordinator.format( - coordinator_name=monitor, - coordinator_ceph_mon_port=config['ceph_monitor_port'] - ) - vm_schema += libvirt_schema.devices_disk_footer - - vm_schema += libvirt_schema.devices_vhostmd - # Add default devices vm_schema += libvirt_schema.devices_default diff --git a/client-cli/cli_lib/network.py b/client-cli/cli_lib/network.py index fa006e92..52fd704b 100644 --- a/client-cli/cli_lib/network.py +++ b/client-cli/cli_lib/network.py @@ -360,7 +360,6 @@ def net_acl_add(config, net, direction, description, rule, order): def net_acl_remove(config, net, description): - """ Remove a network ACL @@ -378,27 +377,135 @@ def net_acl_remove(config, net, description): return retstatus, response.json().get('message', '') +# +# SR-IOV functions +# +def net_sriov_pf_list(config, node): + """ + List all PFs on NODE + + API endpoint: GET /api/v1/sriov/pf/ + API arguments: node={node} + API schema: [{json_data_object},{json_data_object},etc.] 
+ """ + response = call_api(config, 'get', '/sriov/pf/{}'.format(node)) + + if response.status_code == 200: + return True, response.json() + else: + return False, response.json().get('message', '') + + +def net_sriov_vf_set(config, node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss): + """ + Modify configuration of a SR-IOV VF + + API endpoint: PUT /api/v1/sriov/vf/<node>/<vf> + API arguments: vlan_id={vlan_id}, vlan_qos={vlan_qos}, tx_rate_min={tx_rate_min}, tx_rate_max={tx_rate_max}, + link_state={link_state}, spoof_check={spoof_check}, trust={trust}, query_rss={query_rss} + API schema: {"message": "{data}"} + """ + params = dict() + + # Update any params that we've sent + if vlan_id is not None: + params['vlan_id'] = vlan_id + + if vlan_qos is not None: + params['vlan_qos'] = vlan_qos + + if tx_rate_min is not None: + params['tx_rate_min'] = tx_rate_min + + if tx_rate_max is not None: + params['tx_rate_max'] = tx_rate_max + + if link_state is not None: + params['link_state'] = link_state + + if spoof_check is not None: + params['spoof_check'] = spoof_check + + if trust is not None: + params['trust'] = trust + + if query_rss is not None: + params['query_rss'] = query_rss + + # Write the new configuration to the API + response = call_api(config, 'put', '/sriov/vf/{node}/{vf}'.format(node=node, vf=vf), params=params) + + if response.status_code == 200: + retstatus = True + else: + retstatus = False + + return retstatus, response.json().get('message', '') + + +def net_sriov_vf_list(config, node, pf=None): + """ + List all VFs on NODE, optionally limited by PF + + API endpoint: GET /api/v1/sriov/vf/<node> + API arguments: node={node}, pf={pf} + API schema: [{json_data_object},{json_data_object},etc.] 
+ """ + params = dict() + params['pf'] = pf + + response = call_api(config, 'get', '/sriov/vf/{}'.format(node), params=params) + + if response.status_code == 200: + return True, response.json() + else: + return False, response.json().get('message', '') + + +def net_sriov_vf_info(config, node, vf): + """ + Get info about VF on NODE + + API endpoint: GET /api/v1/sriov/vf// + API arguments: + API schema: [{json_data_object}] + """ + response = call_api(config, 'get', '/sriov/vf/{}/{}'.format(node, vf)) + + if response.status_code == 200: + if isinstance(response.json(), list) and len(response.json()) != 1: + # No exact match; return not found + return False, "VF not found." + else: + # Return a single instance if the response is a list + if isinstance(response.json(), list): + return True, response.json()[0] + # This shouldn't happen, but is here just in case + else: + return True, response.json() + else: + return False, response.json().get('message', '') + + # # Output display functions # -def getOutputColours(network_information): - if network_information['ip6']['network'] != "None": - v6_flag_colour = ansiprint.green() +def getColour(value): + if value in ['True', "start"]: + return ansiprint.green() + elif value in ["restart", "shutdown"]: + return ansiprint.yellow() + elif value in ["stop", "fail"]: + return ansiprint.red() else: - v6_flag_colour = ansiprint.blue() - if network_information['ip4']['network'] != "None": - v4_flag_colour = ansiprint.green() - else: - v4_flag_colour = ansiprint.blue() + return ansiprint.blue() - if network_information['ip6']['dhcp_flag'] == "True": - dhcp6_flag_colour = ansiprint.green() - else: - dhcp6_flag_colour = ansiprint.blue() - if network_information['ip4']['dhcp_flag'] == "True": - dhcp4_flag_colour = ansiprint.green() - else: - dhcp4_flag_colour = ansiprint.blue() + +def getOutputColours(network_information): + v6_flag_colour = getColour(network_information['ip6']['network']) + v4_flag_colour = 
getColour(network_information['ip4']['network']) + dhcp6_flag_colour = getColour(network_information['ip6']['dhcp_flag']) + dhcp4_flag_colour = getColour(network_information['ip4']['dhcp_flag']) return v6_flag_colour, v4_flag_colour, dhcp6_flag_colour, dhcp4_flag_colour @@ -700,3 +807,245 @@ def format_list_acl(acl_list): ) return '\n'.join(sorted(acl_list_output)) + + +def format_list_sriov_pf(pf_list): + # The maximum column width of the VFs column + max_vfs_length = 70 + + # Handle when we get an empty entry + if not pf_list: + pf_list = list() + + pf_list_output = [] + + # Determine optimal column widths + pf_phy_length = 6 + pf_mtu_length = 4 + pf_vfs_length = 4 + + for pf_information in pf_list: + # phy column + _pf_phy_length = len(str(pf_information['phy'])) + 1 + if _pf_phy_length > pf_phy_length: + pf_phy_length = _pf_phy_length + # mtu column + _pf_mtu_length = len(str(pf_information['mtu'])) + 1 + if _pf_mtu_length > pf_mtu_length: + pf_mtu_length = _pf_mtu_length + # vfs column + _pf_vfs_length = len(str(', '.join(pf_information['vfs']))) + 1 + if _pf_vfs_length > pf_vfs_length: + pf_vfs_length = _pf_vfs_length + + # We handle columnizing very long lists later + if pf_vfs_length > max_vfs_length: + pf_vfs_length = max_vfs_length + + # Format the string (header) + pf_list_output.append('{bold}\ +{pf_phy: <{pf_phy_length}} \ +{pf_mtu: <{pf_mtu_length}} \ +{pf_vfs: <{pf_vfs_length}} \ +{end_bold}'.format( + bold=ansiprint.bold(), + end_bold=ansiprint.end(), + pf_phy_length=pf_phy_length, + pf_mtu_length=pf_mtu_length, + pf_vfs_length=pf_vfs_length, + pf_phy='Device', + pf_mtu='MTU', + pf_vfs='VFs') + ) + + for pf_information in pf_list: + # Figure out how to nicely columnize our list + nice_vfs_list = [list()] + vfs_lines = 0 + cur_vfs_length = 0 + for vfs in pf_information['vfs']: + vfs_len = len(vfs) + cur_vfs_length += vfs_len + 2 # for the comma and space + if cur_vfs_length > max_vfs_length: + cur_vfs_length = 0 + vfs_lines += 1 + 
nice_vfs_list.append(list()) + nice_vfs_list[vfs_lines].append(vfs) + + # Append the lines + pf_list_output.append('{bold}\ +{pf_phy: <{pf_phy_length}} \ +{pf_mtu: <{pf_mtu_length}} \ +{pf_vfs: <{pf_vfs_length}} \ +{end_bold}'.format( + bold='', + end_bold='', + pf_phy_length=pf_phy_length, + pf_mtu_length=pf_mtu_length, + pf_vfs_length=pf_vfs_length, + pf_phy=pf_information['phy'], + pf_mtu=pf_information['mtu'], + pf_vfs=', '.join(nice_vfs_list[0])) + ) + + if len(nice_vfs_list) > 1: + for idx in range(1, len(nice_vfs_list)): + pf_list_output.append('{bold}\ +{pf_phy: <{pf_phy_length}} \ +{pf_mtu: <{pf_mtu_length}} \ +{pf_vfs: <{pf_vfs_length}} \ +{end_bold}'.format( + bold='', + end_bold='', + pf_phy_length=pf_phy_length, + pf_mtu_length=pf_mtu_length, + pf_vfs_length=pf_vfs_length, + pf_phy='', + pf_mtu='', + pf_vfs=', '.join(nice_vfs_list[idx])) + ) + + return '\n'.join(pf_list_output) + + +def format_list_sriov_vf(vf_list): + # Handle when we get an empty entry + if not vf_list: + vf_list = list() + + vf_list_output = [] + + # Determine optimal column widths + vf_phy_length = 4 + vf_pf_length = 3 + vf_mtu_length = 4 + vf_mac_length = 11 + vf_used_length = 5 + vf_domain_length = 5 + + for vf_information in vf_list: + # phy column + _vf_phy_length = len(str(vf_information['phy'])) + 1 + if _vf_phy_length > vf_phy_length: + vf_phy_length = _vf_phy_length + # pf column + _vf_pf_length = len(str(vf_information['pf'])) + 1 + if _vf_pf_length > vf_pf_length: + vf_pf_length = _vf_pf_length + # mtu column + _vf_mtu_length = len(str(vf_information['mtu'])) + 1 + if _vf_mtu_length > vf_mtu_length: + vf_mtu_length = _vf_mtu_length + # mac column + _vf_mac_length = len(str(vf_information['mac'])) + 1 + if _vf_mac_length > vf_mac_length: + vf_mac_length = _vf_mac_length + # used column + _vf_used_length = len(str(vf_information['usage']['used'])) + 1 + if _vf_used_length > vf_used_length: + vf_used_length = _vf_used_length + # domain column + _vf_domain_length = 
len(str(vf_information['usage']['domain'])) + 1 + if _vf_domain_length > vf_domain_length: + vf_domain_length = _vf_domain_length + + # Format the string (header) + vf_list_output.append('{bold}\ +{vf_phy: <{vf_phy_length}} \ +{vf_pf: <{vf_pf_length}} \ +{vf_mtu: <{vf_mtu_length}} \ +{vf_mac: <{vf_mac_length}} \ +{vf_used: <{vf_used_length}} \ +{vf_domain: <{vf_domain_length}} \ +{end_bold}'.format( + bold=ansiprint.bold(), + end_bold=ansiprint.end(), + vf_phy_length=vf_phy_length, + vf_pf_length=vf_pf_length, + vf_mtu_length=vf_mtu_length, + vf_mac_length=vf_mac_length, + vf_used_length=vf_used_length, + vf_domain_length=vf_domain_length, + vf_phy='Device', + vf_pf='PF', + vf_mtu='MTU', + vf_mac='MAC Address', + vf_used='Used', + vf_domain='Domain') + ) + + for vf_information in vf_list: + vf_list_output.append('{bold}\ +{vf_phy: <{vf_phy_length}} \ +{vf_pf: <{vf_pf_length}} \ +{vf_mtu: <{vf_mtu_length}} \ +{vf_mac: <{vf_mac_length}} \ +{vf_used: <{vf_used_length}} \ +{vf_domain: <{vf_domain_length}} \ +{end_bold}'.format( + bold=ansiprint.bold(), + end_bold=ansiprint.end(), + vf_phy_length=vf_phy_length, + vf_pf_length=vf_pf_length, + vf_mtu_length=vf_mtu_length, + vf_mac_length=vf_mac_length, + vf_used_length=vf_used_length, + vf_domain_length=vf_domain_length, + vf_phy=vf_information['phy'], + vf_pf=vf_information['pf'], + vf_mtu=vf_information['mtu'], + vf_mac=vf_information['mac'], + vf_used=vf_information['usage']['used'], + vf_domain=vf_information['usage']['domain']) + ) + + return '\n'.join(vf_list_output) + + +def format_info_sriov_vf(config, vf_information, node): + if not vf_information: + return "No VF found" + + # Get information on the using VM if applicable + if vf_information['usage']['used'] == 'True' and vf_information['usage']['domain']: + vm_information = call_api(config, 'get', '/vm/{vm}'.format(vm=vf_information['usage']['domain'])).json() + if isinstance(vm_information, list) and len(vm_information) > 0: + vm_information = vm_information[0] 
+ else: + vm_information = None + + # Format a nice output: do this line-by-line then concat the elements at the end + ainformation = [] + ainformation.append('{}SR-IOV VF information:{}'.format(ansiprint.bold(), ansiprint.end())) + ainformation.append('') + # Basic information + ainformation.append('{}PHY:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['phy'])) + ainformation.append('{}PF:{} {} @ {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pf'], node)) + ainformation.append('{}MTU:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['mtu'])) + ainformation.append('{}MAC Address:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['mac'])) + ainformation.append('') + # Configuration information + ainformation.append('{}vLAN ID:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['vlan_id'])) + ainformation.append('{}vLAN QOS priority:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['vlan_qos'])) + ainformation.append('{}Minimum TX Rate:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['tx_rate_min'])) + ainformation.append('{}Maximum TX Rate:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['tx_rate_max'])) + ainformation.append('{}Link State:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['config']['link_state'])) + ainformation.append('{}Spoof Checking:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['config']['spoof_check']), vf_information['config']['spoof_check'], ansiprint.end())) + ainformation.append('{}VF User Trust:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['config']['trust']), vf_information['config']['trust'], ansiprint.end())) + ainformation.append('{}Query RSS Config:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['config']['query_rss']), 
vf_information['config']['query_rss'], ansiprint.end())) + ainformation.append('') + # PCIe bus information + ainformation.append('{}PCIe domain:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pci']['domain'])) + ainformation.append('{}PCIe bus:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pci']['bus'])) + ainformation.append('{}PCIe slot:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pci']['slot'])) + ainformation.append('{}PCIe function:{} {}'.format(ansiprint.purple(), ansiprint.end(), vf_information['pci']['function'])) + ainformation.append('') + # Usage information + ainformation.append('{}VF Used:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), getColour(vf_information['usage']['used']), vf_information['usage']['used'], ansiprint.end())) + if vf_information['usage']['used'] == 'True' and vm_information is not None: + ainformation.append('{}Using Domain:{} {} ({}) ({}{}{})'.format(ansiprint.purple(), ansiprint.end(), vf_information['usage']['domain'], vm_information['name'], getColour(vm_information['state']), vm_information['state'], ansiprint.end())) + else: + ainformation.append('{}Using Domain:{} N/A'.format(ansiprint.purple(), ansiprint.end())) + + # Join it all together + return '\n'.join(ainformation) diff --git a/client-cli/cli_lib/vm.py b/client-cli/cli_lib/vm.py index e316995e..d06e4821 100644 --- a/client-cli/cli_lib/vm.py +++ b/client-cli/cli_lib/vm.py @@ -501,7 +501,7 @@ def format_vm_memory(config, name, memory): return '\n'.join(output_list) -def vm_networks_add(config, vm, network, macaddr, model, restart): +def vm_networks_add(config, vm, network, macaddr, model, sriov, sriov_mode, restart): """ Add a new network to the VM @@ -514,17 +514,19 @@ def vm_networks_add(config, vm, network, macaddr, model, restart): from random import randint import cli_lib.network as pvc_network - # Verify that the provided network is valid - retcode, retdata = pvc_network.net_info(config, network) 
- if not retcode: - # Ignore the three special networks - if network not in ['upstream', 'cluster', 'storage']: - return False, "Network {} is not present in the cluster.".format(network) + # Verify that the provided network is valid (not in SR-IOV mode) + if not sriov: + retcode, retdata = pvc_network.net_info(config, network) + if not retcode: + # Ignore the three special networks + if network not in ['upstream', 'cluster', 'storage']: + return False, "Network {} is not present in the cluster.".format(network) - if network in ['upstream', 'cluster', 'storage']: - br_prefix = 'br' - else: - br_prefix = 'vmbr' + # Set the bridge prefix + if network in ['upstream', 'cluster', 'storage']: + br_prefix = 'br' + else: + br_prefix = 'vmbr' status, domain_information = vm_info(config, vm) if not status: @@ -551,24 +553,74 @@ def vm_networks_add(config, vm, network, macaddr, model, restart): octetC=random_octet_C ) - device_string = ''.format( - macaddr=macaddr, - bridge="{}{}".format(br_prefix, network), - model=model - ) + # Add an SR-IOV network + if sriov: + valid, sriov_vf_information = pvc_network.net_sriov_vf_info(config, domain_information['node'], network) + if not valid: + return False, 'Specified SR-IOV VF "{}" does not exist on VM node "{}".'.format(network, domain_information['node']) + + # Add a hostdev (direct PCIe) SR-IOV network + if sriov_mode == 'hostdev': + bus_address = 'domain="0x{pci_domain}" bus="0x{pci_bus}" slot="0x{pci_slot}" function="0x{pci_function}"'.format( + pci_domain=sriov_vf_information['pci']['domain'], + pci_bus=sriov_vf_information['pci']['bus'], + pci_slot=sriov_vf_information['pci']['slot'], + pci_function=sriov_vf_information['pci']['function'], + ) + device_string = '
{network}'.format( + macaddr=macaddr, + bus_address=bus_address, + network=network + ) + # Add a macvtap SR-IOV network + elif sriov_mode == 'macvtap': + device_string = ''.format( + macaddr=macaddr, + network=network, + model=model + ) + else: + return False, "ERROR: Invalid SR-IOV mode specified." + # Add a normal bridged PVC network + else: + device_string = ''.format( + macaddr=macaddr, + bridge="{}{}".format(br_prefix, network), + model=model + ) + device_xml = fromstring(device_string) - last_interface = None all_interfaces = parsed_xml.devices.find('interface') if all_interfaces is None: all_interfaces = [] for interface in all_interfaces: - last_interface = re.match(r'[vm]*br([0-9a-z]+)', interface.source.attrib.get('bridge')).group(1) - if last_interface == network: - return False, 'Network {} is already configured for VM {}.'.format(network, vm) - if last_interface is not None: - for interface in parsed_xml.devices.find('interface'): - if last_interface == re.match(r'[vm]*br([0-9a-z]+)', interface.source.attrib.get('bridge')).group(1): + if sriov: + if sriov_mode == 'hostdev': + if interface.attrib.get('type') == 'hostdev': + interface_address = 'domain="{pci_domain}" bus="{pci_bus}" slot="{pci_slot}" function="{pci_function}"'.format( + pci_domain=interface.source.address.attrib.get('domain'), + pci_bus=interface.source.address.attrib.get('bus'), + pci_slot=interface.source.address.attrib.get('slot'), + pci_function=interface.source.address.attrib.get('function') + ) + if interface_address == bus_address: + return False, 'Network "{}" is already configured for VM "{}".'.format(network, vm) + elif sriov_mode == 'macvtap': + if interface.attrib.get('type') == 'direct': + interface_dev = interface.source.attrib.get('dev') + if interface_dev == network: + return False, 'Network "{}" is already configured for VM "{}".'.format(network, vm) + else: + if interface.attrib.get('type') == 'bridge': + interface_vni = re.match(r'[vm]*br([0-9a-z]+)', 
interface.source.attrib.get('bridge')).group(1) + if interface_vni == network: + return False, 'Network "{}" is already configured for VM "{}".'.format(network, vm) + + # Add the interface at the end of the list (or, right above emulator) + if len(all_interfaces) > 0: + for idx, interface in enumerate(parsed_xml.devices.find('interface')): + if idx == len(all_interfaces) - 1: interface.addnext(device_xml) else: parsed_xml.devices.find('emulator').addprevious(device_xml) @@ -581,7 +633,7 @@ def vm_networks_add(config, vm, network, macaddr, model, restart): return vm_modify(config, vm, new_xml, restart) -def vm_networks_remove(config, vm, network, restart): +def vm_networks_remove(config, vm, network, sriov, restart): """ Remove a network to the VM @@ -605,17 +657,33 @@ def vm_networks_remove(config, vm, network, restart): except Exception: return False, 'ERROR: Failed to parse XML data.' + changed = False for interface in parsed_xml.devices.find('interface'): - if_vni = re.match(r'[vm]*br([0-9a-z]+)', interface.source.attrib.get('bridge')).group(1) - if network == if_vni: - interface.getparent().remove(interface) + if sriov: + if interface.attrib.get('type') == 'hostdev': + if_dev = str(interface.sriov_device) + if network == if_dev: + interface.getparent().remove(interface) + changed = True + elif interface.attrib.get('type') == 'direct': + if_dev = str(interface.source.attrib.get('dev')) + if network == if_dev: + interface.getparent().remove(interface) + changed = True + else: + if_vni = re.match(r'[vm]*br([0-9a-z]+)', interface.source.attrib.get('bridge')).group(1) + if network == if_vni: + interface.getparent().remove(interface) + changed = True + if changed: + try: + new_xml = tostring(parsed_xml, pretty_print=True) + except Exception: + return False, 'ERROR: Failed to dump XML data.' - try: - new_xml = tostring(parsed_xml, pretty_print=True) - except Exception: - return False, 'ERROR: Failed to dump XML data.' 
- - return vm_modify(config, vm, new_xml, restart) + return vm_modify(config, vm, new_xml, restart) + else: + return False, 'ERROR: Network "{}" does not exist on VM.'.format(network) def vm_networks_get(config, vm): @@ -844,7 +912,7 @@ def vm_volumes_remove(config, vm, volume, restart): xml = domain_information.get('xml', None) if xml is None: - return False, "VM does not have a valid XML doccument." + return False, "VM does not have a valid XML document." try: parsed_xml = fromstring(xml) @@ -1175,17 +1243,14 @@ def format_info(config, domain_information, long_output): # Network list net_list = [] + cluster_net_list = call_api(config, 'get', '/network').json() for net in domain_information['networks']: - # Split out just the numerical (VNI) part of the brXXXX name - net_vnis = re.findall(r'\d+', net['source']) - if net_vnis: - net_vni = net_vnis[0] - else: - net_vni = re.sub('br', '', net['source']) - - response = call_api(config, 'get', '/network/{net}'.format(net=net_vni)) - if response.status_code != 200 and net_vni not in ['cluster', 'storage', 'upstream']: - net_list.append(ansiprint.red() + net_vni + ansiprint.end() + ' [invalid]') + net_vni = net['vni'] + if net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^macvtap:.*', net_vni) and not re.match(r'^hostdev:.*', net_vni): + if int(net_vni) not in [net['vni'] for net in cluster_net_list]: + net_list.append(ansiprint.red() + net_vni + ansiprint.end() + ' [invalid]') + else: + net_list.append(net_vni) else: net_list.append(net_vni) @@ -1213,17 +1278,31 @@ def format_info(config, domain_information, long_output): width=name_length )) ainformation.append('') - ainformation.append('{}Interfaces:{} {}ID Type Source Model MAC Data (r/w) Packets (r/w) Errors (r/w){}'.format(ansiprint.purple(), ansiprint.end(), ansiprint.bold(), ansiprint.end())) + ainformation.append('{}Interfaces:{} {}ID Type Source Model MAC Data (r/w) Packets (r/w) Errors (r/w){}'.format(ansiprint.purple(), ansiprint.end(), 
ansiprint.bold(), ansiprint.end())) for net in domain_information['networks']: - ainformation.append(' {0: <3} {1: <7} {2: <10} {3: <8} {4: <18} {5: <12} {6: <15} {7: <12}'.format( + net_type = net['type'] + net_source = net['source'] + net_mac = net['mac'] + if net_type in ['direct', 'hostdev']: + net_model = 'N/A' + net_bytes = 'N/A' + net_packets = 'N/A' + net_errors = 'N/A' + elif net_type in ['bridge']: + net_model = net['model'] + net_bytes = '/'.join([str(format_bytes(net.get('rd_bytes', 0))), str(format_bytes(net.get('wr_bytes', 0)))]) + net_packets = '/'.join([str(format_metric(net.get('rd_packets', 0))), str(format_metric(net.get('wr_packets', 0)))]) + net_errors = '/'.join([str(format_metric(net.get('rd_errors', 0))), str(format_metric(net.get('wr_errors', 0)))]) + + ainformation.append(' {0: <3} {1: <8} {2: <12} {3: <8} {4: <18} {5: <12} {6: <15} {7: <12}'.format( domain_information['networks'].index(net), - net['type'], - net['source'], - net['model'], - net['mac'], - '/'.join([str(format_bytes(net.get('rd_bytes', 0))), str(format_bytes(net.get('wr_bytes', 0)))]), - '/'.join([str(format_metric(net.get('rd_packets', 0))), str(format_metric(net.get('wr_packets', 0)))]), - '/'.join([str(format_metric(net.get('rd_errors', 0))), str(format_metric(net.get('wr_errors', 0)))]), + net_type, + net_source, + net_model, + net_mac, + net_bytes, + net_packets, + net_errors )) # Controller list ainformation.append('') @@ -1242,13 +1321,7 @@ def format_list(config, vm_list, raw): # Network list net_list = [] for net in domain_information['networks']: - # Split out just the numerical (VNI) part of the brXXXX name - net_vnis = re.findall(r'\d+', net['source']) - if net_vnis: - net_vni = net_vnis[0] - else: - net_vni = re.sub('br', '', net['source']) - net_list.append(net_vni) + net_list.append(net['vni']) return net_list # Handle raw mode since it just lists the names @@ -1268,7 +1341,7 @@ def format_list(config, vm_list, raw): vm_nets_length = 9 vm_ram_length = 8 
vm_vcpu_length = 6 - vm_node_length = 8 + vm_node_length = 5 vm_migrated_length = 10 for domain_information in vm_list: net_list = getNiceNetID(domain_information) @@ -1324,8 +1397,6 @@ def format_list(config, vm_list, raw): ) ) - # Keep track of nets we found to be valid to cut down on duplicate API hits - valid_net_list = [] # Format the string (elements) for domain_information in vm_list: if domain_information['state'] == 'start': @@ -1342,18 +1413,13 @@ def format_list(config, vm_list, raw): vm_state_colour = ansiprint.blue() # Handle colouring for an invalid network config - raw_net_list = getNiceNetID(domain_information) - net_list = [] + net_list = getNiceNetID(domain_information) + cluster_net_list = call_api(config, 'get', '/network').json() vm_net_colour = '' - for net_vni in raw_net_list: - if net_vni not in valid_net_list: - response = call_api(config, 'get', '/network/{net}'.format(net=net_vni)) - if response.status_code != 200 and net_vni not in ['cluster', 'storage', 'upstream']: + for net_vni in net_list: + if net_vni not in ['cluster', 'storage', 'upstream'] and not re.match(r'^macvtap:.*', net_vni) and not re.match(r'^hostdev:.*', net_vni): + if int(net_vni) not in [net['vni'] for net in cluster_net_list]: vm_net_colour = ansiprint.red() - else: - valid_net_list.append(net_vni) - - net_list.append(net_vni) vm_list_output.append( '{bold}{vm_name: <{vm_name_length}} {vm_uuid: <{vm_uuid_length}} \ diff --git a/client-cli/pvc.py b/client-cli/pvc.py index 3a04b5ac..cd573666 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -672,7 +672,7 @@ def vm_define(vmconfig, target_node, node_limit, node_selector, node_autostart, @click.option( '-m', '--method', 'migration_method', default='none', show_default=True, type=click.Choice(['none', 'live', 'shutdown']), - help='The preferred migration method of the VM between nodes; saved with VM.' + help='The preferred migration method of the VM between nodes.' 
) @click.option( '-p', '--profile', 'provisioner_profile', default=None, show_default=False, @@ -1309,15 +1309,24 @@ def vm_network_get(domain, raw): 'domain' ) @click.argument( - 'vni' + 'net' ) @click.option( '-a', '--macaddr', 'macaddr', default=None, - help='Use this MAC address instead of random generation; must be a valid MAC address in colon-deliniated format.' + help='Use this MAC address instead of random generation; must be a valid MAC address in colon-delimited format.' ) @click.option( - '-m', '--model', 'model', default='virtio', - help='The model for the interface; must be a valid libvirt model.' + '-m', '--model', 'model', default='virtio', show_default=True, + help='The model for the interface; must be a valid libvirt model. Not used for "netdev" SR-IOV NETs.' +) +@click.option( + '-s', '--sriov', 'sriov', is_flag=True, default=False, + help='Identify that NET is an SR-IOV device name and not a VNI. Required for adding SR-IOV NETs.' +) +@click.option( + '-d', '--sriov-mode', 'sriov_mode', default='macvtap', show_default=True, + type=click.Choice(['hostdev', 'macvtap']), + help='For SR-IOV NETs, the SR-IOV network device mode.' ) @click.option( '-r', '--restart', 'restart', is_flag=True, default=False, @@ -1329,9 +1338,18 @@ def vm_network_get(domain, raw): help='Confirm the restart' ) @cluster_req -def vm_network_add(domain, vni, macaddr, model, restart, confirm_flag): +def vm_network_add(domain, net, macaddr, model, sriov, sriov_mode, restart, confirm_flag): """ - Add the network VNI to the virtual machine DOMAIN. Networks are always addded to the end of the current list of networks in the virtual machine. + Add the network NET to the virtual machine DOMAIN. Networks are always addded to the end of the current list of networks in the virtual machine. + + NET may be a PVC network VNI, which is added as a bridged device, or a SR-IOV VF device connected in the given mode. 
+ + NOTE: Adding a SR-IOV network device in the "hostdev" mode has the following caveats: + + 1. The VM will not be able to be live migrated; it must be shut down to migrate between nodes. The VM metadata will be updated to force this. + + 2. If an identical SR-IOV VF device is not present on the target node, post-migration startup will fail. It may be prudent to use a node limit here. + """ if restart and not confirm_flag and not config['unsafe']: try: @@ -1339,7 +1357,7 @@ def vm_network_add(domain, vni, macaddr, model, restart, confirm_flag): except Exception: restart = False - retcode, retmsg = pvc_vm.vm_networks_add(config, domain, vni, macaddr, model, restart) + retcode, retmsg = pvc_vm.vm_networks_add(config, domain, net, macaddr, model, sriov, sriov_mode, restart) if retcode and not restart: retmsg = retmsg + " Changes will be applied on next VM start/restart." cleanup(retcode, retmsg) @@ -1353,7 +1371,11 @@ def vm_network_add(domain, vni, macaddr, model, restart, confirm_flag): 'domain' ) @click.argument( - 'vni' + 'net' +) +@click.option( + '-s', '--sriov', 'sriov', is_flag=True, default=False, + help='Identify that NET is an SR-IOV device name and not a VNI. Required for removing SR-IOV NETs.' ) @click.option( '-r', '--restart', 'restart', is_flag=True, default=False, @@ -1365,9 +1387,11 @@ def vm_network_add(domain, vni, macaddr, model, restart, confirm_flag): help='Confirm the restart' ) @cluster_req -def vm_network_remove(domain, vni, restart, confirm_flag): +def vm_network_remove(domain, net, sriov, restart, confirm_flag): """ - Remove the network VNI to the virtual machine DOMAIN. + Remove the network NET from the virtual machine DOMAIN. + + NET may be a PVC network VNI, which is added as a bridged device, or a SR-IOV VF device connected in the given mode. 
""" if restart and not confirm_flag and not config['unsafe']: try: @@ -1375,7 +1399,7 @@ def vm_network_remove(domain, vni, restart, confirm_flag): except Exception: restart = False - retcode, retmsg = pvc_vm.vm_networks_remove(config, domain, vni, restart) + retcode, retmsg = pvc_vm.vm_networks_remove(config, domain, net, sriov, restart) if retcode and not restart: retmsg = retmsg + " Changes will be applied on next VM start/restart." cleanup(retcode, retmsg) @@ -1482,7 +1506,7 @@ def vm_volume_add(domain, volume, disk_id, bus, disk_type, restart, confirm_flag 'domain' ) @click.argument( - 'vni' + 'volume' ) @click.option( '-r', '--restart', 'restart', is_flag=True, default=False, @@ -1494,9 +1518,9 @@ def vm_volume_add(domain, volume, disk_id, bus, disk_type, restart, confirm_flag help='Confirm the restart' ) @cluster_req -def vm_volume_remove(domain, vni, restart, confirm_flag): +def vm_volume_remove(domain, volume, restart, confirm_flag): """ - Remove the volume VNI to the virtual machine DOMAIN. + Remove VOLUME from the virtual machine DOMAIN; VOLUME must be a file path or RBD path in 'pool/volume' format. """ if restart and not confirm_flag and not config['unsafe']: try: @@ -1504,7 +1528,7 @@ def vm_volume_remove(domain, vni, restart, confirm_flag): except Exception: restart = False - retcode, retmsg = pvc_vm.vm_volumes_remove(config, domain, vni, restart) + retcode, retmsg = pvc_vm.vm_volumes_remove(config, domain, volume, restart) if retcode and not restart: retmsg = retmsg + " Changes will be applied on next VM start/restart." 
cleanup(retcode, retmsg) @@ -2101,6 +2125,154 @@ def net_acl_list(net, limit, direction): cleanup(retcode, retdata) +############################################################################### +# pvc network sriov +############################################################################### +@click.group(name='sriov', short_help='Manage SR-IOV network resources.', context_settings=CONTEXT_SETTINGS) +def net_sriov(): + """ + Manage SR-IOV network resources on nodes (PFs and VFs). + """ + pass + + +############################################################################### +# pvc network sriov pf +############################################################################### +@click.group(name='pf', short_help='Manage PF devices.', context_settings=CONTEXT_SETTINGS) +def net_sriov_pf(): + """ + Manage SR-IOV PF devices on nodes. + """ + pass + + +############################################################################### +# pvc network sriov pf list +############################################################################### +@click.command(name='list', short_help='List PF devices.') +@click.argument( + 'node' +) +@cluster_req +def net_sriov_pf_list(node): + """ + List all SR-IOV PFs on NODE. + """ + retcode, retdata = pvc_network.net_sriov_pf_list(config, node) + if retcode: + retdata = pvc_network.format_list_sriov_pf(retdata) + cleanup(retcode, retdata) + + +############################################################################### +# pvc network sriov vf +############################################################################### +@click.group(name='vf', short_help='Manage VF devices.', context_settings=CONTEXT_SETTINGS) +def net_sriov_vf(): + """ + Manage SR-IOV VF devices on nodes. 
+ """ + pass + + +############################################################################### +# pvc network sriov vf set +############################################################################### +@click.command(name='set', short_help='Set VF device properties.') +@click.option( + '--vlan-id', 'vlan_id', default=None, show_default=False, + help='The vLAN ID for vLAN tagging.' +) +@click.option( + '--qos-prio', 'vlan_qos', default=None, show_default=False, + help='The vLAN QOS priority.' +) +@click.option( + '--tx-min', 'tx_rate_min', default=None, show_default=False, + help='The minimum TX rate.' +) +@click.option( + '--tx-max', 'tx_rate_max', default=None, show_default=False, + help='The maximum TX rate.' +) +@click.option( + '--link-state', 'link_state', default=None, show_default=False, + type=click.Choice(['auto', 'enable', 'disable']), + help='The administrative link state.' +) +@click.option( + '--spoof-check/--no-spoof-check', 'spoof_check', is_flag=True, default=None, show_default=False, + help='Enable or disable spoof checking.' +) +@click.option( + '--trust/--no-trust', 'trust', is_flag=True, default=None, show_default=False, + help='Enable or disable VF user trust.' +) +@click.option( + '--query-rss/--no-query-rss', 'query_rss', is_flag=True, default=None, show_default=False, + help='Enable or disable query RSS support.' +) +@click.argument( + 'node' +) +@click.argument( + 'vf' +) +@cluster_req +def net_sriov_vf_set(node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss): + """ + Set a property of SR-IOV VF on NODE. 
+ """ + if vlan_id is None and vlan_qos is None and tx_rate_min is None and tx_rate_max is None and link_state is None and spoof_check is None and trust is None and query_rss is None: + cleanup(False, 'At least one configuration property must be specified to update.') + + retcode, retmsg = pvc_network.net_sriov_vf_set(config, node, vf, vlan_id, vlan_qos, tx_rate_min, tx_rate_max, link_state, spoof_check, trust, query_rss) + cleanup(retcode, retmsg) + + +############################################################################### +# pvc network sriov vf list +############################################################################### +@click.command(name='list', short_help='List VF devices.') +@click.argument( + 'node' +) +@click.argument( + 'pf', default=None, required=False +) +@cluster_req +def net_sriov_vf_list(node, pf): + """ + List all SR-IOV VFs on NODE, optionally limited to device PF. + """ + retcode, retdata = pvc_network.net_sriov_vf_list(config, node, pf) + if retcode: + retdata = pvc_network.format_list_sriov_vf(retdata) + cleanup(retcode, retdata) + + +############################################################################### +# pvc network sriov vf info +############################################################################### +@click.command(name='info', short_help='List VF devices.') +@click.argument( + 'node' +) +@click.argument( + 'vf' +) +@cluster_req +def net_sriov_vf_info(node, vf): + """ + Show details of the SR-IOV VF on NODE. 
+ """ + retcode, retdata = pvc_network.net_sriov_vf_info(config, node, vf) + if retcode: + retdata = pvc_network.format_info_sriov_vf(config, retdata, node) + cleanup(retcode, retdata) + + ############################################################################### # pvc storage ############################################################################### @@ -4475,6 +4647,7 @@ cli_network.add_command(net_info) cli_network.add_command(net_list) cli_network.add_command(net_dhcp) cli_network.add_command(net_acl) +cli_network.add_command(net_sriov) net_dhcp.add_command(net_dhcp_list) net_dhcp.add_command(net_dhcp_add) @@ -4484,6 +4657,15 @@ net_acl.add_command(net_acl_add) net_acl.add_command(net_acl_remove) net_acl.add_command(net_acl_list) +net_sriov.add_command(net_sriov_pf) +net_sriov.add_command(net_sriov_vf) + +net_sriov_pf.add_command(net_sriov_pf_list) + +net_sriov_vf.add_command(net_sriov_vf_list) +net_sriov_vf.add_command(net_sriov_vf_info) +net_sriov_vf.add_command(net_sriov_vf_set) + ceph_benchmark.add_command(ceph_benchmark_run) ceph_benchmark.add_command(ceph_benchmark_info) ceph_benchmark.add_command(ceph_benchmark_list) diff --git a/daemon-common/common.py b/daemon-common/common.py index 335c6afe..493cd139 100644 --- a/daemon-common/common.py +++ b/daemon-common/common.py @@ -26,6 +26,7 @@ import subprocess import signal from json import loads from re import match as re_match +from re import split as re_split from distutils.util import strtobool from threading import Thread from shlex import split as shlex_split @@ -372,23 +373,28 @@ def getDomainNetworks(parsed_xml, stats_data): net_type = device.attrib.get('type') except Exception: net_type = None + try: net_mac = device.mac.attrib.get('address') except Exception: net_mac = None + try: net_bridge = device.source.attrib.get(net_type) except Exception: net_bridge = None + try: net_model = device.model.attrib.get('type') except Exception: net_model = None + try: net_stats_list = [x for x in 
stats_data.get('net_stats', []) if x.get('bridge') == net_bridge] net_stats = net_stats_list[0] except Exception: net_stats = {} + net_rd_bytes = net_stats.get('rd_bytes', 0) net_rd_packets = net_stats.get('rd_packets', 0) net_rd_errors = net_stats.get('rd_errors', 0) @@ -397,9 +403,19 @@ def getDomainNetworks(parsed_xml, stats_data): net_wr_packets = net_stats.get('wr_packets', 0) net_wr_errors = net_stats.get('wr_errors', 0) net_wr_drops = net_stats.get('wr_drops', 0) + + if net_type == 'direct': + net_vni = 'macvtap:' + device.source.attrib.get('dev') + net_bridge = device.source.attrib.get('dev') + elif net_type == 'hostdev': + net_vni = 'hostdev:' + str(device.sriov_device) + net_bridge = str(device.sriov_device) + else: + net_vni = re_match(r'[vm]*br([0-9a-z]+)', net_bridge).group(1) + net_obj = { 'type': net_type, - 'vni': re_match(r'[vm]*br([0-9a-z]+)', net_bridge).group(1), + 'vni': net_vni, 'mac': net_mac, 'source': net_bridge, 'model': net_model, @@ -681,3 +697,25 @@ def removeIPAddress(ipaddr, cidrnetmask, dev): dev ) ) + + +# +# Sort a set of interface names (e.g. 
ens1f1v10) +# +def sortInterfaceNames(interface_names): + # We can't handle non-list inputs + if not isinstance(interface_names, list): + return interface_names + + def atoi(text): + return int(text) if text.isdigit() else text + + def natural_keys(text): + """ + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + """ + return [atoi(c) for c in re_split(r'(\d+)', text)] + + return sorted(interface_names, key=natural_keys) diff --git a/daemon-common/migrations/versions/1.json b/daemon-common/migrations/versions/1.json new file mode 100644 index 00000000..f0fc66a5 --- /dev/null +++ b/daemon-common/migrations/versions/1.json @@ -0,0 +1 @@ +{"version": "1", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": 
"/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "migrate.sync_lock": "/migrate_sync_lock"}, "network": {"vni": "", "type": "/nettype", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": 
"/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/network.py b/daemon-common/network.py index 67f7bdea..3e034cf8 100644 --- a/daemon-common/network.py +++ b/daemon-common/network.py @@ -21,6 +21,8 @@ import re +import daemon_lib.common as common + # # Cluster search functions @@ -629,3 +631,226 @@ def get_list_acl(zkhandler, network, limit, direction, is_fuzzy=True): acl_list.append(acl) return True, acl_list + + +# +# SR-IOV functions +# +# These are separate since they don't work like other network types +# +def getSRIOVPFInformation(zkhandler, node, pf): + mtu = zkhandler.read(('node.sriov.pf', node, 'sriov_pf.mtu', pf)) + + retcode, vf_list = get_list_sriov_vf(zkhandler, node, pf) + if retcode: + vfs = common.sortInterfaceNames([vf['phy'] for vf in vf_list if vf['pf'] == pf]) + else: + vfs = [] + + # Construct a data structure to represent the data + pf_information = { + 'phy': pf, + 'mtu': mtu, + 'vfs': vfs, + } + return pf_information + + +def get_info_sriov_pf(zkhandler, node, pf): + pf_information = getSRIOVPFInformation(zkhandler, node, pf) + if not pf_information: + return False, 'ERROR: Could not get information about SR-IOV PF "{}" on node "{}"'.format(pf, node) + + return True, pf_information + + +def get_list_sriov_pf(zkhandler, node): + pf_list = list() + pf_phy_list = zkhandler.children(('node.sriov.pf', node)) + for phy in pf_phy_list: + retcode, pf_information = get_info_sriov_pf(zkhandler, node, phy) + if retcode: + pf_list.append(pf_information) + + return True, pf_list + + +def getSRIOVVFInformation(zkhandler, node, vf): + if not zkhandler.exists(('node.sriov.vf', node, 'sriov_vf', vf)): + return [] 
+ + pf = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pf', vf)) + mtu = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.mtu', vf)) + mac = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.mac', vf)) + vlan_id = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.vlan_id', vf)) + vlan_qos = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.vlan_qos', vf)) + tx_rate_min = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.tx_rate_min', vf)) + tx_rate_max = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.tx_rate_max', vf)) + link_state = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.link_state', vf)) + spoof_check = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.spoof_check', vf)) + trust = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.trust', vf)) + query_rss = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.config.query_rss', vf)) + pci_domain = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pci.domain', vf)) + pci_bus = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pci.bus', vf)) + pci_slot = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pci.slot', vf)) + pci_function = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.pci.function', vf)) + used = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.used', vf)) + used_by_domain = zkhandler.read(('node.sriov.vf', node, 'sriov_vf.used_by', vf)) + + vf_information = { + 'phy': vf, + 'pf': pf, + 'mtu': mtu, + 'mac': mac, + 'config': { + 'vlan_id': vlan_id, + 'vlan_qos': vlan_qos, + 'tx_rate_min': tx_rate_min, + 'tx_rate_max': tx_rate_max, + 'link_state': link_state, + 'spoof_check': spoof_check, + 'trust': trust, + 'query_rss': query_rss, + }, + 'pci': { + 'domain': pci_domain, + 'bus': pci_bus, + 'slot': pci_slot, + 'function': pci_function, + }, + 'usage': { + 'used': used, + 'domain': used_by_domain, + } + } + return vf_information + + +def get_info_sriov_vf(zkhandler, node, vf): + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + 
if not valid_node: + return False, 'ERROR: Specified node "{}" is invalid.'.format(node) + + vf_information = getSRIOVVFInformation(zkhandler, node, vf) + if not vf_information: + return False, 'ERROR: Could not find SR-IOV VF "{}" on node "{}"'.format(vf, node) + + return True, vf_information + + +def get_list_sriov_vf(zkhandler, node, pf=None): + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + if not valid_node: + return False, 'ERROR: Specified node "{}" is invalid.'.format(node) + + vf_list = list() + vf_phy_list = common.sortInterfaceNames(zkhandler.children(('node.sriov.vf', node))) + for phy in vf_phy_list: + retcode, vf_information = get_info_sriov_vf(zkhandler, node, phy) + if retcode: + if pf is not None: + if vf_information['pf'] == pf: + vf_list.append(vf_information) + else: + vf_list.append(vf_information) + + return True, vf_list + + +def set_sriov_vf_config(zkhandler, node, vf, vlan_id=None, vlan_qos=None, tx_rate_min=None, tx_rate_max=None, link_state=None, spoof_check=None, trust=None, query_rss=None): + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + if not valid_node: + return False, 'ERROR: Specified node "{}" is invalid.'.format(node) + + # Verify VF is valid + vf_information = getSRIOVVFInformation(zkhandler, node, vf) + if not vf_information: + return False, 'ERROR: Could not find SR-IOV VF "{}" on node "{}".'.format(vf, node) + + update_list = list() + + if vlan_id is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.vlan_id', vf), vlan_id)) + + if vlan_qos is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.vlan_qos', vf), vlan_qos)) + + if tx_rate_min is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.tx_rate_min', vf), tx_rate_min)) + + if tx_rate_max is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.tx_rate_max', vf), tx_rate_max)) + + if link_state is not None: + 
update_list.append((('node.sriov.vf', node, 'sriov_vf.config.link_state', vf), link_state)) + + if spoof_check is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.spoof_check', vf), spoof_check)) + + if trust is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.trust', vf), trust)) + + if query_rss is not None: + update_list.append((('node.sriov.vf', node, 'sriov_vf.config.query_rss', vf), query_rss)) + + if len(update_list) < 1: + return False, 'ERROR: No changes to apply.' + + result = zkhandler.write(update_list) + if result: + return True, 'Successfully modified configuration of SR-IOV VF "{}" on node "{}".'.format(vf, node) + else: + return False, 'Failed to modify configuration of SR-IOV VF "{}" on node "{}".'.format(vf, node) + + +def set_sriov_vf_vm(zkhandler, vm_uuid, node, vf, vf_macaddr, vf_type): + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + if not valid_node: + return False + + # Verify VF is valid + vf_information = getSRIOVVFInformation(zkhandler, node, vf) + if not vf_information: + return False + + update_list = [ + (('node.sriov.vf', node, 'sriov_vf.used', vf), 'True'), + (('node.sriov.vf', node, 'sriov_vf.used_by', vf), vm_uuid), + (('node.sriov.vf', node, 'sriov_vf.mac', vf), vf_macaddr), + ] + + # Hostdev type SR-IOV prevents the guest from live migrating + if vf_type == 'hostdev': + update_list.append( + (('domain.meta.migrate_method', vm_uuid), 'shutdown') + ) + + zkhandler.write(update_list) + + return True + + +def unset_sriov_vf_vm(zkhandler, node, vf): + # Verify node is valid + valid_node = common.verifyNode(zkhandler, node) + if not valid_node: + return False + + # Verify VF is valid + vf_information = getSRIOVVFInformation(zkhandler, node, vf) + if not vf_information: + return False + + update_list = [ + (('node.sriov.vf', node, 'sriov_vf.used', vf), 'False'), + (('node.sriov.vf', node, 'sriov_vf.used_by', vf), ''), + (('node.sriov.vf', node, 'sriov_vf.mac', 
vf), zkhandler.read(('node.sriov.vf', node, 'sriov_vf.phy_mac', vf))) + ] + + zkhandler.write(update_list) + + return True diff --git a/daemon-common/vm.py b/daemon-common/vm.py index ed572676..7b8dc2f9 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -27,6 +27,7 @@ import lxml.etree import daemon_lib.common as common import daemon_lib.ceph as ceph +from daemon_lib.network import set_sriov_vf_vm, unset_sriov_vf_vm # @@ -191,6 +192,21 @@ def define_vm(zkhandler, config_data, target_node, node_limit, node_selector, no if not valid_node: return False, 'ERROR: Specified node "{}" is invalid.'.format(target_node) + # If a SR-IOV network device is being added, set its used state + dnetworks = common.getDomainNetworks(parsed_xml, {}) + for network in dnetworks: + if network['type'] in ['direct', 'hostdev']: + dom_node = zkhandler.read(('domain.node', dom_uuid)) + + # Check if the network is already in use + is_used = zkhandler.read(('node.sriov.vf', dom_node, 'sriov_vf.used', network['source'])) + if is_used == 'True': + used_by_name = searchClusterByUUID(zkhandler, zkhandler.read(('node.sriov.vf', dom_node, 'sriov_vf.used_by', network['source']))) + return False, 'ERROR: Attempted to use SR-IOV network "{}" which is already used by VM "{}" on node "{}".'.format(network['source'], used_by_name, dom_node) + + # We must update the "used" section + set_sriov_vf_vm(zkhandler, dom_uuid, dom_node, network['source'], network['mac'], network['type']) + # Obtain the RBD disk list using the common functions ddisks = common.getDomainDisks(parsed_xml, {}) rbd_list = [] @@ -211,7 +227,7 @@ def define_vm(zkhandler, config_data, target_node, node_limit, node_selector, no formatted_rbd_list = '' # Add the new domain to Zookeeper - result = zkhandler.write([ + zkhandler.write([ (('domain', dom_uuid), dom_name), (('domain.xml', dom_uuid), config_data), (('domain.state', dom_uuid), initial_state), @@ -230,10 +246,7 @@ def define_vm(zkhandler, config_data, target_node, 
node_limit, node_selector, no (('domain.migrate.sync_lock', dom_uuid), ''), ]) - if result: - return True, 'Added new VM with Name "{}" and UUID "{}" to database.'.format(dom_name, dom_uuid) - else: - return False, 'ERROR: Failed to add new VM.' + return True, 'Added new VM with Name "{}" and UUID "{}" to database.'.format(dom_name, dom_uuid) def modify_vm_metadata(zkhandler, domain, node_limit, node_selector, node_autostart, provisioner_profile, migration_method): @@ -276,7 +289,39 @@ def modify_vm(zkhandler, domain, restart, new_vm_config): try: parsed_xml = lxml.objectify.fromstring(new_vm_config) except Exception: - return False, 'ERROR: Failed to parse XML data.' + return False, 'ERROR: Failed to parse new XML data.' + + # Get our old network list for comparison purposes + old_vm_config = zkhandler.read(('domain.xml', dom_uuid)) + old_parsed_xml = lxml.objectify.fromstring(old_vm_config) + old_dnetworks = common.getDomainNetworks(old_parsed_xml, {}) + + # If a SR-IOV network device is being added, set its used state + dnetworks = common.getDomainNetworks(parsed_xml, {}) + for network in dnetworks: + # Ignore networks that are already there + if network['source'] in [net['source'] for net in old_dnetworks]: + continue + + if network['type'] in ['direct', 'hostdev']: + dom_node = zkhandler.read(('domain.node', dom_uuid)) + + # Check if the network is already in use + is_used = zkhandler.read(('node.sriov.vf', dom_node, 'sriov_vf.used', network['source'])) + if is_used == 'True': + used_by_name = searchClusterByUUID(zkhandler, zkhandler.read(('node.sriov.vf', dom_node, 'sriov_vf.used_by', network['source']))) + return False, 'ERROR: Attempted to use SR-IOV network "{}" which is already used by VM "{}" on node "{}".'.format(network['source'], used_by_name, dom_node) + + # We must update the "used" section + set_sriov_vf_vm(zkhandler, dom_uuid, dom_node, network['source'], network['mac'], network['type']) + + # If a SR-IOV network device is being removed, unset its 
used state + for network in old_dnetworks: + if network['type'] in ['direct', 'hostdev']: + if network['mac'] not in [n['mac'] for n in dnetworks]: + dom_node = zkhandler.read(('domain.node', dom_uuid)) + # We must update the "used" section + unset_sriov_vf_vm(zkhandler, dom_node, network['source']) # Obtain the RBD disk list using the common functions ddisks = common.getDomainDisks(parsed_xml, {}) @@ -513,6 +558,38 @@ def disable_vm(zkhandler, domain): return True, 'Marked VM "{}" as disable.'.format(domain) +def update_vm_sriov_nics(zkhandler, dom_uuid, source_node, target_node): + # Update all the SR-IOV device states on both nodes, used during migrations but called by the node-side + vm_config = zkhandler.read(('domain.xml', dom_uuid)) + parsed_xml = lxml.objectify.fromstring(vm_config) + dnetworks = common.getDomainNetworks(parsed_xml, {}) + retcode = True + retmsg = '' + for network in dnetworks: + if network['type'] in ['direct', 'hostdev']: + # Check if the network is already in use + is_used = zkhandler.read(('node.sriov.vf', target_node, 'sriov_vf.used', network['source'])) + if is_used == 'True': + used_by_name = searchClusterByUUID(zkhandler, zkhandler.read(('node.sriov.vf', target_node, 'sriov_vf.used_by', network['source']))) + if retcode: + retcode_this = False + retmsg = 'Attempting to use SR-IOV network "{}" which is already used by VM "{}"'.format(network['source'], used_by_name) + else: + retcode_this = True + + # We must update the "used" section + if retcode_this: + # This conditional ensure that if we failed the is_used check, we don't try to overwrite the information of a VF that belongs to another VM + set_sriov_vf_vm(zkhandler, dom_uuid, target_node, network['source'], network['mac'], network['type']) + # ... 
but we still want to free the old node in an case + unset_sriov_vf_vm(zkhandler, source_node, network['source']) + + if not retcode_this: + retcode = retcode_this + + return retcode, retmsg + + def move_vm(zkhandler, domain, target_node, wait=False, force_live=False): # Validate that VM exists in cluster dom_uuid = getDomainUUID(zkhandler, domain) @@ -572,6 +649,9 @@ def move_vm(zkhandler, domain, target_node, wait=False, force_live=False): # Wait for 1/2 second for migration to start time.sleep(0.5) + # Update any SR-IOV NICs + update_vm_sriov_nics(zkhandler, dom_uuid, current_node, target_node) + if wait: while zkhandler.read(('domain.state', dom_uuid)) == target_state: time.sleep(0.5) @@ -624,6 +704,7 @@ def migrate_vm(zkhandler, domain, target_node, force_migrate, wait=False, force_ return False, 'ERROR: Could not find a valid migration target for VM "{}".'.format(domain) # Don't overwrite an existing last_node when using force_migrate + real_current_node = current_node # Used for the SR-IOV update if last_node and force_migrate: current_node = last_node @@ -640,6 +721,9 @@ def migrate_vm(zkhandler, domain, target_node, force_migrate, wait=False, force_ # Wait for 1/2 second for migration to start time.sleep(0.5) + # Update any SR-IOV NICs + update_vm_sriov_nics(zkhandler, dom_uuid, real_current_node, target_node) + if wait: while zkhandler.read(('domain.state', dom_uuid)) == target_state: time.sleep(0.5) @@ -665,6 +749,7 @@ def unmigrate_vm(zkhandler, domain, wait=False, force_live=False): else: target_state = 'migrate' + current_node = zkhandler.read(('domain.node', dom_uuid)) target_node = zkhandler.read(('domain.last_node', dom_uuid)) if target_node == '': @@ -683,6 +768,9 @@ def unmigrate_vm(zkhandler, domain, wait=False, force_live=False): # Wait for 1/2 second for migration to start time.sleep(0.5) + # Update any SR-IOV NICs + update_vm_sriov_nics(zkhandler, dom_uuid, current_node, target_node) + if wait: while zkhandler.read(('domain.state', dom_uuid)) 
== target_state: time.sleep(0.5) diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index f201429d..ded85876 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -426,7 +426,7 @@ class ZKHandler(object): # class ZKSchema(object): # Current version - _version = 0 + _version = 1 # Root for doing nested keys _schema_root = '' @@ -483,7 +483,40 @@ class ZKSchema(object): 'memory.provisioned': '/memprov', 'ipmi.hostname': '/ipmihostname', 'ipmi.username': '/ipmiusername', - 'ipmi.password': '/ipmipassword' + 'ipmi.password': '/ipmipassword', + 'sriov': '/sriov', + 'sriov.pf': '/sriov/pf', + 'sriov.vf': '/sriov/vf', + }, + # The schema of an individual SR-IOV PF entry (/nodes/{node_name}/sriov/pf/{pf}) + 'sriov_pf': { + 'phy': '', # The root key + 'mtu': '/mtu', + 'vfcount': '/vfcount' + }, + # The schema of an individual SR-IOV VF entry (/nodes/{node_name}/sriov/vf/{vf}) + 'sriov_vf': { + 'phy': '', # The root key + 'pf': '/pf', + 'mtu': '/mtu', + 'mac': '/mac', + 'phy_mac': '/phy_mac', + 'config': '/config', + 'config.vlan_id': '/config/vlan_id', + 'config.vlan_qos': '/config/vlan_qos', + 'config.tx_rate_min': '/config/tx_rate_min', + 'config.tx_rate_max': '/config/tx_rate_max', + 'config.spoof_check': '/config/spoof_check', + 'config.link_state': '/config/link_state', + 'config.trust': '/config/trust', + 'config.query_rss': '/config/query_rss', + 'pci': '/pci', + 'pci.domain': '/pci/domain', + 'pci.bus': '/pci/bus', + 'pci.slot': '/pci/slot', + 'pci.function': '/pci/function', + 'used': '/used', + 'used_by': '/used_by' }, # The schema of an individual domain entry (/domains/{domain_uuid}) 'domain': { @@ -709,6 +742,10 @@ class ZKSchema(object): if not zkhandler.zk_conn.exists(nkipath): result = False + # One might expect child keys under node (specifically, sriov.pf and sriov.vf) to be + # managed here as well, but those are created automatically every time pvcnoded starts + # and thus never need to be validated or applied. 
+ # These two have several children layers that must be parsed through for elem in ['volume']: # First read all the subelements of the key class (pool layer) @@ -782,6 +819,10 @@ class ZKSchema(object): if not zkhandler.zk_conn.exists(nkipath): zkhandler.zk_conn.create(nkipath, ''.encode(zkhandler.encoding)) + # One might expect child keys under node (specifically, sriov.pf and sriov.vf) to be + # managed here as well, but those are created automatically every time pvcnoded starts + # and thus never need to be validated or applied. + # These two have several children layers that must be parsed through for elem in ['volume']: # First read all the subelements of the key class (pool layer) diff --git a/docs/cluster-architecture.md b/docs/cluster-architecture.md index c262dc3e..4fe3f7c4 100644 --- a/docs/cluster-architecture.md +++ b/docs/cluster-architecture.md @@ -12,6 +12,7 @@ + [PVC client networks](#pvc-client-networks) - [Bridged (unmanaged) Client Networks](#bridged--unmanaged--client-networks) - [VXLAN (managed) Client Networks](#vxlan--managed--client-networks) + - [SR-IOV Client Networks](#sriov-client-networks) - [Other Client Networks](#other-client-networks) * [Node Layout: Considering how nodes are laid out](#node-layout--considering-how-nodes-are-laid-out) + [Node Functions: Coordinators versus Hypervisors](#node-functions--coordinators-versus-hypervisors) @@ -184,6 +185,26 @@ With this client network type, PVC is in full control of the network. No vLAN co NOTE: These networks may introduce a bottleneck and tromboning if there is a large amount of external and/or inter-network traffic on the cluster. The administrator should consider this carefully when deciding whether to use managed or bridged networks and properly evaluate the inter-network traffic requirements. +#### SR-IOV Client Networks + +The third type of client network is the SR-IOV network. 
SR-IOV (Single-Root I/O Virtualization) is a technique and feature enabled on modern high-performance NICs (for instance, those from Intel or nVidia) which allows a single physical Ethernet port (a "PF" in SR-IOV terminology) to be split, at a hardware level, into multiple virtual Ethernet ports ("VF"s), which can then be managed separately. Starting with version 0.9.21, PVC supports SR-IOV PF and VF configuration at the node level, and these VFs can be passed into VMs in two ways. + +SR-IOV's main benefit is to offload bridging and network functions from the hypervisor layer, and direct them onto the hardware itself. This can increase network throughput in some situations, as well as provide near-complete isolation of guest networks from the hypervisors (in contrast with bridges which *can* expose client traffic to the hypervisors, and VXLANs which *do* expose client traffic to the hypervisors). For instance, a VF can have a vLAN specified, and the tagging/untagging of packets is then carried out at the hardware layer. + +There are however caveats to working with SR-IOV. At the most basic level, the biggest difference with SR-IOV compared to the other two network types is that SR-IOV must be configured on a per-node basis. That is, each node must have SR-IOV explicitly enabled, its specific PF devices defined, and a set of VFs created at PVC startup. Generally, with identical PVC nodes, this will not be a problem but is something to consider, especially if the servers are mismatched in any way. It is thus also possible to set some nodes with SR-IOV functionality, and others without, though care must be taken in this situation to set node limits in the VM metadata of any VMs which use SR-IOV VFs to prevent failed migrations. + +PFs are defined in the `pvcnoded.yml` configuration of each node, via the `sriov_device` list. Each PF can have an arbitrary number of VFs (`vfcount`) allocated, though each NIC vendor and model has specific limits. 
Once configured, specifically with Intel NICs, PFs (and specifically, the `vfcount` attribute in the driver) are immutable and cannot be changed easily without completely flushing the node and rebooting it, so care should be taken to select the desired settings as early in the cluster configuration as possible. + +Once created, VFs are also managed on a per-node basis. That is, each VF, on each host, even if they have the exact same device names, is managed separately. For instance, the PF `ens1f0` creating a VF `ens1f0v0` on "`hv1`", can have a different configuration from the identically-named VF `ens1f0v0` on "`hv2`". The administrator is responsible for ensuring consistency here, and for ensuring that devices do not overlap (e.g. assigning the same VF name to VMs on two separate nodes which might migrate to each other). PVC will however explicitly prevent two VMs from being assigned to the same VF on the same node, even if this may be technically possible in some cases. + +When attaching VFs to VMs, there are two supported modes: `macvtap`, and `hostdev`. + +`macvtap`, as the name suggests, uses the Linux `macvtap` driver to connect the VF to the VM. Once attached, the vNIC behaves just like a "bridged" network connection above, and like "bridged" connections, the "mode" of the NIC can be specified, defaulting to "virtio" but supporting various emulated devices instead. Note that in this mode, vLANs cannot be configured on the guest side; they must be specified in the VF configuration (`pvc network sriov vf set`) with one vLAN per VF. VMs with `macvtap` interfaces can be live migrated between nodes without issue, assuming there is a corresponding free VF on the destination node, and the SR-IOV functionality is transparent to the VM. + +`hostdev` is a direct PCIe passthrough method. 
With a VF attached to a VM in `hostdev` mode, the virtual PCIe NIC device itself becomes hidden from the node, and is visible only to the guest, where it appears as a discrete PCIe device. In this mode, vLANs and other attributes can be set on the guest side at will, though setting vLANs and other properties in the VF configuration is still supported. The main caveat to this mode is that VMs with connected `hostdev` SR-IOV VFs *cannot be live migrated between nodes*. Only a `shutdown` migration is supported, and, like `macvtap`, an identical PCIe device at the same bus address must be present on the target node. To prevent unexpected failures, PVC will explicitly set the VM metadata for the "migration method" to "shutdown" the first time that a `hostdev` VF is attached to it; if this changes later, the administrator must change this back explicitly. + +Generally speaking, SR-IOV connections are not recommended unless there is a good usecase for them. On modern hardware, software bridges are extremely performant, and are much simpler to manage. The functionality is provided for those rare usecases where SR-IOV is absolutely required by the administrator, but care must be taken to understand all the requirements and caveats of SR-IOV before using it in production. + #### Other Client Networks Future PVC versions may support other client network types, such as direct-routing between VMs. diff --git a/docs/manuals/ansible.md b/docs/manuals/ansible.md index b89b0f94..e3b163d0 100644 --- a/docs/manuals/ansible.md +++ b/docs/manuals/ansible.md @@ -451,6 +451,12 @@ pvc_nodes: pvc_bridge_device: bondU +pvc_sriov_enable: True +pvc_sriov_device: + - phy: ens1f0 + mtu: 9000 + vfcount: 6 + pvc_upstream_device: "{{ networks['upstream']['device'] }}" pvc_upstream_mtu: "{{ networks['upstream']['mtu'] }}" pvc_upstream_domain: "{{ networks['upstream']['domain'] }}" @@ -901,6 +907,18 @@ The IPMI password for the node management controller. 
Unless a per-host override The device name of the underlying network interface to be used for "bridged"-type client networks. For each "bridged"-type network, an IEEE 802.3q vLAN and bridge will be created on top of this device to pass these networks. In most cases, using the reflexive `networks['cluster']['raw_device']` or `networks['upstream']['raw_device']` from the Base role is sufficient. +#### `pvc_sriov_enable` + +* *optional* + +Whether to enable or disable SR-IOV functionality. + +#### `pvc_sriov_device` + +* *optional* + +A list of SR-IOV devices. See the Daemon manual for details. + #### `pvc__*` The next set of entries is hard-coded to use the values from the global `networks` list. It should not need to be changed under most circumstances. Refer to the previous sections for specific notes about each entry. diff --git a/docs/manuals/daemon.md b/docs/manuals/daemon.md index e651727c..de46dbe6 100644 --- a/docs/manuals/daemon.md +++ b/docs/manuals/daemon.md @@ -146,6 +146,11 @@ pvc: console_log_lines: 1000 networking: bridge_device: ens4 + sriov_enable: True + sriov_device: + - phy: ens1f0 + mtu: 9000 + vfcount: 7 upstream: device: ens4 mtu: 1500 @@ -422,6 +427,34 @@ How many lines of VM console logs to keep in the Zookeeper database for each VM. The network interface device used to create Bridged client network vLANs on. For most clusters, should match the underlying device of the various static networks (e.g. `ens4` or `bond0`), though may also use a separate network interface. +#### `system` → `configuration` → `networking` → `sriov_enable` + +* *optional*, defaults to `False` +* *requires* `functions` → `enable_networking` + +Enables (or disables) SR-IOV functionality in PVC. If enabled, at least one `sriov_device` entry should be specified. 
+ +#### `system` → `configuration` → `networking` → `sriov_device` + +* *optional* +* *requires* `functions` → `enable_networking` + +Contains a list of SR-IOV PF (physical function) devices and their basic configuration. Each element contains the following entries: + +##### `phy`: + +* *required* + +The raw Linux network device with SR-IOV PF functionality. + +##### `mtu` + +The MTU of the PF device, set on daemon startup. + +##### `vfcount` + +The number of VF devices to create on this PF. VF devices are then managed via PVC on a per-node basis. + #### `system` → `configuration` → `networking` * *optional* diff --git a/docs/manuals/swagger.json b/docs/manuals/swagger.json index c52cfc92..41cdc1a9 100644 --- a/docs/manuals/swagger.json +++ b/docs/manuals/swagger.json @@ -764,6 +764,99 @@ }, "type": "object" }, + "sriov_pf": { + "properties": { + "mtu": { + "description": "The MTU of the SR-IOV PF device", + "type": "string" + }, + "phy": { + "description": "The name of the SR-IOV PF device", + "type": "string" + }, + "vfs": { + "items": { + "description": "The PHY name of a VF of this PF", + "type": "string" + }, + "type": "list" + } + }, + "type": "object" + }, + "sriov_vf": { + "properties": { + "config": { + "id": "sriov_vf_config", + "properties": { + "link_state": { + "description": "The current SR-IOV VF link state (either enabled, disabled, or auto)", + "type": "string" + }, + "query_rss": { + "description": "Whether VF RSS querying is enabled or disabled", + "type": "boolean" + }, + "spoof_check": { + "description": "Whether device spoof checking is enabled or disabled", + "type": "boolean" + }, + "trust": { + "description": "Whether guest device trust is enabled or disabled", + "type": "boolean" + }, + "tx_rate_max": { + "description": "The maximum TX rate of the SR-IOV VF device", + "type": "string" + }, + "tx_rate_min": { + "description": "The minimum TX rate of the SR-IOV VF device", + "type": "string" + }, + "vlan_id": { + "description": "The tagged 
vLAN ID of the SR-IOV VF device", + "type": "string" + }, + "vlan_qos": { + "description": "The QOS group of the tagged vLAN", + "type": "string" + } + }, + "type": "object" + }, + "mac": { + "description": "The current MAC address of the VF device", + "type": "string" + }, + "mtu": { + "description": "The current MTU of the VF device", + "type": "integer" + }, + "pf": { + "description": "The name of the SR-IOV PF parent of this VF device", + "type": "string" + }, + "phy": { + "description": "The name of the SR-IOV VF device", + "type": "string" + }, + "usage": { + "id": "sriov_vf_usage", + "properties": { + "domain": { + "description": "The UUID of the domain the SR-IOV VF is currently used by", + "type": "boolean" + }, + "used": { + "description": "Whether the SR-IOV VF is currently used by a VM or not", + "type": "boolean" + } + }, + "type": "object" + } + }, + "type": "object" + }, "storage-template": { "properties": { "disks": { @@ -1459,8 +1552,15 @@ }, "/api/v1/initialize": { "post": { - "description": "Note: Normally used only once during cluster bootstrap; checks for the existence of the \"/primary_node\" key before proceeding and returns 400 if found", + "description": "
If the 'overwrite' option is not True, the cluster will return 400 if the `/config/primary_node` key is found. If 'overwrite' is True, the existing cluster
data will be erased and new, empty data written in its place.

All node daemons should be stopped before running this command, and the API daemon started manually to avoid undefined behavior.", "parameters": [ + { + "description": "A flag to enable or disable (default) overwriting existing data", + "in": "query", + "name": "overwrite", + "required": false, + "type": "bool" + }, { "description": "A confirmation string to ensure that the API consumer really means it", "in": "query", @@ -4453,6 +4553,181 @@ ] } }, + "/api/v1/sriov/pf": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_pf" + } + } + }, + "summary": "Return a list of SR-IOV PFs on a given node", + "tags": [ + "network / sriov" + ] + } + }, + "/api/v1/sriov/pf/{node}": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_pf" + } + } + }, + "summary": "Return a list of SR-IOV PFs on node {node}", + "tags": [ + "network / sriov" + ] + } + }, + "/api/v1/sriov/vf": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_vf" + } + } + }, + "summary": "Return a list of SR-IOV VFs on a given node, optionally limited to those in the specified PF", + "tags": [ + "network / sriov" + ] + } + }, + "/api/v1/sriov/vf/{node}": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_vf" + } + } + }, + "summary": "Return a list of SR-IOV VFs on node {node}, optionally limited to those in the specified PF", + "tags": [ + "network / sriov" + ] + } + }, + "/api/v1/sriov/vf/{node}/{vf}": { + "get": { + "description": "", + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/sriov_vf" + } + }, + "404": { + "description": "Not found", + "schema": { + "$ref": "#/definitions/Message" + } + } + }, + "summary": "Return information about {vf} on {node}", + 
"tags": [ + "network / sriov" + ] + }, + "put": { + "description": "", + "parameters": [ + { + "description": "The vLAN ID for vLAN tagging (0 is disabled)", + "in": "query", + "name": "vlan_id", + "required": false, + "type": "integer" + }, + { + "description": "The vLAN QOS priority (0 is disabled)", + "in": "query", + "name": "vlan_qos", + "required": false, + "type": "integer" + }, + { + "description": "The minimum TX rate (0 is disabled)", + "in": "query", + "name": "tx_rate_min", + "required": false, + "type": "integer" + }, + { + "description": "The maximum TX rate (0 is disabled)", + "in": "query", + "name": "tx_rate_max", + "required": false, + "type": "integer" + }, + { + "description": "The administrative link state", + "enum": [ + "auto", + "enable", + "disable" + ], + "in": "query", + "name": "link_state", + "required": false, + "type": "string" + }, + { + "description": "Enable or disable spoof checking", + "in": "query", + "name": "spoof_check", + "required": false, + "type": "boolean" + }, + { + "description": "Enable or disable VF user trust", + "in": "query", + "name": "trust", + "required": false, + "type": "boolean" + }, + { + "description": "Enable or disable query RSS support", + "in": "query", + "name": "query_rss", + "required": false, + "type": "boolean" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/Message" + } + }, + "400": { + "description": "Bad request", + "schema": { + "$ref": "#/definitions/Message" + } + } + }, + "summary": "Set the configuration of {vf} on {node}", + "tags": [ + "network / sriov" + ] + } + }, "/api/v1/status": { "get": { "description": "", @@ -5721,7 +5996,8 @@ "mem", "vcpus", "load", - "vms" + "vms", + "none (cluster default)" ], "in": "query", "name": "selector", diff --git a/node-daemon/pvcnoded.sample.yaml b/node-daemon/pvcnoded.sample.yaml index 89c17603..b728c84b 100644 --- a/node-daemon/pvcnoded.sample.yaml +++ b/node-daemon/pvcnoded.sample.yaml @@ -157,6 
+157,21 @@ pvc: networking: # bridge_device: Underlying device to use for bridged vLAN networks; usually the device underlying bridge_device: ens4 + # sriov_enable: Enable or disable (default if absent) SR-IOV network support + sriov_enable: False + # sriov_device: Underlying device(s) to use for SR-IOV networks; can be bridge_device or other NIC(s) + sriov_device: + # The physical device name + - phy: ens1f1 + # The preferred MTU of the physical device; OPTIONAL - defaults to the interface default if unset + mtu: 9000 + # The number of VFs to enable on this device + # NOTE: This defines the maximum number of VMs which can be provisioned on this physical device; VMs + # are allocated to these VFs manually by the administrator and thus all nodes should have the + # same number + # NOTE: This value cannot be changed at runtime on Intel(R) NICs; the node will need to be restarted + # if this value changes + vfcount: 8 # upstream: Upstream physical interface device upstream: # device: Upstream interface device name diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 4d339ebe..8f52f754 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -49,6 +49,7 @@ import daemon_lib.common as common import pvcnoded.VMInstance as VMInstance import pvcnoded.NodeInstance as NodeInstance import pvcnoded.VXNetworkInstance as VXNetworkInstance +import pvcnoded.SRIOVVFInstance as SRIOVVFInstance import pvcnoded.DNSAggregatorInstance as DNSAggregatorInstance import pvcnoded.CephInstance as CephInstance import pvcnoded.MetadataAPIInstance as MetadataAPIInstance @@ -223,6 +224,12 @@ def readConfig(pvcnoded_config_file, myhostname): 'upstream_mtu': o_config['pvc']['system']['configuration']['networking']['upstream']['mtu'], 'upstream_dev_ip': o_config['pvc']['system']['configuration']['networking']['upstream']['address'], } + + # Check if SR-IOV is enabled and activate + config_networking['enable_sriov'] = 
o_config['pvc']['system']['configuration']['networking'].get('sriov_enable', False) + if config_networking['enable_sriov']: + config_networking['sriov_device'] = list(o_config['pvc']['system']['configuration']['networking']['sriov_device']) + except Exception as e: print('ERROR: Failed to load configuration: {}'.format(e)) exit(1) @@ -289,6 +296,7 @@ if debug: # Handle the enable values enable_hypervisor = config['enable_hypervisor'] enable_networking = config['enable_networking'] +enable_sriov = config['enable_sriov'] enable_storage = config['enable_storage'] ############################################################################### @@ -380,7 +388,40 @@ else: fmt_purple = '' ############################################################################### -# PHASE 2a - Create local IP addresses for static networks +# PHASE 2a - Activate SR-IOV support +############################################################################### + +# This happens before other networking steps to enable using VFs for cluster functions. 
+if enable_networking and enable_sriov: + logger.out('Setting up SR-IOV device support', state='i') + # Enable unsafe interruptts for the vfio_iommu_type1 kernel module + try: + common.run_os_command('modprobe vfio_iommu_type1 allow_unsafe_interrupts=1') + with open('/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts', 'w') as mfh: + mfh.write('Y') + except Exception: + logger.out('Failed to enable kernel modules; SR-IOV may fail.', state='w') + + # Loop through our SR-IOV NICs and enable the numvfs for each + for device in config['sriov_device']: + logger.out('Preparing SR-IOV PF {} with {} VFs'.format(device['phy'], device['vfcount']), state='i') + try: + with open('/sys/class/net/{}/device/sriov_numvfs'.format(device['phy']), 'r') as vfh: + current_sriov_count = vfh.read().strip() + with open('/sys/class/net/{}/device/sriov_numvfs'.format(device['phy']), 'w') as vfh: + vfh.write(str(device['vfcount'])) + except FileNotFoundError: + logger.out('Failed to open SR-IOV configuration for PF {}; device may not support SR-IOV.'.format(device), state='w') + except OSError: + logger.out('Failed to set SR-IOV VF count for PF {} to {}; already set to {}.'.format(device['phy'], device['vfcount'], current_sriov_count), state='w') + + if device.get('mtu', None) is not None: + logger.out('Setting SR-IOV PF {} to MTU {}'.format(device['phy'], device['mtu']), state='i') + common.run_os_command('ip link set {} mtu {} up'.format(device['phy'], device['mtu'])) + + +############################################################################### +# PHASE 2b - Create local IP addresses for static networks ############################################################################### if enable_networking: @@ -444,7 +485,7 @@ if enable_networking: common.run_os_command('ip route add default via {} dev {}'.format(upstream_gateway, 'brupstream')) ############################################################################### -# PHASE 2b - Prepare sysctl for pvcnoded +# PHASE 
2c - Prepare sysctl for pvcnoded ############################################################################### if enable_networking: @@ -877,12 +918,15 @@ logger.out('Setting up objects', state='i') d_node = dict() d_network = dict() +d_sriov_vf = dict() d_domain = dict() d_osd = dict() d_pool = dict() d_volume = dict() # Dict of Dicts node_list = [] network_list = [] +sriov_pf_list = [] +sriov_vf_list = [] domain_list = [] osd_list = [] pool_list = [] @@ -1037,6 +1081,124 @@ if enable_networking: for node in d_node: d_node[node].update_network_list(d_network) + # Add the SR-IOV PFs and VFs to Zookeeper + # These do not behave like the objects; they are not dynamic (the API cannot change them), and they + # exist for the lifetime of this Node instance. The objects are set here in Zookeeper on a per-node + # basis, under the Node configuration tree. + # MIGRATION: The schema.schema.get ensures that the current active Schema contains the required keys + if enable_sriov and zkhandler.schema.schema.get('sriov_pf', None) is not None: + vf_list = list() + for device in config['sriov_device']: + pf = device['phy'] + vfcount = device['vfcount'] + if device.get('mtu', None) is None: + mtu = 1500 + else: + mtu = device['mtu'] + + # Create the PF device in Zookeeper + zkhandler.write([ + (('node.sriov.pf', myhostname, 'sriov_pf', pf), ''), + (('node.sriov.pf', myhostname, 'sriov_pf.mtu', pf), mtu), + (('node.sriov.pf', myhostname, 'sriov_pf.vfcount', pf), vfcount), + ]) + # Append the device to the list of PFs + sriov_pf_list.append(pf) + + # Get the list of VFs from `ip link show` + vf_list = json.loads(common.run_os_command('ip --json link show {}'.format(pf))[1])[0].get('vfinfo_list', []) + for vf in vf_list: + # { + # 'vf': 3, + # 'link_type': 'ether', + # 'address': '00:00:00:00:00:00', + # 'broadcast': 'ff:ff:ff:ff:ff:ff', + # 'vlan_list': [{'vlan': 101, 'qos': 2}], + # 'rate': {'max_tx': 0, 'min_tx': 0}, + # 'spoofchk': True, + # 'link_state': 'auto', + # 'trust': 
False, + # 'query_rss_en': False + # } + vfphy = '{}v{}'.format(pf, vf['vf']) + + # Get the PCIe bus information + dev_pcie_path = None + try: + with open('/sys/class/net/{}/device/uevent'.format(vfphy)) as vfh: + dev_uevent = vfh.readlines() + for line in dev_uevent: + if re.match(r'^PCI_SLOT_NAME=.*', line): + dev_pcie_path = line.rstrip().split('=')[-1] + except FileNotFoundError: + # Something must already be using the PCIe device + pass + + # Add the VF to Zookeeper if it does not yet exist + if not zkhandler.exists(('node.sriov.vf', myhostname, 'sriov_vf', vfphy)): + if dev_pcie_path is not None: + pcie_domain, pcie_bus, pcie_slot, pcie_function = re.split(r':|\.', dev_pcie_path) + else: + # We can't add the device - for some reason we can't get any information on its PCIe bus path, + # so just ignore this one, and continue. + # This shouldn't happen under any real circumstances, unless the admin tries to attach a non-existent + # VF to a VM manually, then goes ahead and adds that VF to the system with the VM running. 
+ continue + + zkhandler.write([ + (('node.sriov.vf', myhostname, 'sriov_vf', vfphy), ''), + (('node.sriov.vf', myhostname, 'sriov_vf.pf', vfphy), pf), + (('node.sriov.vf', myhostname, 'sriov_vf.mtu', vfphy), mtu), + (('node.sriov.vf', myhostname, 'sriov_vf.mac', vfphy), vf['address']), + (('node.sriov.vf', myhostname, 'sriov_vf.phy_mac', vfphy), vf['address']), + (('node.sriov.vf', myhostname, 'sriov_vf.config', vfphy), ''), + (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '0')), + (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '0')), + (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_min', vfphy), vf['rate']['min_tx']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_max', vfphy), vf['rate']['max_tx']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.spoof_check', vfphy), vf['spoofchk']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.link_state', vfphy), vf['link_state']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.trust', vfphy), vf['trust']), + (('node.sriov.vf', myhostname, 'sriov_vf.config.query_rss', vfphy), vf['query_rss_en']), + (('node.sriov.vf', myhostname, 'sriov_vf.pci', vfphy), ''), + (('node.sriov.vf', myhostname, 'sriov_vf.pci.domain', vfphy), pcie_domain), + (('node.sriov.vf', myhostname, 'sriov_vf.pci.bus', vfphy), pcie_bus), + (('node.sriov.vf', myhostname, 'sriov_vf.pci.slot', vfphy), pcie_slot), + (('node.sriov.vf', myhostname, 'sriov_vf.pci.function', vfphy), pcie_function), + (('node.sriov.vf', myhostname, 'sriov_vf.used', vfphy), False), + (('node.sriov.vf', myhostname, 'sriov_vf.used_by', vfphy), ''), + ]) + + # Append the device to the list of VFs + sriov_vf_list.append(vfphy) + + # Remove any obsolete PFs from Zookeeper if they go away + for pf in zkhandler.children(('node.sriov.pf', myhostname)): + if pf not in sriov_pf_list: + zkhandler.delete([ + ('node.sriov.pf', myhostname, 'sriov_pf', pf) + ]) + # Remove 
any obsolete VFs from Zookeeper if their PF goes away + for vf in zkhandler.children(('node.sriov.vf', myhostname)): + vf_pf = zkhandler.read(('node.sriov.vf', myhostname, 'sriov_vf.pf', vf)) + if vf_pf not in sriov_pf_list: + zkhandler.delete([ + ('node.sriov.vf', myhostname, 'sriov_vf', vf) + ]) + + # SR-IOV VF objects + # This is a ChildrenWatch just for consistency; the list never changes at runtime + @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('node.sriov.vf', myhostname)) + def update_sriov_vfs(new_sriov_vf_list): + global sriov_vf_list, d_sriov_vf + + # Add VFs to the list + for vf in common.sortInterfaceNames(new_sriov_vf_list): + d_sriov_vf[vf] = SRIOVVFInstance.SRIOVVFInstance(vf, zkhandler, config, logger, this_node) + + sriov_vf_list = sorted(new_sriov_vf_list) + logger.out('{}SR-IOV VF list:{} {}'.format(fmt_blue, fmt_end, ' '.join(sriov_vf_list)), state='i') + if enable_hypervisor: # VM command pipeline key @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.cmd.domain')) @@ -1526,6 +1688,9 @@ def collect_vm_stats(queue): logger.out("Getting network statistics for VM {}".format(domain_name), state='d', prefix='vm-thread') domain_network_stats = [] for interface in tree.findall('devices/interface'): + interface_type = interface.get('type') + if interface_type not in ['bridge']: + continue interface_name = interface.find('target').get('dev') interface_bridge = interface.find('source').get('bridge') interface_stats = domain.interfaceStats(interface_name) diff --git a/node-daemon/pvcnoded/NodeInstance.py b/node-daemon/pvcnoded/NodeInstance.py index 9cc0c897..d92259fe 100644 --- a/node-daemon/pvcnoded/NodeInstance.py +++ b/node-daemon/pvcnoded/NodeInstance.py @@ -466,7 +466,6 @@ class NodeInstance(object): """ patronictl -c /etc/patroni/config.yml - -d zookeeper://localhost:2181 switchover --candidate {} --force diff --git a/node-daemon/pvcnoded/SRIOVVFInstance.py b/node-daemon/pvcnoded/SRIOVVFInstance.py new file mode 100644 index 
00000000..ddc1abca --- /dev/null +++ b/node-daemon/pvcnoded/SRIOVVFInstance.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python3 + +# SRIOVVFInstance.py - Class implementing a PVC SR-IOV VF and run by pvcnoded +# Part of the Parallel Virtual Cluster (PVC) system +# +# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +############################################################################### + +import daemon_lib.common as common + + +def boolToOnOff(state): + if state and str(state) == 'True': + return 'on' + else: + return 'off' + + +class SRIOVVFInstance(object): + # Initialization function + def __init__(self, vf, zkhandler, config, logger, this_node): + self.vf = vf + self.zkhandler = zkhandler + self.config = config + self.logger = logger + self.this_node = this_node + self.myhostname = self.this_node.name + + self.pf = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.pf', self.vf)) + self.mtu = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.mtu', self.vf)) + self.vfid = self.vf.replace('{}v'.format(self.pf), '') + + self.logger.out('Setting MTU to {}'.format(self.mtu), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} mtu {}'.format(self.vf, self.mtu)) + + # These properties are set via the DataWatch functions, to ensure they are configured on the system + self.mac = None + self.vlan_id = None + self.vlan_qos = None + self.tx_rate_min = None + 
self.tx_rate_max = None + self.spoof_check = None + self.link_state = None + self.trust = None + self.query_rss = None + + # Zookeeper handlers for changed configs + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.mac', self.vf)) + def watch_vf_mac(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '00:00:00:00:00:00' + + if data != self.mac: + self.mac = data + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.vlan_id', self.vf)) + def watch_vf_vlan_id(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.vlan_id: + self.vlan_id = data + self.logger.out('Setting vLAN ID to {}'.format(self.vlan_id), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} vf {} vlan {} qos {}'.format(self.pf, self.vfid, self.vlan_id, self.vlan_qos)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.vlan_qos', self.vf)) + def watch_vf_vlan_qos(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.vlan_qos: + self.vlan_qos = data 
+ self.logger.out('Setting vLAN QOS to {}'.format(self.vlan_qos), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} vf {} vlan {} qos {}'.format(self.pf, self.vfid, self.vlan_id, self.vlan_qos)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.tx_rate_min', self.vf)) + def watch_vf_tx_rate_min(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.tx_rate_min: + self.tx_rate_min = data + self.logger.out('Setting minimum TX rate to {}'.format(self.tx_rate_min), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} vf {} min_tx_rate {}'.format(self.pf, self.vfid, self.tx_rate_min)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.tx_rate_max', self.vf)) + def watch_vf_tx_rate_max(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.tx_rate_max: + self.tx_rate_max = data + self.logger.out('Setting maximum TX rate to {}'.format(self.tx_rate_max), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} vf {} max_tx_rate {}'.format(self.pf, self.vfid, self.tx_rate_max)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.spoof_check', self.vf)) + def 
watch_vf_spoof_check(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = '0' + + if data != self.spoof_check: + self.spoof_check = data + self.logger.out('Setting spoof checking {}'.format(boolToOnOff(self.spoof_check)), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} vf {} spoofchk {}'.format(self.pf, self.vfid, boolToOnOff(self.spoof_check))) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.link_state', self.vf)) + def watch_vf_link_state(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = 'on' + + if data != self.link_state: + self.link_state = data + self.logger.out('Setting link state to {}'.format(boolToOnOff(self.link_state)), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} vf {} state {}'.format(self.pf, self.vfid, self.link_state)) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.trust', self.vf)) + def watch_vf_trust(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = 'off' + + if data != self.trust: + self.trust = data + self.logger.out('Setting trust mode 
{}'.format(boolToOnOff(self.trust)), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} vf {} trust {}'.format(self.pf, self.vfid, boolToOnOff(self.trust))) + + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('node.sriov.vf', self.myhostname) + self.zkhandler.schema.path('sriov_vf.config.query_rss', self.vf)) + def watch_vf_query_rss(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + try: + data = data.decode('ascii') + except AttributeError: + data = 'off' + + if data != self.query_rss: + self.query_rss = data + self.logger.out('Setting RSS query ability {}'.format(boolToOnOff(self.query_rss)), state='i', prefix='SR-IOV VF {}'.format(self.vf)) + common.run_os_command('ip link set {} vf {} query_rss {}'.format(self.pf, self.vfid, boolToOnOff(self.query_rss))) diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py index 2d93442f..e9758e5c 100644 --- a/node-daemon/pvcnoded/VMInstance.py +++ b/node-daemon/pvcnoded/VMInstance.py @@ -380,7 +380,7 @@ class VMInstance(object): # Abort shutdown if the state changes to start current_state = self.zkhandler.read(('domain.state', self.domuuid)) - if current_state not in ['shutdown', 'restart']: + if current_state not in ['shutdown', 'restart', 'migrate']: self.logger.out('Aborting VM shutdown due to state change', state='i', prefix='Domain {}'.format(self.domuuid)) is_aborted = True break @@ -528,11 +528,7 @@ class VMInstance(object): def migrate_shutdown(): self.logger.out('Shutting down VM for offline migration', state='i', prefix='Domain {}'.format(self.domuuid)) - self.zkhandler.write([ - (('domain.state', self.domuuid), 'shutdown') - ]) - while self.zkhandler.read(('domain.state', self.domuuid)) != 'stop': - time.sleep(0.5) + self.shutdown_vm() return True 
do_migrate_shutdown = False @@ -726,6 +722,7 @@ class VMInstance(object): self.state = self.zkhandler.read(('domain.state', self.domuuid)) self.node = self.zkhandler.read(('domain.node', self.domuuid)) self.lastnode = self.zkhandler.read(('domain.last_node', self.domuuid)) + self.migration_method = self.zkhandler.read(('domain.meta.migrate_method', self.domuuid)) # Check the current state of the VM try: diff --git a/test-cluster.sh b/test-cluster.sh index 794c897b..2b01e60f 100755 --- a/test-cluster.sh +++ b/test-cluster.sh @@ -22,10 +22,11 @@ _pvc maintenance off backup_tmp=$(mktemp) _pvc task backup --file ${backup_tmp} _pvc task restore --yes --file ${backup_tmp} -rm ${backup_tmp} +rm ${backup_tmp} || true # Provisioner tests _pvc provisioner profile list test +_pvc vm network get testX _pvc provisioner create --wait testX test sleep 30 @@ -50,9 +51,16 @@ sleep 5 _pvc vm move --wait --target hv1 testX sleep 5 _pvc vm meta testX --limit hv1 --selector vms --method live --profile test --no-autostart +_pvc vm vcpu set testX 4 +_pvc vm vcpu get testX +_pvc vm memory set testX 4096 +_pvc vm memory get testX +_pvc vm vcpu set testX 2 +_pvc vm memory set testX 2048 --restart --yes +sleep 5 _pvc vm list testX _pvc vm info --long testX -rm ${vm_tmp} +rm ${vm_tmp} || true # Node tests _pvc node primary --wait hv1 @@ -84,6 +92,14 @@ _pvc network dhcp remove --yes 10001 12:34:56:78:90:ab _pvc network modify --domain test10001.local 10001 _pvc network list _pvc network info --long 10001 + +# Network-VM interaction tests +_pvc vm network add testX 10001 --model virtio --restart --yes +sleep 30 +_pvc vm network get testX +_pvc vm network remove testX 10001 --restart --yes +sleep 5 + _pvc network remove --yes 10001 # Storage tests @@ -106,6 +122,14 @@ _pvc storage volume snapshot add testing testerX asnapshotX _pvc storage volume snapshot rename testing testerX asnapshotX asnapshotY _pvc storage volume snapshot list _pvc storage volume snapshot remove --yes testing testerX 
asnapshotY + +# Storage-VM interaction tests +_pvc vm volume add testX --type rbd --disk-id sdh --bus scsi testing/testerY --restart --yes +sleep 30 +_pvc vm volume get testX +_pvc vm volume remove testX testing/testerY --restart --yes +sleep 5 + _pvc storage volume remove --yes testing testerY _pvc storage volume remove --yes testing testerX _pvc storage pool remove --yes testing