From 50d8aa0586b1dd84b697395f028595651b0c28cc Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sat, 9 Oct 2021 17:02:27 -0400 Subject: [PATCH] Add handlers for client network MTUs Refactors some of the code in VXNetworkInstance to handle MTUs in a more streamlined fashion. Also fixes a bug whereby bridge client networks were being explicitly given the cluster dev MTU which might not be correct. Now adds support for this option explicitly in the configs, and defaults to 1500 for safety (the standard Ethernet MTU). Addresses #144 --- daemon-common/migrations/versions/6.json | 1 + daemon-common/zkhandler.py | 3 +- node-daemon/pvcnoded.sample.yaml | 3 + .../pvcnoded/objects/VXNetworkInstance.py | 131 +++++++++++------- node-daemon/pvcnoded/util/config.py | 1 + 5 files changed, 88 insertions(+), 51 deletions(-) create mode 100644 daemon-common/migrations/versions/6.json diff --git a/daemon-common/migrations/versions/6.json b/daemon-common/migrations/versions/6.json new file mode 100644 index 00000000..2e2cdc96 --- /dev/null +++ b/daemon-common/migrations/versions/6.json @@ -0,0 +1 @@ +{"version": "6", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", 
"data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", 
"rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index cb1b8131..b6857dea 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -466,7 +466,7 @@ class ZKHandler(object): # class ZKSchema(object): # Current version - _version = 5 + _version = 6 # Root for doing nested keys _schema_root = '' @@ -595,6 +595,7 @@ class ZKSchema(object): 'network': { 'vni': '', # The root key 'type': '/nettype', + 'mtu': '/mtu', 'rule': '/firewall_rules', 'rule.in': '/firewall_rules/in', 'rule.out': '/firewall_rules/out', diff --git a/node-daemon/pvcnoded.sample.yaml b/node-daemon/pvcnoded.sample.yaml index 360a14b8..4b08206a 100644 --- a/node-daemon/pvcnoded.sample.yaml +++ b/node-daemon/pvcnoded.sample.yaml @@ -161,6 +161,9 @@ pvc: networking: # bridge_device: Underlying device to use for bridged vLAN networks; usually the device of bridge_device: ens4 + # bridge_mtu: The MTU of the underlying device used for bridged vLAN networks, and thus the maximum + # MTU of the overlying bridge devices. 
+ bridge_mtu: 1500 # sriov_enable: Enable or disable (default if absent) SR-IOV network support sriov_enable: False # sriov_device: Underlying device(s) to use for SR-IOV networks; can be bridge_device or other NIC(s) diff --git a/node-daemon/pvcnoded/objects/VXNetworkInstance.py b/node-daemon/pvcnoded/objects/VXNetworkInstance.py index 35680d99..190d7b50 100644 --- a/node-daemon/pvcnoded/objects/VXNetworkInstance.py +++ b/node-daemon/pvcnoded/objects/VXNetworkInstance.py @@ -39,9 +39,13 @@ class VXNetworkInstance(object): self.cluster_dev = config['cluster_dev'] self.cluster_mtu = config['cluster_mtu'] self.bridge_dev = config['bridge_dev'] + self.bridge_mtu = config['bridge_mtu'] self.nettype = self.zkhandler.read(('network.type', self.vni)) if self.nettype == 'bridged': + self.base_nic = 'vlan{}'.format(self.vni) + self.bridge_nic = 'vmbr{}'.format(self.vni) + self.max_mtu = self.bridge_mtu self.logger.out( 'Creating new bridged network', prefix='VNI {}'.format(self.vni), @@ -49,6 +53,9 @@ class VXNetworkInstance(object): ) self.init_bridged() elif self.nettype == 'managed': + self.base_nic = 'vxlan{}'.format(self.vni) + self.bridge_nic = 'vmbr{}'.format(self.vni) + self.max_mtu = self.cluster_mtu - 50 self.logger.out( 'Creating new managed network', prefix='VNI {}'.format(self.vni), @@ -56,6 +63,9 @@ class VXNetworkInstance(object): ) self.init_managed() else: + self.base_nic = None + self.bridge_nic = None + self.max_mtu = 0 self.logger.out( 'Invalid network type {}'.format(self.nettype), prefix='VNI {}'.format(self.vni), @@ -68,8 +78,12 @@ class VXNetworkInstance(object): self.old_description = None self.description = None - self.vlan_nic = 'vlan{}'.format(self.vni) - self.bridge_nic = 'vmbr{}'.format(self.vni) + try: + self.vx_mtu = self.zkhandler.read(('network.mtu', self.vni)) + if self.vx_mtu == '': + raise + except Exception: + self.vx_mtu = self.max_mtu # Zookeper handlers for changed states 
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network', self.vni)) @@ -83,6 +97,17 @@ class VXNetworkInstance(object): self.old_description = self.description self.description = data.decode('ascii') + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.mtu', self.vni)) + def watch_network_mtu(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + if data and self.vx_mtu != data.decode('ascii'): + self.vx_mtu = data.decode('ascii') + self.updateNetworkMTU() + self.createNetworkBridged() # Initialize a managed network @@ -102,8 +127,12 @@ class VXNetworkInstance(object): self.dhcp4_start = self.zkhandler.read(('network.ip4.dhcp_start', self.vni)) self.dhcp4_end = self.zkhandler.read(('network.ip4.dhcp_end', self.vni)) - self.vxlan_nic = 'vxlan{}'.format(self.vni) - self.bridge_nic = 'vmbr{}'.format(self.vni) + try: + self.vx_mtu = self.zkhandler.read(('network.mtu', self.vni)) + if self.vx_mtu == '': + raise + except Exception: + self.vx_mtu = self.max_mtu self.nftables_netconf_filename = '{}/networks/{}.nft'.format(self.config['nft_dynamic_directory'], self.vni) self.firewall_rules = [] @@ -138,7 +167,7 @@ add rule inet filter input tcp dport 80 meta iifname {bridgenic} counter accept # Block traffic into the router from network add rule inet filter input meta iifname {bridgenic} counter drop """.format( - vxlannic=self.vxlan_nic, + vxlannic=self.base_nic, bridgenic=self.bridge_nic ) @@ -147,14 +176,14 @@ add rule inet filter forward ip daddr {netaddr4} counter jump {vxlannic}-in add rule inet filter forward ip saddr {netaddr4} counter jump {vxlannic}-out """.format( netaddr4=self.ip4_network, - vxlannic=self.vxlan_nic, + vxlannic=self.base_nic, ) self.firewall_rules_v6 = """# Jump from forward chain to this chain when matching net (IPv4) add rule inet filter forward 
ip6 daddr {netaddr6} counter jump {vxlannic}-in add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out """.format( netaddr6=self.ip6_network, - vxlannic=self.vxlan_nic, + vxlannic=self.base_nic, ) self.firewall_rules_in = self.zkhandler.children(('network.rule.in', self.vni)) @@ -209,6 +238,17 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out self.stopDHCPServer() self.startDHCPServer() + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.mtu', self.vni)) + def watch_network_mtu(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + if data and self.vx_mtu != data.decode('ascii'): + self.vx_mtu = data.decode('ascii') + self.updateNetworkMTU() + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.ip6.network', self.vni)) def watch_network_ip6_network(data, stat, event=''): if event and event.type == 'DELETED': @@ -383,6 +423,21 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out def getvni(self): return self.vni + def updateNetworkMTU(self): + # Set MTU of base and bridge NICs + common.run_os_command( + 'ip link set {} mtu {} up'.format( + self.base_nic, + self.vx_mtu + ) + ) + common.run_os_command( + 'ip link set {} mtu {} up'.format( + self.bridge_nic, + self.vx_mtu + ) + ) + def updateDHCPReservations(self, old_reservations_list, new_reservations_list): for reservation in new_reservations_list: if reservation not in old_reservations_list: @@ -457,9 +512,10 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out # Create bridged network configuration def createNetworkBridged(self): self.logger.out( - 'Creating bridged vLAN device {} on interface {}'.format( - self.vlan_nic, - self.bridge_dev + 'Creating bridged vLAN device {} on interface {} MTU 
{}'.format( + self.base_nic, + self.bridge_dev, + self.vx_mtu ), prefix='VNI {}'.format(self.vni), state='o' @@ -469,7 +525,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out common.run_os_command( 'ip link add link {} name {} type vlan id {}'.format( self.bridge_dev, - self.vlan_nic, + self.base_nic, self.vni ) ) @@ -480,20 +536,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) ) - # Set MTU of vLAN and bridge NICs - vx_mtu = self.cluster_mtu - common.run_os_command( - 'ip link set {} mtu {} up'.format( - self.vlan_nic, - vx_mtu - ) - ) - common.run_os_command( - 'ip link set {} mtu {} up'.format( - self.bridge_nic, - vx_mtu - ) - ) + self.updateNetworkMTU() # Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9) common.run_os_command( @@ -513,15 +556,16 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out common.run_os_command( 'brctl addif {} {}'.format( self.bridge_nic, - self.vlan_nic + self.base_nic ) ) # Create managed network configuration def createNetworkManaged(self): self.logger.out( - 'Creating VXLAN device on interface {}'.format( - self.cluster_dev + 'Creating VXLAN device on interface {} MTU {}'.format( + self.cluster_dev, + self.vx_mtu ), prefix='VNI {}'.format(self.vni), state='o' @@ -530,7 +574,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out # Create VXLAN interface common.run_os_command( 'ip link add {} type vxlan id {} dstport 4789 dev {}'.format( - self.vxlan_nic, + self.base_nic, self.vni, self.cluster_dev ) @@ -542,20 +586,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) ) - # Set MTU of VXLAN and bridge NICs - vx_mtu = self.cluster_mtu - 50 - common.run_os_command( - 'ip link set {} mtu {} up'.format( - self.vxlan_nic, - vx_mtu - ) - ) - common.run_os_command( - 'ip link set {} mtu {} up'.format( - self.bridge_nic, - vx_mtu - ) - ) + 
self.updateNetworkMTU() # Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9) common.run_os_command( @@ -575,7 +606,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out common.run_os_command( 'brctl addif {} {}'.format( self.bridge_nic, - self.vxlan_nic + self.base_nic ) ) @@ -728,13 +759,13 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) common.run_os_command( 'ip link set {} down'.format( - self.vlan_nic + self.base_nic ) ) common.run_os_command( 'brctl delif {} {}'.format( self.bridge_nic, - self.vlan_nic + self.base_nic ) ) common.run_os_command( @@ -744,7 +775,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) common.run_os_command( 'ip link delete {}'.format( - self.vlan_nic + self.base_nic ) ) @@ -764,13 +795,13 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) common.run_os_command( 'ip link set {} down'.format( - self.vxlan_nic + self.base_nic ) ) common.run_os_command( 'brctl delif {} {}'.format( self.bridge_nic, - self.vxlan_nic + self.base_nic ) ) common.run_os_command( @@ -780,7 +811,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) common.run_os_command( 'ip link delete {}'.format( - self.vxlan_nic + self.base_nic ) ) diff --git a/node-daemon/pvcnoded/util/config.py b/node-daemon/pvcnoded/util/config.py index 8561cd5b..003b0e47 100644 --- a/node-daemon/pvcnoded/util/config.py +++ b/node-daemon/pvcnoded/util/config.py @@ -287,6 +287,7 @@ def get_configuration(): 'upstream_mtu': o_sysnetwork_upstream.get('mtu', None), 'upstream_dev_ip': o_sysnetwork_upstream.get('address', None), 'bridge_dev': o_sysnetworks.get('bridge_device', None), + 'bridge_mtu': o_sysnetworks.get('bridge_mtu', 1500), 'enable_sriov': o_sysnetworks.get('sriov_enable', False), 'sriov_device': o_sysnetworks.get('sriov_device', list()) }