diff --git a/daemon-common/migrations/versions/6.json b/daemon-common/migrations/versions/6.json new file mode 100644 index 00000000..2e2cdc96 --- /dev/null +++ b/daemon-common/migrations/versions/6.json @@ -0,0 +1 @@ +{"version": "6", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index cb1b8131..b6857dea 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -466,7 +466,7 @@ class ZKHandler(object): # class ZKSchema(object): # Current version - _version = 5 + _version = 6 # Root for doing nested keys _schema_root = '' @@ -595,6 +595,7 @@ class ZKSchema(object): 'network': { 'vni': '', # The root key 'type': '/nettype', + 'mtu': '/mtu', 'rule': '/firewall_rules', 'rule.in': '/firewall_rules/in', 'rule.out': '/firewall_rules/out', diff --git a/node-daemon/pvcnoded.sample.yaml b/node-daemon/pvcnoded.sample.yaml index 360a14b8..4b08206a 100644 --- a/node-daemon/pvcnoded.sample.yaml +++ b/node-daemon/pvcnoded.sample.yaml @@ -161,6 +161,9 @@ pvc: networking: # bridge_device: Underlying device to use for bridged vLAN networks; usually the device of bridge_device: ens4 + # bridge_mtu: The MTU of the underlying device used for bridged vLAN networks, and thus the maximum + # MTU of the overlying bridge devices. + bridge_mtu: 1500 # sriov_enable: Enable or disable (default if absent) SR-IOV network support sriov_enable: False # sriov_device: Underlying device(s) to use for SR-IOV networks; can be bridge_device or other NIC(s) diff --git a/node-daemon/pvcnoded/objects/VXNetworkInstance.py b/node-daemon/pvcnoded/objects/VXNetworkInstance.py index 35680d99..190d7b50 100644 --- a/node-daemon/pvcnoded/objects/VXNetworkInstance.py +++ b/node-daemon/pvcnoded/objects/VXNetworkInstance.py @@ -39,9 +39,13 @@ class VXNetworkInstance(object): self.cluster_dev = config['cluster_dev'] self.cluster_mtu = config['cluster_mtu'] self.bridge_dev = config['bridge_dev'] + self.bridge_mtu = config['bridge_mtu'] self.nettype = self.zkhandler.read(('network.type', self.vni)) if self.nettype == 'bridged': + self.base_nic = 'vlan{}'.format(self.vni) + self.bridge_nic = 'vmbr{}'.format(self.vni) + self.max_mtu = self.bridge_mtu self.logger.out( 'Creating new bridged network', prefix='VNI {}'.format(self.vni), @@ -49,6 +53,9 @@ class VXNetworkInstance(object): ) self.init_bridged() elif self.nettype == 'managed': + self.base_nic = 'vxlan{}'.format(self.vni) + self.bridge_nic = 'vmbr{}'.format(self.vni) + self.max_mtu = self.cluster_mtu - 50 self.logger.out( 'Creating new managed network', prefix='VNI {}'.format(self.vni), @@ -56,6 +63,9 @@ class VXNetworkInstance(object): ) self.init_managed() else: + self.base_nic = None + self.bridge_nic = None + self.max_mtu = 0 self.logger.out( 'Invalid network type {}'.format(self.nettype), prefix='VNI {}'.format(self.vni), @@ -68,8 +78,12 @@ class VXNetworkInstance(object): self.old_description = None self.description = None - self.vlan_nic = 'vlan{}'.format(self.vni) - self.bridge_nic = 'vmbr{}'.format(self.vni) + try: + self.vx_mtu = self.zkhandler.read(('network.mtu', self.vni)) + if self.vx_mtu == '': + raise + except Exception: + self.vx_mtu = self.max_mtu # Zookeper handlers for changed states @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network', self.vni)) @@ -83,6 +97,17 @@ class VXNetworkInstance(object): self.old_description = self.description self.description = data.decode('ascii') + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.mtu', self.vni)) + def watch_network_mtu(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + if data and self.vx_mtu != data.decode('ascii'): + self.vx_mtu = data.decode('ascii') + self.updateNetworkMTU() + self.createNetworkBridged() # Initialize a managed network @@ -102,8 +127,12 @@ class VXNetworkInstance(object): self.dhcp4_start = self.zkhandler.read(('network.ip4.dhcp_start', self.vni)) self.dhcp4_end = self.zkhandler.read(('network.ip4.dhcp_end', self.vni)) - self.vxlan_nic = 'vxlan{}'.format(self.vni) - self.bridge_nic = 'vmbr{}'.format(self.vni) + try: + self.vx_mtu = self.zkhandler.read(('network.mtu', self.vni)) + if self.vx_mtu == '': + raise + except Exception: + self.vx_mtu = self.max_mtu self.nftables_netconf_filename = '{}/networks/{}.nft'.format(self.config['nft_dynamic_directory'], self.vni) self.firewall_rules = [] @@ -138,7 +167,7 @@ add rule inet filter input tcp dport 80 meta iifname {bridgenic} counter accept # Block traffic into the router from network add rule inet filter input meta iifname {bridgenic} counter drop """.format( - vxlannic=self.vxlan_nic, + vxlannic=self.base_nic, bridgenic=self.bridge_nic ) @@ -147,14 +176,14 @@ add rule inet filter forward ip daddr {netaddr4} counter jump {vxlannic}-in add rule inet filter forward ip saddr {netaddr4} counter jump {vxlannic}-out """.format( netaddr4=self.ip4_network, - vxlannic=self.vxlan_nic, + vxlannic=self.base_nic, ) self.firewall_rules_v6 = """# Jump from forward chain to this chain when matching net (IPv4) add rule inet filter forward ip6 daddr {netaddr6} counter jump {vxlannic}-in add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out """.format( netaddr6=self.ip6_network, - vxlannic=self.vxlan_nic, + vxlannic=self.base_nic, ) self.firewall_rules_in = self.zkhandler.children(('network.rule.in', self.vni)) @@ -209,6 +238,17 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out self.stopDHCPServer() self.startDHCPServer() + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.mtu', self.vni)) + def watch_network_mtu(data, stat, event=''): + if event and event.type == 'DELETED': + # The key has been deleted after existing before; terminate this watcher + # because this class instance is about to be reaped in Daemon.py + return False + + if data and self.vx_mtu != data.decode('ascii'): + self.vx_mtu = data.decode('ascii') + self.updateNetworkMTU() + @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.ip6.network', self.vni)) def watch_network_ip6_network(data, stat, event=''): if event and event.type == 'DELETED': @@ -383,6 +423,21 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out def getvni(self): return self.vni + def updateNetworkMTU(self): + # Set MTU of base and bridge NICs + common.run_os_command( + 'ip link set {} mtu {} up'.format( + self.base_nic, + self.vx_mtu + ) + ) + common.run_os_command( + 'ip link set {} mtu {} up'.format( + self.bridge_nic, + self.vx_mtu + ) + ) + def updateDHCPReservations(self, old_reservations_list, new_reservations_list): for reservation in new_reservations_list: if reservation not in old_reservations_list: @@ -457,9 +512,10 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out # Create bridged network configuration def createNetworkBridged(self): self.logger.out( - 'Creating bridged vLAN device {} on interface {}'.format( - self.vlan_nic, - self.bridge_dev + 'Creating bridged vLAN device {} on interface {} MTU {}'.format( + self.base_nic, + self.bridge_dev, + self.vx_mtu ), prefix='VNI {}'.format(self.vni), state='o' @@ -469,7 +525,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out common.run_os_command( 'ip link add link {} name {} type vlan id {}'.format( self.bridge_dev, - self.vlan_nic, + self.base_nic, self.vni ) ) @@ -480,20 +536,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) ) - # Set MTU of vLAN and bridge NICs - vx_mtu = self.cluster_mtu - common.run_os_command( - 'ip link set {} mtu {} up'.format( - self.vlan_nic, - vx_mtu - ) - ) - common.run_os_command( - 'ip link set {} mtu {} up'.format( - self.bridge_nic, - vx_mtu - ) - ) + self.updateNetworkMTU() # Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9) common.run_os_command( @@ -513,15 +556,16 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out common.run_os_command( 'brctl addif {} {}'.format( self.bridge_nic, - self.vlan_nic + self.base_nic ) ) # Create managed network configuration def createNetworkManaged(self): self.logger.out( - 'Creating VXLAN device on interface {}'.format( - self.cluster_dev + 'Creating VXLAN device on interface {} MTU {}'.format( + self.cluster_dev, + self.vx_mtu ), prefix='VNI {}'.format(self.vni), state='o' @@ -530,7 +574,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out # Create VXLAN interface common.run_os_command( 'ip link add {} type vxlan id {} dstport 4789 dev {}'.format( - self.vxlan_nic, + self.base_nic, self.vni, self.cluster_dev ) @@ -542,20 +586,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) ) - # Set MTU of VXLAN and bridge NICs - vx_mtu = self.cluster_mtu - 50 - common.run_os_command( - 'ip link set {} mtu {} up'.format( - self.vxlan_nic, - vx_mtu - ) - ) - common.run_os_command( - 'ip link set {} mtu {} up'.format( - self.bridge_nic, - vx_mtu - ) - ) + self.updateNetworkMTU() # Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9) common.run_os_command( @@ -575,7 +606,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out common.run_os_command( 'brctl addif {} {}'.format( self.bridge_nic, - self.vxlan_nic + self.base_nic ) ) @@ -728,13 +759,13 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) common.run_os_command( 'ip link set {} down'.format( - self.vlan_nic + self.base_nic ) ) common.run_os_command( 'brctl delif {} {}'.format( self.bridge_nic, - self.vlan_nic + self.base_nic ) ) common.run_os_command( @@ -744,7 +775,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) common.run_os_command( 'ip link delete {}'.format( - self.vlan_nic + self.base_nic ) ) @@ -764,13 +795,13 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) common.run_os_command( 'ip link set {} down'.format( - self.vxlan_nic + self.base_nic ) ) common.run_os_command( 'brctl delif {} {}'.format( self.bridge_nic, - self.vxlan_nic + self.base_nic ) ) common.run_os_command( @@ -780,7 +811,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out ) common.run_os_command( 'ip link delete {}'.format( - self.vxlan_nic + self.base_nic ) ) diff --git a/node-daemon/pvcnoded/util/config.py b/node-daemon/pvcnoded/util/config.py index 8561cd5b..003b0e47 100644 --- a/node-daemon/pvcnoded/util/config.py +++ b/node-daemon/pvcnoded/util/config.py @@ -287,6 +287,7 @@ def get_configuration(): 'upstream_mtu': o_sysnetwork_upstream.get('mtu', None), 'upstream_dev_ip': o_sysnetwork_upstream.get('address', None), 'bridge_dev': o_sysnetworks.get('bridge_device', None), + 'bridge_mtu': o_sysnetworks.get('bridge_mtu', 1500), 'enable_sriov': o_sysnetworks.get('sriov_enable', False), 'sriov_device': o_sysnetworks.get('sriov_device', list()) }