Add handlers for client network MTUs

Refactors some of the code in VXNetworkInstance to handle MTUs in a
more streamlined fashion. Also fixes a bug whereby bridged client
networks were explicitly given the cluster device MTU, which might not
be correct. Adds an explicit bridge_mtu option to the configuration for
this, defaulting to 1500 (the standard Ethernet MTU) for safety.

Addresses #144
This commit is contained in:
Joshua Boniface 2021-10-09 17:02:27 -04:00
parent db6e65712d
commit 50d8aa0586
5 changed files with 88 additions and 51 deletions

View File

@ -0,0 +1 @@
{"version": "6", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": 
"/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}

View File

@ -466,7 +466,7 @@ class ZKHandler(object):
# #
class ZKSchema(object): class ZKSchema(object):
# Current version # Current version
_version = 5 _version = 6
# Root for doing nested keys # Root for doing nested keys
_schema_root = '' _schema_root = ''
@ -595,6 +595,7 @@ class ZKSchema(object):
'network': { 'network': {
'vni': '', # The root key 'vni': '', # The root key
'type': '/nettype', 'type': '/nettype',
'mtu': '/mtu',
'rule': '/firewall_rules', 'rule': '/firewall_rules',
'rule.in': '/firewall_rules/in', 'rule.in': '/firewall_rules/in',
'rule.out': '/firewall_rules/out', 'rule.out': '/firewall_rules/out',

View File

@ -161,6 +161,9 @@ pvc:
networking: networking:
# bridge_device: Underlying device to use for bridged vLAN networks; usually the device of <cluster> # bridge_device: Underlying device to use for bridged vLAN networks; usually the device of <cluster>
bridge_device: ens4 bridge_device: ens4
# bridge_mtu: The MTU of the underlying device used for bridged vLAN networks, and thus the maximum
# MTU of the overlying bridge devices.
bridge_mtu: 1500
# sriov_enable: Enable or disable (default if absent) SR-IOV network support # sriov_enable: Enable or disable (default if absent) SR-IOV network support
sriov_enable: False sriov_enable: False
# sriov_device: Underlying device(s) to use for SR-IOV networks; can be bridge_device or other NIC(s) # sriov_device: Underlying device(s) to use for SR-IOV networks; can be bridge_device or other NIC(s)

View File

@ -39,9 +39,13 @@ class VXNetworkInstance(object):
self.cluster_dev = config['cluster_dev'] self.cluster_dev = config['cluster_dev']
self.cluster_mtu = config['cluster_mtu'] self.cluster_mtu = config['cluster_mtu']
self.bridge_dev = config['bridge_dev'] self.bridge_dev = config['bridge_dev']
self.bridge_mtu = config['bridge_mtu']
self.nettype = self.zkhandler.read(('network.type', self.vni)) self.nettype = self.zkhandler.read(('network.type', self.vni))
if self.nettype == 'bridged': if self.nettype == 'bridged':
self.base_nic = 'vlan{}'.format(self.vni)
self.bridge_nic = 'vmbr{}'.format(self.vni)
self.max_mtu = self.bridge_mtu
self.logger.out( self.logger.out(
'Creating new bridged network', 'Creating new bridged network',
prefix='VNI {}'.format(self.vni), prefix='VNI {}'.format(self.vni),
@ -49,6 +53,9 @@ class VXNetworkInstance(object):
) )
self.init_bridged() self.init_bridged()
elif self.nettype == 'managed': elif self.nettype == 'managed':
self.base_nic = 'vxlan{}'.format(self.vni)
self.bridge_nic = 'vmbr{}'.format(self.vni)
self.max_mtu = self.cluster_mtu - 50
self.logger.out( self.logger.out(
'Creating new managed network', 'Creating new managed network',
prefix='VNI {}'.format(self.vni), prefix='VNI {}'.format(self.vni),
@ -56,6 +63,9 @@ class VXNetworkInstance(object):
) )
self.init_managed() self.init_managed()
else: else:
self.base_nic = None
self.bridge_nic = None
self.max_mtu = 0
self.logger.out( self.logger.out(
'Invalid network type {}'.format(self.nettype), 'Invalid network type {}'.format(self.nettype),
prefix='VNI {}'.format(self.vni), prefix='VNI {}'.format(self.vni),
@ -68,8 +78,12 @@ class VXNetworkInstance(object):
self.old_description = None self.old_description = None
self.description = None self.description = None
self.vlan_nic = 'vlan{}'.format(self.vni) try:
self.bridge_nic = 'vmbr{}'.format(self.vni) self.vx_mtu = self.zkhandler.read(('network.mtu', self.vni))
if self.vx_mtu == '':
raise
except Exception:
self.vx_mtu = self.max_mtu
# Zookeper handlers for changed states # Zookeper handlers for changed states
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network', self.vni)) @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network', self.vni))
@ -83,6 +97,17 @@ class VXNetworkInstance(object):
self.old_description = self.description self.old_description = self.description
self.description = data.decode('ascii') self.description = data.decode('ascii')
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.mtu', self.vni))
def watch_network_mtu(data, stat, event=''):
    """Watch callback for this network's 'network.mtu' key.

    Returns False on a DELETED event; otherwise, when the watched data is
    non-empty and differs from self.vx_mtu, stores the decoded value and
    re-applies the MTU through updateNetworkMTU().
    """
    if event and event.type == 'DELETED':
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    # Nothing to compare against when the key carries no data
    if not data:
        return

    # Decode once and reuse the value for both the comparison and the store
    watched_mtu = data.decode('ascii')
    if self.vx_mtu != watched_mtu:
        self.vx_mtu = watched_mtu
        self.updateNetworkMTU()
self.createNetworkBridged() self.createNetworkBridged()
# Initialize a managed network # Initialize a managed network
@ -102,8 +127,12 @@ class VXNetworkInstance(object):
self.dhcp4_start = self.zkhandler.read(('network.ip4.dhcp_start', self.vni)) self.dhcp4_start = self.zkhandler.read(('network.ip4.dhcp_start', self.vni))
self.dhcp4_end = self.zkhandler.read(('network.ip4.dhcp_end', self.vni)) self.dhcp4_end = self.zkhandler.read(('network.ip4.dhcp_end', self.vni))
self.vxlan_nic = 'vxlan{}'.format(self.vni) try:
self.bridge_nic = 'vmbr{}'.format(self.vni) self.vx_mtu = self.zkhandler.read(('network.mtu', self.vni))
if self.vx_mtu == '':
raise
except Exception:
self.vx_mtu = self.max_mtu
self.nftables_netconf_filename = '{}/networks/{}.nft'.format(self.config['nft_dynamic_directory'], self.vni) self.nftables_netconf_filename = '{}/networks/{}.nft'.format(self.config['nft_dynamic_directory'], self.vni)
self.firewall_rules = [] self.firewall_rules = []
@ -138,7 +167,7 @@ add rule inet filter input tcp dport 80 meta iifname {bridgenic} counter accept
# Block traffic into the router from network # Block traffic into the router from network
add rule inet filter input meta iifname {bridgenic} counter drop add rule inet filter input meta iifname {bridgenic} counter drop
""".format( """.format(
vxlannic=self.vxlan_nic, vxlannic=self.base_nic,
bridgenic=self.bridge_nic bridgenic=self.bridge_nic
) )
@ -147,14 +176,14 @@ add rule inet filter forward ip daddr {netaddr4} counter jump {vxlannic}-in
add rule inet filter forward ip saddr {netaddr4} counter jump {vxlannic}-out add rule inet filter forward ip saddr {netaddr4} counter jump {vxlannic}-out
""".format( """.format(
netaddr4=self.ip4_network, netaddr4=self.ip4_network,
vxlannic=self.vxlan_nic, vxlannic=self.base_nic,
) )
self.firewall_rules_v6 = """# Jump from forward chain to this chain when matching net (IPv4) self.firewall_rules_v6 = """# Jump from forward chain to this chain when matching net (IPv4)
add rule inet filter forward ip6 daddr {netaddr6} counter jump {vxlannic}-in add rule inet filter forward ip6 daddr {netaddr6} counter jump {vxlannic}-in
add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
""".format( """.format(
netaddr6=self.ip6_network, netaddr6=self.ip6_network,
vxlannic=self.vxlan_nic, vxlannic=self.base_nic,
) )
self.firewall_rules_in = self.zkhandler.children(('network.rule.in', self.vni)) self.firewall_rules_in = self.zkhandler.children(('network.rule.in', self.vni))
@ -209,6 +238,17 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer() self.stopDHCPServer()
self.startDHCPServer() self.startDHCPServer()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.mtu', self.vni))
def watch_network_mtu(data, stat, event=''):
    """Watch callback for this network's 'network.mtu' key.

    Returns False on a DELETED event; otherwise, when the watched data is
    non-empty and differs from self.vx_mtu, stores the decoded value and
    re-applies the MTU through updateNetworkMTU().
    """
    if event and event.type == 'DELETED':
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    # Nothing to compare against when the key carries no data
    if not data:
        return

    # Decode once and reuse the value for both the comparison and the store
    watched_mtu = data.decode('ascii')
    if self.vx_mtu != watched_mtu:
        self.vx_mtu = watched_mtu
        self.updateNetworkMTU()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.ip6.network', self.vni)) @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.ip6.network', self.vni))
def watch_network_ip6_network(data, stat, event=''): def watch_network_ip6_network(data, stat, event=''):
if event and event.type == 'DELETED': if event and event.type == 'DELETED':
@ -383,6 +423,21 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
def getvni(self): def getvni(self):
return self.vni return self.vni
def updateNetworkMTU(self):
    """Apply the network MTU (self.vx_mtu) to the base and bridge NICs.

    Runs one `ip link set <nic> mtu <mtu> up` command per interface through
    common.run_os_command: the base NIC first, then the bridge NIC.
    """
    # Order matters for parity with the original: base NIC, then bridge NIC
    for nic in (self.base_nic, self.bridge_nic):
        mtu_command = 'ip link set {} mtu {} up'.format(nic, self.vx_mtu)
        common.run_os_command(mtu_command)
def updateDHCPReservations(self, old_reservations_list, new_reservations_list): def updateDHCPReservations(self, old_reservations_list, new_reservations_list):
for reservation in new_reservations_list: for reservation in new_reservations_list:
if reservation not in old_reservations_list: if reservation not in old_reservations_list:
@ -457,9 +512,10 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
# Create bridged network configuration # Create bridged network configuration
def createNetworkBridged(self): def createNetworkBridged(self):
self.logger.out( self.logger.out(
'Creating bridged vLAN device {} on interface {}'.format( 'Creating bridged vLAN device {} on interface {} MTU {}'.format(
self.vlan_nic, self.base_nic,
self.bridge_dev self.bridge_dev,
self.vx_mtu
), ),
prefix='VNI {}'.format(self.vni), prefix='VNI {}'.format(self.vni),
state='o' state='o'
@ -469,7 +525,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
common.run_os_command( common.run_os_command(
'ip link add link {} name {} type vlan id {}'.format( 'ip link add link {} name {} type vlan id {}'.format(
self.bridge_dev, self.bridge_dev,
self.vlan_nic, self.base_nic,
self.vni self.vni
) )
) )
@ -480,20 +536,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
) )
) )
# Set MTU of vLAN and bridge NICs self.updateNetworkMTU()
vx_mtu = self.cluster_mtu
common.run_os_command(
'ip link set {} mtu {} up'.format(
self.vlan_nic,
vx_mtu
)
)
common.run_os_command(
'ip link set {} mtu {} up'.format(
self.bridge_nic,
vx_mtu
)
)
# Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9) # Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9)
common.run_os_command( common.run_os_command(
@ -513,15 +556,16 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
common.run_os_command( common.run_os_command(
'brctl addif {} {}'.format( 'brctl addif {} {}'.format(
self.bridge_nic, self.bridge_nic,
self.vlan_nic self.base_nic
) )
) )
# Create managed network configuration # Create managed network configuration
def createNetworkManaged(self): def createNetworkManaged(self):
self.logger.out( self.logger.out(
'Creating VXLAN device on interface {}'.format( 'Creating VXLAN device on interface {} MTU {}'.format(
self.cluster_dev self.cluster_dev,
self.vx_mtu
), ),
prefix='VNI {}'.format(self.vni), prefix='VNI {}'.format(self.vni),
state='o' state='o'
@ -530,7 +574,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
# Create VXLAN interface # Create VXLAN interface
common.run_os_command( common.run_os_command(
'ip link add {} type vxlan id {} dstport 4789 dev {}'.format( 'ip link add {} type vxlan id {} dstport 4789 dev {}'.format(
self.vxlan_nic, self.base_nic,
self.vni, self.vni,
self.cluster_dev self.cluster_dev
) )
@ -542,20 +586,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
) )
) )
# Set MTU of VXLAN and bridge NICs self.updateNetworkMTU()
vx_mtu = self.cluster_mtu - 50
common.run_os_command(
'ip link set {} mtu {} up'.format(
self.vxlan_nic,
vx_mtu
)
)
common.run_os_command(
'ip link set {} mtu {} up'.format(
self.bridge_nic,
vx_mtu
)
)
# Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9) # Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9)
common.run_os_command( common.run_os_command(
@ -575,7 +606,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
common.run_os_command( common.run_os_command(
'brctl addif {} {}'.format( 'brctl addif {} {}'.format(
self.bridge_nic, self.bridge_nic,
self.vxlan_nic self.base_nic
) )
) )
@ -728,13 +759,13 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
) )
common.run_os_command( common.run_os_command(
'ip link set {} down'.format( 'ip link set {} down'.format(
self.vlan_nic self.base_nic
) )
) )
common.run_os_command( common.run_os_command(
'brctl delif {} {}'.format( 'brctl delif {} {}'.format(
self.bridge_nic, self.bridge_nic,
self.vlan_nic self.base_nic
) )
) )
common.run_os_command( common.run_os_command(
@ -744,7 +775,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
) )
common.run_os_command( common.run_os_command(
'ip link delete {}'.format( 'ip link delete {}'.format(
self.vlan_nic self.base_nic
) )
) )
@ -764,13 +795,13 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
) )
common.run_os_command( common.run_os_command(
'ip link set {} down'.format( 'ip link set {} down'.format(
self.vxlan_nic self.base_nic
) )
) )
common.run_os_command( common.run_os_command(
'brctl delif {} {}'.format( 'brctl delif {} {}'.format(
self.bridge_nic, self.bridge_nic,
self.vxlan_nic self.base_nic
) )
) )
common.run_os_command( common.run_os_command(
@ -780,7 +811,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
) )
common.run_os_command( common.run_os_command(
'ip link delete {}'.format( 'ip link delete {}'.format(
self.vxlan_nic self.base_nic
) )
) )

View File

@ -287,6 +287,7 @@ def get_configuration():
'upstream_mtu': o_sysnetwork_upstream.get('mtu', None), 'upstream_mtu': o_sysnetwork_upstream.get('mtu', None),
'upstream_dev_ip': o_sysnetwork_upstream.get('address', None), 'upstream_dev_ip': o_sysnetwork_upstream.get('address', None),
'bridge_dev': o_sysnetworks.get('bridge_device', None), 'bridge_dev': o_sysnetworks.get('bridge_device', None),
'bridge_mtu': o_sysnetworks.get('bridge_mtu', 1500),
'enable_sriov': o_sysnetworks.get('sriov_enable', False), 'enable_sriov': o_sysnetworks.get('sriov_enable', False),
'sriov_device': o_sysnetworks.get('sriov_device', list()) 'sriov_device': o_sysnetworks.get('sriov_device', list())
} }