Add handlers for client network MTUs

Refactors some of the code in VXNetworkInstance to handle MTUs in a
more streamlined fashion. Also fixes a bug whereby bridge client
networks were being explicitly given the cluster dev MTU which might not
be correct. Now adds support for this option explicitly in the configs,
and defaults to 1500 for safety (the standard Ethernet MTU).

Addresses #144
This commit is contained in:
Joshua Boniface 2021-10-09 17:02:27 -04:00
parent db6e65712d
commit 50d8aa0586
5 changed files with 88 additions and 51 deletions

View File

@ -0,0 +1 @@
{"version": "6", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": 
"/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}

View File

@ -466,7 +466,7 @@ class ZKHandler(object):
#
class ZKSchema(object):
# Current version
_version = 5
_version = 6
# Root for doing nested keys
_schema_root = ''
@ -595,6 +595,7 @@ class ZKSchema(object):
'network': {
'vni': '', # The root key
'type': '/nettype',
'mtu': '/mtu',
'rule': '/firewall_rules',
'rule.in': '/firewall_rules/in',
'rule.out': '/firewall_rules/out',

View File

@ -161,6 +161,9 @@ pvc:
networking:
# bridge_device: Underlying device to use for bridged vLAN networks; usually the device of <cluster>
bridge_device: ens4
# bridge_mtu: The MTU of the underlying device used for bridged vLAN networks, and thus the maximum
# MTU of the overlying bridge devices; defaults to 1500 if absent.
bridge_mtu: 1500
# sriov_enable: Enable or disable (default if absent) SR-IOV network support
sriov_enable: False
# sriov_device: Underlying device(s) to use for SR-IOV networks; can be bridge_device or other NIC(s)

View File

@ -39,9 +39,13 @@ class VXNetworkInstance(object):
self.cluster_dev = config['cluster_dev']
self.cluster_mtu = config['cluster_mtu']
self.bridge_dev = config['bridge_dev']
self.bridge_mtu = config['bridge_mtu']
self.nettype = self.zkhandler.read(('network.type', self.vni))
if self.nettype == 'bridged':
self.base_nic = 'vlan{}'.format(self.vni)
self.bridge_nic = 'vmbr{}'.format(self.vni)
self.max_mtu = self.bridge_mtu
self.logger.out(
'Creating new bridged network',
prefix='VNI {}'.format(self.vni),
@ -49,6 +53,9 @@ class VXNetworkInstance(object):
)
self.init_bridged()
elif self.nettype == 'managed':
self.base_nic = 'vxlan{}'.format(self.vni)
self.bridge_nic = 'vmbr{}'.format(self.vni)
self.max_mtu = self.cluster_mtu - 50
self.logger.out(
'Creating new managed network',
prefix='VNI {}'.format(self.vni),
@ -56,6 +63,9 @@ class VXNetworkInstance(object):
)
self.init_managed()
else:
self.base_nic = None
self.bridge_nic = None
self.max_mtu = 0
self.logger.out(
'Invalid network type {}'.format(self.nettype),
prefix='VNI {}'.format(self.vni),
@ -68,8 +78,12 @@ class VXNetworkInstance(object):
self.old_description = None
self.description = None
self.vlan_nic = 'vlan{}'.format(self.vni)
self.bridge_nic = 'vmbr{}'.format(self.vni)
try:
self.vx_mtu = self.zkhandler.read(('network.mtu', self.vni))
if self.vx_mtu == '':
raise
except Exception:
self.vx_mtu = self.max_mtu
# Zookeeper handlers for changed states
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network', self.vni))
@ -83,6 +97,17 @@ class VXNetworkInstance(object):
self.old_description = self.description
self.description = data.decode('ascii')
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.mtu', self.vni))
def watch_network_mtu(data, stat, event=''):
    """React to changes of this network's MTU key.

    Returns False when the key was deleted, which terminates this watcher;
    otherwise stores the new MTU and re-applies it to the NICs when the
    stored value differs from the current one.
    """
    if event and event.type == 'DELETED':
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    # Nothing to do for an empty/missing payload
    if not data:
        return

    new_mtu = data.decode('ascii')
    if new_mtu != self.vx_mtu:
        self.vx_mtu = new_mtu
        self.updateNetworkMTU()
self.createNetworkBridged()
# Initialize a managed network
@ -102,8 +127,12 @@ class VXNetworkInstance(object):
self.dhcp4_start = self.zkhandler.read(('network.ip4.dhcp_start', self.vni))
self.dhcp4_end = self.zkhandler.read(('network.ip4.dhcp_end', self.vni))
self.vxlan_nic = 'vxlan{}'.format(self.vni)
self.bridge_nic = 'vmbr{}'.format(self.vni)
try:
self.vx_mtu = self.zkhandler.read(('network.mtu', self.vni))
if self.vx_mtu == '':
raise
except Exception:
self.vx_mtu = self.max_mtu
self.nftables_netconf_filename = '{}/networks/{}.nft'.format(self.config['nft_dynamic_directory'], self.vni)
self.firewall_rules = []
@ -138,7 +167,7 @@ add rule inet filter input tcp dport 80 meta iifname {bridgenic} counter accept
# Block traffic into the router from network
add rule inet filter input meta iifname {bridgenic} counter drop
""".format(
vxlannic=self.vxlan_nic,
vxlannic=self.base_nic,
bridgenic=self.bridge_nic
)
@ -147,14 +176,14 @@ add rule inet filter forward ip daddr {netaddr4} counter jump {vxlannic}-in
add rule inet filter forward ip saddr {netaddr4} counter jump {vxlannic}-out
""".format(
netaddr4=self.ip4_network,
vxlannic=self.vxlan_nic,
vxlannic=self.base_nic,
)
self.firewall_rules_v6 = """# Jump from forward chain to this chain when matching net (IPv4)
add rule inet filter forward ip6 daddr {netaddr6} counter jump {vxlannic}-in
add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
""".format(
netaddr6=self.ip6_network,
vxlannic=self.vxlan_nic,
vxlannic=self.base_nic,
)
self.firewall_rules_in = self.zkhandler.children(('network.rule.in', self.vni))
@ -209,6 +238,17 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.mtu', self.vni))
def watch_network_mtu(data, stat, event=''):
    """React to changes of this network's MTU key.

    Returns False when the key was deleted, which terminates this watcher;
    otherwise stores the new MTU and re-applies it to the NICs when the
    stored value differs from the current one.
    """
    if event and event.type == 'DELETED':
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    # Nothing to do for an empty/missing payload
    if not data:
        return

    new_mtu = data.decode('ascii')
    if new_mtu != self.vx_mtu:
        self.vx_mtu = new_mtu
        self.updateNetworkMTU()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('network.ip6.network', self.vni))
def watch_network_ip6_network(data, stat, event=''):
if event and event.type == 'DELETED':
@ -383,6 +423,21 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
def getvni(self):
return self.vni
def updateNetworkMTU(self):
    """Apply the current MTU to this network's base and bridge NICs.

    Runs ``ip link set <nic> mtu <mtu> up`` via ``common.run_os_command``
    for ``self.base_nic`` and then ``self.bridge_nic``, using
    ``self.vx_mtu`` as the MTU value.
    """
    # Same command for both devices; base NIC first, then the bridge
    for nic in (self.base_nic, self.bridge_nic):
        common.run_os_command(
            'ip link set {} mtu {} up'.format(nic, self.vx_mtu)
        )
def updateDHCPReservations(self, old_reservations_list, new_reservations_list):
for reservation in new_reservations_list:
if reservation not in old_reservations_list:
@ -457,9 +512,10 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
# Create bridged network configuration
def createNetworkBridged(self):
self.logger.out(
'Creating bridged vLAN device {} on interface {}'.format(
self.vlan_nic,
self.bridge_dev
'Creating bridged vLAN device {} on interface {} MTU {}'.format(
self.base_nic,
self.bridge_dev,
self.vx_mtu
),
prefix='VNI {}'.format(self.vni),
state='o'
@ -469,7 +525,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
common.run_os_command(
'ip link add link {} name {} type vlan id {}'.format(
self.bridge_dev,
self.vlan_nic,
self.base_nic,
self.vni
)
)
@ -480,20 +536,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
)
)
# Set MTU of vLAN and bridge NICs
vx_mtu = self.cluster_mtu
common.run_os_command(
'ip link set {} mtu {} up'.format(
self.vlan_nic,
vx_mtu
)
)
common.run_os_command(
'ip link set {} mtu {} up'.format(
self.bridge_nic,
vx_mtu
)
)
self.updateNetworkMTU()
# Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9)
common.run_os_command(
@ -513,15 +556,16 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
common.run_os_command(
'brctl addif {} {}'.format(
self.bridge_nic,
self.vlan_nic
self.base_nic
)
)
# Create managed network configuration
def createNetworkManaged(self):
self.logger.out(
'Creating VXLAN device on interface {}'.format(
self.cluster_dev
'Creating VXLAN device on interface {} MTU {}'.format(
self.cluster_dev,
self.vx_mtu
),
prefix='VNI {}'.format(self.vni),
state='o'
@ -530,7 +574,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
# Create VXLAN interface
common.run_os_command(
'ip link add {} type vxlan id {} dstport 4789 dev {}'.format(
self.vxlan_nic,
self.base_nic,
self.vni,
self.cluster_dev
)
@ -542,20 +586,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
)
)
# Set MTU of VXLAN and bridge NICs
vx_mtu = self.cluster_mtu - 50
common.run_os_command(
'ip link set {} mtu {} up'.format(
self.vxlan_nic,
vx_mtu
)
)
common.run_os_command(
'ip link set {} mtu {} up'.format(
self.bridge_nic,
vx_mtu
)
)
self.updateNetworkMTU()
# Disable tx checksum offload on bridge interface (breaks DHCP on Debian < 9)
common.run_os_command(
@ -575,7 +606,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
common.run_os_command(
'brctl addif {} {}'.format(
self.bridge_nic,
self.vxlan_nic
self.base_nic
)
)
@ -728,13 +759,13 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
)
common.run_os_command(
'ip link set {} down'.format(
self.vlan_nic
self.base_nic
)
)
common.run_os_command(
'brctl delif {} {}'.format(
self.bridge_nic,
self.vlan_nic
self.base_nic
)
)
common.run_os_command(
@ -744,7 +775,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
)
common.run_os_command(
'ip link delete {}'.format(
self.vlan_nic
self.base_nic
)
)
@ -764,13 +795,13 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
)
common.run_os_command(
'ip link set {} down'.format(
self.vxlan_nic
self.base_nic
)
)
common.run_os_command(
'brctl delif {} {}'.format(
self.bridge_nic,
self.vxlan_nic
self.base_nic
)
)
common.run_os_command(
@ -780,7 +811,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
)
common.run_os_command(
'ip link delete {}'.format(
self.vxlan_nic
self.base_nic
)
)

View File

@ -287,6 +287,7 @@ def get_configuration():
'upstream_mtu': o_sysnetwork_upstream.get('mtu', None),
'upstream_dev_ip': o_sysnetwork_upstream.get('address', None),
'bridge_dev': o_sysnetworks.get('bridge_device', None),
'bridge_mtu': o_sysnetworks.get('bridge_mtu', 1500),
'enable_sriov': o_sysnetworks.get('sriov_enable', False),
'sriov_device': o_sysnetworks.get('sriov_device', list())
}