From e70255dbd68004395674d761f3bf0d11bdb1b4a1 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Mon, 17 Jun 2019 23:28:02 -0400 Subject: [PATCH] Support configurable interface MTUs MTUs were hardcoded at 9000, which breaks if the underlying interface or network switch does not support jumbo frames, a possible deployment limitation. This has non-obvious consequences due to MTU mismatches for certain services (Ceph, Zookeeper, etc.). This commit adds support for configurable MTUs for each interface, set in pvcd.yaml. The example has been updated to reflect this, with a default of 1500 (the Ethernet standard). This commit also adds autoconfiguration of the VNI device MTU based on the `vni_mtu` value, the same for bridge networks and minus 50 (rather than 200 from the hardcoded value, based on the following resource [1]) for VXLAN networks. [1] http://ipengineer.net/2014/06/vxlan-mtu-vs-ip-mtu-consideration/ --- node-daemon/pvcd.sample.yaml | 20 ++++++++++++++++---- node-daemon/pvcd/Daemon.py | 21 ++++++++++++++------- node-daemon/pvcd/VXNetworkInstance.py | 24 ++++++++++++++++-------- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/node-daemon/pvcd.sample.yaml b/node-daemon/pvcd.sample.yaml index a06792e2..cb51485f 100644 --- a/node-daemon/pvcd.sample.yaml +++ b/node-daemon/pvcd.sample.yaml @@ -127,11 +127,23 @@ pvc: # devices: Interface devices configuration devices: # upstream: Upstream physical interface device - upstream: ens4 + upstream: + # name: Upstream interface name + name: ens4 + # mtu: Upstream interface MTU; use 9000 for jumbo frames (requires switch support) + mtu: 1500 # cluster: Cluster (VNIC) physical interface device - cluster: ens4 - # storage: Storage (Ceph) physical interface device - storage: ens4 + cluster: + # name: Cluster (VNIC) interface name + name: ens4 + # mtu: Cluster (VNIC) interface MTU; use 9000 for jumbo frames (requires switch support) + mtu: 1500 + # storage: Storage (Ceph OSD) physical interface device + 
storage: + # name: Storage (Ceph OSD) interface name + name: ens4 + # mtu: Storage (Ceph OSD) interface MTU; use 9000 for jumbo frames (requires switch support) + mtu: 1500 # addresses: Special network addresses; by-id denotes "address octet equals host number", e.g. .3 for host3 addresses: # upstream: Network address for upstream network, options: None, by-id, / diff --git a/node-daemon/pvcd/Daemon.py b/node-daemon/pvcd/Daemon.py index 4c0a04f1..31569799 100644 --- a/node-daemon/pvcd/Daemon.py +++ b/node-daemon/pvcd/Daemon.py @@ -179,11 +179,14 @@ def readConfig(pvcd_config_file, myhostname): 'pdns_postgresql_dbname': o_config['pvc']['coordinator']['dns']['database']['name'], 'pdns_postgresql_user': o_config['pvc']['coordinator']['dns']['database']['user'], 'pdns_postgresql_password': o_config['pvc']['coordinator']['dns']['database']['pass'], - 'vni_dev': o_config['pvc']['system']['configuration']['networking']['devices']['cluster'], + 'vni_dev': o_config['pvc']['system']['configuration']['networking']['devices']['cluster']['name'], + 'vni_mtu': o_config['pvc']['system']['configuration']['networking']['devices']['cluster']['mtu'], 'vni_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['cluster'], - 'storage_dev': o_config['pvc']['system']['configuration']['networking']['devices']['storage'], + 'storage_dev': o_config['pvc']['system']['configuration']['networking']['devices']['storage']['name'], + 'storage_mtu': o_config['pvc']['system']['configuration']['networking']['devices']['storage']['mtu'], 'storage_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['storage'], - 'upstream_dev': o_config['pvc']['system']['configuration']['networking']['devices']['upstream'], + 'upstream_dev': o_config['pvc']['system']['configuration']['networking']['devices']['upstream']['name'], + 'upstream_mtu': o_config['pvc']['system']['configuration']['networking']['devices']['upstream']['mtu'], 'upstream_dev_ip': 
o_config['pvc']['system']['configuration']['networking']['addresses']['upstream'], } except Exception as e: @@ -311,33 +314,37 @@ logger.out('Starting pvcd on host {}'.format(myfqdn), state='s') if enable_networking: # VNI configuration vni_dev = config['vni_dev'] + vni_mtu = config['vni_mtu'] vni_dev_ip = config['vni_dev_ip'] logger.out('Setting up VNI network interface {}'.format(vni_dev, vni_dev_ip), state='i') - common.run_os_command('ip link set {} mtu 9000 up'.format(vni_dev)) + common.run_os_command('ip link set {} mtu {} up'.format(vni_dev, vni_mtu)) # Cluster bridge configuration logger.out('Setting up Cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i') common.run_os_command('brctl addbr brcluster') common.run_os_command('brctl addif brcluster {}'.format(vni_dev)) - common.run_os_command('ip link set brcluster mtu 9000 up') + common.run_os_command('ip link set brcluster mtu {} up'.format(vni_mtu)) common.run_os_command('ip address add {} dev {}'.format(vni_dev_ip, 'brcluster')) # Storage configuration storage_dev = config['storage_dev'] + storage_mtu = config['storage_mtu'] if storage_dev == vni_dev: storage_dev = 'brcluster' + storage_mtu = vni_mtu storage_dev_ip = config['storage_dev_ip'] logger.out('Setting up Storage network on interface {} with IP {}'.format(storage_dev, storage_dev_ip), state='i') - common.run_os_command('ip link set {} mtu 9000 up'.format(storage_dev)) + common.run_os_command('ip link set {} mtu {} up'.format(storage_dev, storage_mtu)) common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, storage_dev)) # Upstream configuration if config['upstream_dev']: upstream_dev = config['upstream_dev'] + upstream_mtu = config['upstream_mtu'] upstream_dev_ip = config['upstream_dev_ip'] upstream_dev_gateway = config['upstream_gateway'] logger.out('Setting up Upstream network on interface {} with IP {}'.format(upstream_dev, upstream_dev_ip), state='i') - common.run_os_command('ip link set {} 
up'.format(upstream_dev)) + common.run_os_command('ip link set {} mtu {} up'.format(upstream_dev, upstream_mtu)) common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, upstream_dev)) if upstream_dev_gateway: common.run_os_command('ip route add default via {} dev {}'.format(upstream_dev_gateway, upstream_dev)) diff --git a/node-daemon/pvcd/VXNetworkInstance.py b/node-daemon/pvcd/VXNetworkInstance.py index 1c982865..b9320696 100644 --- a/node-daemon/pvcd/VXNetworkInstance.py +++ b/node-daemon/pvcd/VXNetworkInstance.py @@ -437,14 +437,18 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out self.vlan_nic ) ) + + vx_mtu = self.config.vni_mtu common.run_os_command( - 'ip link set {} mtu 8800 up'.format( - self.vlan_nic + 'ip link set {} mtu {} up'.format( + self.vlan_nic, + vx_mtu ) ) common.run_os_command( - 'ip link set {} mtu 8800 up'.format( - self.bridge_nic + 'ip link set {} mtu {} up'.format( + self.bridge_nic, + vx_mtu ) ) common.run_os_command( @@ -481,14 +485,18 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out self.vxlan_nic ) ) + + vx_mtu = self.config.vni_mtu - 50 common.run_os_command( - 'ip link set {} mtu 8800 up'.format( - self.vxlan_nic + 'ip link set {} mtu {} up'.format( + self.vxlan_nic, + vx_mtu ) ) common.run_os_command( - 'ip link set {} mtu 8800 up'.format( - self.bridge_nic + 'ip link set {} mtu {} up'.format( + self.bridge_nic, + vx_mtu ) ) common.run_os_command(