Support configurable interface MTUs

MTUs were hardcoded at 9000, which breaks if the underlying interface
or network switch does not support jumbo frames, a possible deployment
limitation. This has non-obvious consequences due to MTU mismatches
for certain services (Ceph, Zookeeper, etc.).

This commit adds support for configurable MTUs for each interface,
set in pvcd.yaml. The example has been updated to reflect this, with
a default of 1500 (the Ethernet standard).

This commit also adds autoconfiguration of the network device MTUs
based on the `vni_mtu` value: bridge networks use `vni_mtu` unchanged,
while VXLAN networks use `vni_mtu` minus 50 (rather than the 200 implied
by the previous hardcoded 8800 on a 9000 link; see resource [1]).

[1] http://ipengineer.net/2014/06/vxlan-mtu-vs-ip-mtu-consideration/
This commit is contained in:
Joshua Boniface 2019-06-17 23:28:02 -04:00
parent c583ee1709
commit e70255dbd6
3 changed files with 46 additions and 19 deletions

View File

@ -127,11 +127,23 @@ pvc:
# devices: Interface devices configuration # devices: Interface devices configuration
devices: devices:
# upstream: Upstream physical interface device # upstream: Upstream physical interface device
upstream: ens4 upstream:
# name: Upstream interface name
name: ens4
# mtu: Upstream interface MTU; use 9000 for jumbo frames (requires switch support)
mtu: 1500
# cluster: Cluster (VNIC) physical interface device # cluster: Cluster (VNIC) physical interface device
cluster: ens4 cluster:
# storage: Storage (Ceph) physical interface device # name: Cluster (VNIC) interface name
storage: ens4 name: ens4
# mtu: Cluster (VNIC) interface MTU; use 9000 for jumbo frames (requires switch support)
mtu: 1500
# storage: Storage (Ceph OSD) physical interface device
storage:
# name: Storage (Ceph OSD) interface name
name: ens4
# mtu: Storage (Ceph OSD) interface MTU; use 9000 for jumbo frames (requires switch support)
mtu: 1500
# addresses: Special network addresses; by-id denotes "address octet equals host number", e.g. .3 for host3 # addresses: Special network addresses; by-id denotes "address octet equals host number", e.g. .3 for host3
addresses: addresses:
# upstream: Network address for upstream network, options: None, by-id, <static>/<mask> # upstream: Network address for upstream network, options: None, by-id, <static>/<mask>

View File

@ -179,11 +179,14 @@ def readConfig(pvcd_config_file, myhostname):
'pdns_postgresql_dbname': o_config['pvc']['coordinator']['dns']['database']['name'], 'pdns_postgresql_dbname': o_config['pvc']['coordinator']['dns']['database']['name'],
'pdns_postgresql_user': o_config['pvc']['coordinator']['dns']['database']['user'], 'pdns_postgresql_user': o_config['pvc']['coordinator']['dns']['database']['user'],
'pdns_postgresql_password': o_config['pvc']['coordinator']['dns']['database']['pass'], 'pdns_postgresql_password': o_config['pvc']['coordinator']['dns']['database']['pass'],
'vni_dev': o_config['pvc']['system']['configuration']['networking']['devices']['cluster'], 'vni_dev': o_config['pvc']['system']['configuration']['networking']['devices']['cluster']['name'],
'vni_mtu': o_config['pvc']['system']['configuration']['networking']['devices']['cluster']['mtu'],
'vni_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['cluster'], 'vni_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['cluster'],
'storage_dev': o_config['pvc']['system']['configuration']['networking']['devices']['storage'], 'storage_dev': o_config['pvc']['system']['configuration']['networking']['devices']['storage']['name'],
'storage_mtu': o_config['pvc']['system']['configuration']['networking']['devices']['storage']['mtu'],
'storage_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['storage'], 'storage_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['storage'],
'upstream_dev': o_config['pvc']['system']['configuration']['networking']['devices']['upstream'], 'upstream_dev': o_config['pvc']['system']['configuration']['networking']['devices']['upstream']['name'],
'upstream_mtu': o_config['pvc']['system']['configuration']['networking']['devices']['upstream']['mtu'],
'upstream_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['upstream'], 'upstream_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['upstream'],
} }
except Exception as e: except Exception as e:
@ -311,33 +314,37 @@ logger.out('Starting pvcd on host {}'.format(myfqdn), state='s')
if enable_networking: if enable_networking:
# VNI configuration # VNI configuration
vni_dev = config['vni_dev'] vni_dev = config['vni_dev']
vni_mtu = config['vni_mtu']
vni_dev_ip = config['vni_dev_ip'] vni_dev_ip = config['vni_dev_ip']
logger.out('Setting up VNI network interface {}'.format(vni_dev, vni_dev_ip), state='i') logger.out('Setting up VNI network interface {}'.format(vni_dev, vni_dev_ip), state='i')
common.run_os_command('ip link set {} mtu 9000 up'.format(vni_dev)) common.run_os_command('ip link set {} mtu {} up'.format(vni_dev, vni_mtu))
# Cluster bridge configuration # Cluster bridge configuration
logger.out('Setting up Cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i') logger.out('Setting up Cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i')
common.run_os_command('brctl addbr brcluster') common.run_os_command('brctl addbr brcluster')
common.run_os_command('brctl addif brcluster {}'.format(vni_dev)) common.run_os_command('brctl addif brcluster {}'.format(vni_dev))
common.run_os_command('ip link set brcluster mtu 9000 up') common.run_os_command('ip link set brcluster mtu {} up'.format(vni_mtu))
common.run_os_command('ip address add {} dev {}'.format(vni_dev_ip, 'brcluster')) common.run_os_command('ip address add {} dev {}'.format(vni_dev_ip, 'brcluster'))
# Storage configuration # Storage configuration
storage_dev = config['storage_dev'] storage_dev = config['storage_dev']
storage_mtu = config['storage_mtu']
if storage_dev == vni_dev: if storage_dev == vni_dev:
storage_dev = 'brcluster' storage_dev = 'brcluster'
storage_mtu = vni_mtu
storage_dev_ip = config['storage_dev_ip'] storage_dev_ip = config['storage_dev_ip']
logger.out('Setting up Storage network on interface {} with IP {}'.format(storage_dev, storage_dev_ip), state='i') logger.out('Setting up Storage network on interface {} with IP {}'.format(storage_dev, storage_dev_ip), state='i')
common.run_os_command('ip link set {} mtu 9000 up'.format(storage_dev)) common.run_os_command('ip link set {} mtu {} up'.format(storage_dev, storage_mtu))
common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, storage_dev)) common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, storage_dev))
# Upstream configuration # Upstream configuration
if config['upstream_dev']: if config['upstream_dev']:
upstream_dev = config['upstream_dev'] upstream_dev = config['upstream_dev']
upstream_mtu = config['upstream_mtu']
upstream_dev_ip = config['upstream_dev_ip'] upstream_dev_ip = config['upstream_dev_ip']
upstream_dev_gateway = config['upstream_gateway'] upstream_dev_gateway = config['upstream_gateway']
logger.out('Setting up Upstream network on interface {} with IP {}'.format(upstream_dev, upstream_dev_ip), state='i') logger.out('Setting up Upstream network on interface {} with IP {}'.format(upstream_dev, upstream_dev_ip), state='i')
common.run_os_command('ip link set {} up'.format(upstream_dev)) common.run_os_command('ip link set {} mtu {} up'.format(upstream_dev, upstream_mtu))
common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, upstream_dev)) common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, upstream_dev))
if upstream_dev_gateway: if upstream_dev_gateway:
common.run_os_command('ip route add default via {} dev {}'.format(upstream_dev_gateway, upstream_dev)) common.run_os_command('ip route add default via {} dev {}'.format(upstream_dev_gateway, upstream_dev))

View File

@ -437,14 +437,18 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.vlan_nic self.vlan_nic
) )
) )
vx_mtu = self.config.vni_mtu
common.run_os_command( common.run_os_command(
'ip link set {} mtu 8800 up'.format( 'ip link set {} mtu {} up'.format(
self.vlan_nic self.vlan_nic,
vx_mtu
) )
) )
common.run_os_command( common.run_os_command(
'ip link set {} mtu 8800 up'.format( 'ip link set {} mtu {} up'.format(
self.bridge_nic self.bridge_nic,
vx_mtu
) )
) )
common.run_os_command( common.run_os_command(
@ -481,14 +485,18 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.vxlan_nic self.vxlan_nic
) )
) )
vx_mtu = self.config.vni_mtu - 50
common.run_os_command( common.run_os_command(
'ip link set {} mtu 8800 up'.format( 'ip link set {} mtu {} up'.format(
self.vxlan_nic self.vxlan_nic,
vx_mtu
) )
) )
common.run_os_command( common.run_os_command(
'ip link set {} mtu 8800 up'.format( 'ip link set {} mtu {} up'.format(
self.bridge_nic self.bridge_nic,
vx_mtu
) )
) )
common.run_os_command( common.run_os_command(