diff --git a/node-daemon/pvcnoded.sample.yaml b/node-daemon/pvcnoded.sample.yaml index c9dd3717..e1475927 100644 --- a/node-daemon/pvcnoded.sample.yaml +++ b/node-daemon/pvcnoded.sample.yaml @@ -96,6 +96,8 @@ pvc: system: # intervals: Intervals for keepalives and fencing intervals: + # vm_shutdown_timeout: Number of seconds for a VM to 'shutdown' before being forced off + vm_shutdown_timeout: 180 # keepalive_interval: Number of seconds between keepalive/status updates keepalive_interval: 5 # fence_intervals: Number of keepalive_intervals to declare a node dead and fence it diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 7e269cdd..e4415090 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -155,6 +155,7 @@ def readConfig(pvcnoded_config_file, myhostname): 'log_keepalive_cluster_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_cluster_details'], 'log_keepalive_storage_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_storage_details'], 'console_log_lines': o_config['pvc']['system']['configuration']['logging']['console_log_lines'], + 'vm_shutdown_timeout': int(o_config['pvc']['system']['intervals']['vm_shutdown_timeout']), 'keepalive_interval': int(o_config['pvc']['system']['intervals']['keepalive_interval']), 'fence_intervals': int(o_config['pvc']['system']['intervals']['fence_intervals']), 'suicide_intervals': int(o_config['pvc']['system']['intervals']['suicide_intervals']), diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py index 81d4584b..a6e32b16 100644 --- a/node-daemon/pvcnoded/VMInstance.py +++ b/node-daemon/pvcnoded/VMInstance.py @@ -301,8 +301,8 @@ class VMInstance(object): self.dom.shutdown() tick = 0 while True: - tick += 2 - time.sleep(2) + tick += 1 + time.sleep(1) # Abort shutdown if the state changes to start current_state = zkhandler.readdata(self.zk_conn, 
'/domains/{}/state'.format(self.domuuid)) @@ -325,9 +325,8 @@ class VMInstance(object): self.console_log_instance.stop() break - # HARDCODE: 90s is a reasonable amount of time for any operating system to shut down cleanly - if tick >= 90: - self.logger.out('Shutdown timeout expired', state='e', prefix='Domain {}:'.format(self.domuuid)) + if tick >= self.config['vm_shutdown_timeout']: + self.logger.out('Shutdown timeout ({}s) expired, forcing off'.format(self.config['vm_shutdown_timeout']), state='e', prefix='Domain {}:'.format(self.domuuid)) zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'stop' }) break