diff --git a/debian/control b/debian/control index db76ceaa..9e4608ed 100644 --- a/debian/control +++ b/debian/control @@ -8,7 +8,7 @@ X-Python3-Version: >= 3.2 Package: pvc-daemon-node Architecture: all -Depends: systemd, pvc-daemon-common, python3-kazoo, python3-psutil, python3-apscheduler, python3-libvirt, python3-psycopg2, python3-dnspython, python3-yaml, python3-distutils, python3-rados, python3-gevent, ipmitool, libvirt-daemon-system, arping, vlan, bridge-utils, dnsmasq, nftables, pdns-server, pdns-backend-pgsql +Depends: systemd, pvc-daemon-common, python3-kazoo, python3-psutil, python3-apscheduler, python3-libvirt, python3-psycopg2, python3-dnspython, python3-yaml, python3-distutils, python3-gevent, ipmitool, libvirt-daemon-system, arping, vlan, bridge-utils, dnsmasq, nftables, pdns-server, pdns-backend-pgsql Suggests: pvc-client-api, pvc-client-cli Description: Parallel Virtual Cluster node daemon (Python 3) A KVM/Zookeeper/Ceph-based VM and private cloud manager diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index cfccfa7b..64be82cc 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -38,7 +38,6 @@ from apscheduler.schedulers.background import BackgroundScheduler from distutils.util import strtobool from queue import Queue from xml.etree import ElementTree -from rados import Rados from daemon_lib.zkhandler import ZKHandler @@ -1314,24 +1313,13 @@ def collect_ceph_stats(queue): if debug: logger.out("Thread starting", state='d', prefix='ceph-thread') - # Connect to the Ceph cluster - try: - ceph_conn = Rados(conffile=config['ceph_config_file'], conf=dict(keyring=config['ceph_admin_keyring'])) - if debug: - logger.out("Connecting to cluster", state='d', prefix='ceph-thread') - ceph_conn.connect(timeout=1) - except Exception as e: - logger.out('Failed to open connection to Ceph cluster: {}'.format(e), state='e') - return - if debug: logger.out("Getting health stats from monitor", state='d', prefix='ceph-thread') # Get Ceph cluster health for local status output - command = {"prefix": "health", "format": "json"} + _, stdout, _ = common.run_os_command('ceph health --format json', timeout=1) try: - health_status = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1]) - ceph_health = health_status['status'] + ceph_health = json.loads(stdout)['status'] except Exception as e: logger.out('Failed to obtain Ceph health data: {}'.format(e), state='e') return @@ -1348,8 +1336,7 @@ def collect_ceph_stats(queue): if debug: logger.out("Set ceph health information in zookeeper (primary only)", state='d', prefix='ceph-thread') - command = {"prefix": "status", "format": "pretty"} - ceph_status = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii') + _, ceph_status, _ = common.run_os_command('ceph status --format plain', timeout=1) try: zkhandler.write([ ('base.storage', str(ceph_status)) @@ -1362,8 +1349,7 @@ def collect_ceph_stats(queue): logger.out("Set ceph rados df information in zookeeper (primary only)", state='d', prefix='ceph-thread') # Get rados df info - command = {"prefix": "df", "format": "pretty"} - ceph_df = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii') + _, ceph_df, _ = common.run_os_command('ceph df --format plain', timeout=1) try: zkhandler.write([ ('base.storage.util', str(ceph_df)) @@ -1376,14 +1362,14 @@ def collect_ceph_stats(queue): logger.out("Set pool information in zookeeper (primary only)", state='d', prefix='ceph-thread') # Get pool info - retcode, stdout, stderr = common.run_os_command('ceph df --format json', timeout=1) + _, stdout, _ = common.run_os_command('ceph df --format json', timeout=1) try: ceph_pool_df_raw = json.loads(stdout)['pools'] except Exception as e: logger.out('Failed to obtain Pool data (ceph df): {}'.format(e), state='w') ceph_pool_df_raw = [] - retcode, stdout, stderr = common.run_os_command('rados df --format json', timeout=1) + _, stdout, _ = common.run_os_command('rados df --format json', timeout=1) try: rados_pool_df_raw = json.loads(stdout)['pools'] except Exception as e: @@ -1448,9 +1434,8 @@ def collect_ceph_stats(queue): # Parse the dump data osd_dump = dict() - command = {"prefix": "osd dump", "format": "json"} + _, stdout, _ = common.run_os_command('ceph osd dump --format json --connect-timeout 1', timeout=1) try: - retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json --connect-timeout 2', timeout=2) osd_dump_raw = json.loads(stdout)['osds'] except Exception as e: logger.out('Failed to obtain OSD data: {}'.format(e), state='w') @@ -1474,9 +1459,9 @@ def collect_ceph_stats(queue): osd_df = dict() - command = {"prefix": "osd df", "format": "json"} + _, osd_df_out, _ = common.run_os_command('ceph osd df --format json', timeout=1) try: - osd_df_raw = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1])['nodes'] + osd_df_raw = json.loads(osd_df_out)['nodes'] except Exception as e: logger.out('Failed to obtain OSD data: {}'.format(e), state='w') osd_df_raw = [] @@ -1501,12 +1486,10 @@ def collect_ceph_stats(queue): osd_status = dict() - command = {"prefix": "osd status", "format": "pretty"} - try: - osd_status_raw = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii') - except Exception as e: - logger.out('Failed to obtain OSD status data: {}'.format(e), state='w') - osd_status_raw = [] + retcode, osd_status_raw, stderr = common.run_os_command('ceph osd status --format plain', timeout=1) + if retcode != 0: + logger.out('Failed to obtain OSD status data: {}'.format(stderr), state='w') + osd_status_raw = '' if debug: logger.out("Loop through OSD status data", state='d', prefix='ceph-thread') @@ -1573,8 +1556,6 @@ def collect_ceph_stats(queue): # One or more of the status commands timed out, just continue logger.out('Failed to upload OSD stats from dictionary: {}'.format(e), state='w') - ceph_conn.shutdown() - queue.put(ceph_health_colour) queue.put(ceph_health) queue.put(osds_this_node)