pvc/node-daemon/pvcnoded/CephInstance.py

429 lines
16 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
2019-10-10 14:09:07 -04:00
# CephInstance.py - Class implementing a PVC node Ceph instance
# Part of the Parallel Virtual Cluster (PVC) system
#
2021-03-25 17:01:55 -04:00
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import time
import json
import psutil
2021-06-01 12:17:25 -04:00
import daemon_lib.common as common
class CephOSDInstance(object):
2021-05-31 19:51:27 -04:00
def __init__(self, zkhandler, this_node, osd_id):
self.zkhandler = zkhandler
self.this_node = this_node
self.osd_id = osd_id
self.node = None
self.size = None
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.node', self.osd_id))
2018-10-30 09:17:41 -04:00
def watch_osd_node(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
except AttributeError:
data = ''
if data and data != self.node:
self.node = data
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.stats', self.osd_id))
2018-10-30 09:17:41 -04:00
def watch_osd_stats(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
except AttributeError:
data = ''
if data and data != self.stats:
self.stats = json.loads(data)
2021-05-31 19:51:27 -04:00
def add_osd(zkhandler, logger, node, device, weight):
2018-10-30 09:17:41 -04:00
# We are ready to create a new OSD on this node
2019-07-12 09:58:01 -04:00
logger.out('Creating new OSD disk on block device {}'.format(device), state='i')
try:
# 1. Create an OSD; we do this so we know what ID will be gen'd
retcode, stdout, stderr = common.run_os_command('ceph osd create')
if retcode:
print('ceph osd create')
print(stdout)
print(stderr)
raise
osd_id = stdout.rstrip()
# 2. Remove that newly-created OSD
retcode, stdout, stderr = common.run_os_command('ceph osd rm {}'.format(osd_id))
if retcode:
print('ceph osd rm')
print(stdout)
print(stderr)
raise
# 3a. Zap the disk to ensure it is ready to go
logger.out('Zapping disk {}'.format(device), state='i')
retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(device))
if retcode:
print('ceph-volume lvm zap')
print(stdout)
print(stderr)
raise
# 3b. Create the OSD for real
logger.out('Preparing LVM for new OSD disk with ID {} on {}'.format(osd_id, device), state='i')
retcode, stdout, stderr = common.run_os_command(
'ceph-volume lvm prepare --bluestore --data {device}'.format(
osdid=osd_id,
device=device
)
)
if retcode:
print('ceph-volume lvm prepare')
print(stdout)
print(stderr)
raise
2019-06-18 20:22:28 -04:00
# 4a. Get OSD FSID
logger.out('Getting OSD FSID for ID {} on {}'.format(osd_id, device), state='i')
retcode, stdout, stderr = common.run_os_command(
2019-06-18 20:22:28 -04:00
'ceph-volume lvm list {device}'.format(
osdid=osd_id,
device=device
)
)
for line in stdout.split('\n'):
if 'osd fsid' in line:
osd_fsid = line.split()[-1]
2019-06-18 20:22:28 -04:00
if not osd_fsid:
print('ceph-volume lvm list')
print('Could not find OSD fsid in data:')
print(stdout)
print(stderr)
raise
# 4b. Activate the OSD
logger.out('Activating new OSD disk with ID {}'.format(osd_id, device), state='i')
2019-06-18 20:22:28 -04:00
retcode, stdout, stderr = common.run_os_command(
'ceph-volume lvm activate --bluestore {osdid} {osdfsid}'.format(
osdid=osd_id,
osdfsid=osd_fsid
)
)
if retcode:
print('ceph-volume lvm activate')
print(stdout)
print(stderr)
raise
# 5. Add it to the crush map
2019-07-12 09:58:01 -04:00
logger.out('Adding new OSD disk with ID {} to CRUSH map'.format(osd_id), state='i')
retcode, stdout, stderr = common.run_os_command(
'ceph osd crush add osd.{osdid} {weight} root=default host={node}'.format(
osdid=osd_id,
weight=weight,
node=node
)
)
if retcode:
print('ceph osd crush add')
print(stdout)
print(stderr)
raise
time.sleep(0.5)
# 6. Verify it started
retcode, stdout, stderr = common.run_os_command(
'systemctl status ceph-osd@{osdid}'.format(
osdid=osd_id
)
)
if retcode:
print('systemctl status')
print(stdout)
print(stderr)
raise
# 7. Add the new OSD to the list
logger.out('Adding new OSD disk with ID {} to Zookeeper'.format(osd_id), state='i')
2021-05-31 19:51:27 -04:00
zkhandler.write([
(('osd', osd_id), ''),
(('osd.node', osd_id), node),
(('osd.device', osd_id), device),
(('osd.stats', osd_id), '{}'),
2021-05-31 19:51:27 -04:00
])
# Log it
logger.out('Created new OSD disk with ID {}'.format(osd_id), state='o')
return True
except Exception as e:
# Log it
logger.out('Failed to create new OSD disk: {}'.format(e), state='e')
return False
2021-05-31 19:51:27 -04:00
def remove_osd(zkhandler, logger, osd_id, osd_obj):
logger.out('Removing OSD disk {}'.format(osd_id), state='i')
try:
# 1. Verify the OSD is present
retcode, stdout, stderr = common.run_os_command('ceph osd ls')
osd_list = stdout.split('\n')
if osd_id not in osd_list:
logger.out('Could not find OSD {} in the cluster'.format(osd_id), state='e')
return True
# 1. Set the OSD out so it will flush
logger.out('Setting out OSD disk with ID {}'.format(osd_id), state='i')
retcode, stdout, stderr = common.run_os_command('ceph osd out {}'.format(osd_id))
if retcode:
print('ceph osd out')
print(stdout)
print(stderr)
raise
# 2. Wait for the OSD to flush
logger.out('Flushing OSD disk with ID {}'.format(osd_id), state='i')
osd_string = str()
while True:
2019-10-22 12:51:29 -04:00
try:
retcode, stdout, stderr = common.run_os_command('ceph pg dump osds --format json')
dump_string = json.loads(stdout)
for osd in dump_string:
if str(osd['osd']) == osd_id:
osd_string = osd
num_pgs = osd_string['num_pgs']
if num_pgs > 0:
time.sleep(5)
2019-10-22 12:51:29 -04:00
else:
raise
2020-11-06 18:55:10 -05:00
except Exception:
2019-10-22 12:51:29 -04:00
break
# 3. Stop the OSD process and wait for it to be terminated
logger.out('Stopping OSD disk with ID {}'.format(osd_id), state='i')
retcode, stdout, stderr = common.run_os_command('systemctl stop ceph-osd@{}'.format(osd_id))
if retcode:
print('systemctl stop')
print(stdout)
print(stderr)
raise
# FIXME: There has to be a better way to do this /shrug
while True:
is_osd_up = False
# Find if there is a process named ceph-osd with arg '--id {id}'
for p in psutil.process_iter(attrs=['name', 'cmdline']):
if 'ceph-osd' == p.info['name'] and '--id {}'.format(osd_id) in ' '.join(p.info['cmdline']):
is_osd_up = True
# If there isn't, continue
if not is_osd_up:
break
2019-06-19 00:23:14 -04:00
# 4. Determine the block devices
retcode, stdout, stderr = common.run_os_command('readlink /var/lib/ceph/osd/ceph-{}/block'.format(osd_id))
vg_name = stdout.split('/')[-2] # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
retcode, stdout, stderr = common.run_os_command('vgs --separator , --noheadings -o pv_name {}'.format(vg_name))
2019-06-18 20:34:25 -04:00
pv_block = stdout.strip()
2019-06-19 00:23:14 -04:00
# 5. Zap the volumes
logger.out('Zapping OSD disk with ID {} on {}'.format(osd_id, pv_block), state='i')
retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(pv_block))
if retcode:
print('ceph-volume lvm zap')
print(stdout)
print(stderr)
raise
2019-06-19 00:23:14 -04:00
# 6. Purge the OSD from Ceph
logger.out('Purging OSD disk with ID {}'.format(osd_id), state='i')
retcode, stdout, stderr = common.run_os_command('ceph osd purge {} --yes-i-really-mean-it'.format(osd_id))
if retcode:
print('ceph osd purge')
print(stdout)
print(stderr)
raise
2019-06-19 00:23:14 -04:00
# 7. Delete OSD from ZK
logger.out('Deleting OSD disk with ID {} from Zookeeper'.format(osd_id), state='i')
zkhandler.delete(('osd', osd_id), recursive=True)
2019-06-19 00:23:14 -04:00
# Log it
logger.out('Removed OSD disk with ID {}'.format(osd_id), state='o')
return True
except Exception as e:
# Log it
logger.out('Failed to purge OSD disk with ID {}: {}'.format(osd_id, e), state='e')
return False
2018-10-31 23:38:17 -04:00
class CephPoolInstance(object):
2021-05-31 19:51:27 -04:00
def __init__(self, zkhandler, this_node, name):
self.zkhandler = zkhandler
2018-10-31 23:38:17 -04:00
self.this_node = this_node
self.name = name
self.pgs = ''
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.pgs', self.name))
2018-10-31 23:38:17 -04:00
def watch_pool_node(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
except AttributeError:
data = ''
if data and data != self.pgs:
self.pgs = data
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.stats', self.name))
2018-10-31 23:38:17 -04:00
def watch_pool_stats(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
except AttributeError:
data = ''
if data and data != self.stats:
self.stats = json.loads(data)
class CephVolumeInstance(object):
2021-05-31 19:51:27 -04:00
def __init__(self, zkhandler, this_node, pool, name):
self.zkhandler = zkhandler
self.this_node = this_node
self.pool = pool
self.name = name
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('volume.stats', f'{self.pool}/{self.name}'))
def watch_volume_stats(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
except AttributeError:
data = ''
if data and data != self.stats:
self.stats = json.loads(data)
class CephSnapshotInstance(object):
2021-05-31 19:51:27 -04:00
def __init__(self, zkhandler, this_node, pool, volume, name):
self.zkhandler = zkhandler
self.this_node = this_node
self.pool = pool
self.volume = volume
self.name = name
self.stats = dict()
2021-06-10 00:36:02 -04:00
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('snapshot.stats', f'{self.pool}/{self.volume}/{self.name}'))
def watch_snapshot_stats(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
except AttributeError:
data = ''
if data and data != self.stats:
self.stats = json.loads(data)
# Primary command function
# This command pipe is only used for OSD adds and removes
2021-05-31 19:51:27 -04:00
def run_command(zkhandler, logger, this_node, data, d_osd):
2018-11-23 20:02:50 -05:00
# Get the command and args
command, args = data.split()
# Adding a new OSD
if command == 'osd_add':
node, device, weight = args.split(',')
2019-06-18 13:43:54 -04:00
if node == this_node.name:
2018-11-23 20:02:50 -05:00
# Lock the command queue
zk_lock = zkhandler.writelock('base.cmd.ceph')
2018-11-23 20:02:50 -05:00
with zk_lock:
# Add the OSD
2021-05-31 19:51:27 -04:00
result = add_osd(zkhandler, logger, node, device, weight)
2018-11-23 20:02:50 -05:00
# Command succeeded
if result:
# Update the command queue
2021-05-31 19:51:27 -04:00
zkhandler.write([
('base.cmd.ceph', 'success-{}'.format(data))
2021-05-31 19:51:27 -04:00
])
2018-11-23 20:02:50 -05:00
# Command failed
else:
# Update the command queue
2021-05-31 19:51:27 -04:00
zkhandler.write([
('base.cmd.ceph', 'failure-{}'.format(data))
2021-05-31 19:51:27 -04:00
])
2018-11-23 20:02:50 -05:00
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
time.sleep(1)
# Removing an OSD
elif command == 'osd_remove':
osd_id = args
# Verify osd_id is in the list
2019-06-18 13:43:54 -04:00
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
2018-11-23 20:02:50 -05:00
# Lock the command queue
zk_lock = zkhandler.writelock('base.cmd.ceph')
2018-11-23 20:02:50 -05:00
with zk_lock:
# Remove the OSD
2021-05-31 19:51:27 -04:00
result = remove_osd(zkhandler, logger, osd_id, d_osd[osd_id])
2018-11-23 20:02:50 -05:00
# Command succeeded
if result:
# Update the command queue
2021-05-31 19:51:27 -04:00
zkhandler.write([
('base.cmd.ceph', 'success-{}'.format(data))
2021-05-31 19:51:27 -04:00
])
2018-11-23 20:02:50 -05:00
# Command failed
else:
# Update the command queue
2021-05-31 19:51:27 -04:00
zkhandler.write([
('base.cmd.ceph', 'failure-{}'.format(data))
2021-05-31 19:51:27 -04:00
])
2018-11-23 20:02:50 -05:00
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
time.sleep(1)