Compare commits

...

7 Commits

Author SHA1 Message Date
d63cc2e661 Bump version to 0.9.94 2024-02-06 13:31:50 -05:00
67ec41aaf9 Fix invalid memory errors for stopped VMs 2024-02-06 13:30:48 -05:00
a95e72008e Add size validations for volume clones
Adds the same validations as a volume add or resize to volume clones, to
ensure there is enough free space for them.
2024-02-02 11:37:29 -05:00
efc7434143 Add safety check for 80% full size
Adds a check that a volume creation or resize won't violate the 80% full
rule for the storage cluster. This ensures a cluster won't get too full
if a storage volume fills up.

Also adds a force flag throughout the pipeline to override this check,
should an administrator really want to do so.

Closes #177
2024-02-02 11:37:00 -05:00
c473dcca81 Fix errors with autobackup email summary
How this was being done didn't work, as the backup volume was already
unmounted when we tried to read the backups from it. Instead, populate
the backup summary earlier in the run, during the actual backup.
2024-02-02 09:31:16 -05:00
18f09196be Bump version to 0.9.93 2024-01-30 09:51:21 -05:00
8419659e1b Ensure zkhandler is always cleaned up
Even if the subfunction of an API @ZKConnection call fails, the
zkhandler needs to terminate and clean up, or it leaves stuck threads
around.
2024-01-30 09:48:17 -05:00
16 changed files with 227 additions and 60 deletions

View File

@ -1 +1 @@
0.9.92
0.9.94

View File

@ -1,5 +1,16 @@
## PVC Changelog
###### [v0.9.94](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.94)
* [CLI Client] Fixes an incorrect ordering issue with autobackup summary emails
* [API Daemon/CLI Client] Adds an additional safety check for 80% cluster fullness when doing volume adds or resizes
* [API Daemon/CLI Client] Adds safety checks to volume clones as well
* [API Daemon] Fixes a few remaining memory bugs for stopped/disabled VMs
###### [v0.9.93](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.93)
* [API Daemon] Fixes a bug where stuck zkhandler threads were not cleaned up on error
###### [v0.9.92](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.92)
* [CLI Client] Adds the new restore state to the colours list for VM status

View File

@ -27,7 +27,7 @@ from distutils.util import strtobool as dustrtobool
import daemon_lib.config as cfg
# Daemon version
version = "0.9.92"
version = "0.9.94"
# API version
API_VERSION = 1.0

View File

@ -5744,6 +5744,10 @@ class API_Storage_Ceph_Volume_Root(Resource):
"required": True,
"helptext": "A volume size in bytes (B implied or with SI suffix k/M/G/T) must be specified.",
},
{
"name": "force",
"required": False,
},
]
)
@Authenticator
@ -5769,6 +5773,12 @@ class API_Storage_Ceph_Volume_Root(Resource):
type: string
required: true
description: The volume size, in bytes (B implied) or with a single-character SI suffix (k/M/G/T)
- in: query
name: force
type: boolean
required: false
default: false
description: Force action if volume creation would violate 80% full soft cap on the pool
responses:
200:
description: OK
@ -5785,6 +5795,7 @@ class API_Storage_Ceph_Volume_Root(Resource):
reqargs.get("pool", None),
reqargs.get("volume", None),
reqargs.get("size", None),
reqargs.get("force", False),
)
@ -5819,7 +5830,11 @@ class API_Storage_Ceph_Volume_Element(Resource):
"name": "size",
"required": True,
"helptext": "A volume size in bytes (or with k/M/G/T suffix) must be specified.",
}
},
{
"name": "force",
"required": False,
},
]
)
@Authenticator
@ -5835,6 +5850,12 @@ class API_Storage_Ceph_Volume_Element(Resource):
type: string
required: true
description: The volume size in bytes (or with a metric suffix, i.e. k/M/G/T)
- in: query
name: force
type: boolean
required: false
default: false
description: Force action if volume creation would violate 80% full soft cap on the pool
responses:
200:
description: OK
@ -5852,9 +5873,17 @@ class API_Storage_Ceph_Volume_Element(Resource):
type: object
id: Message
"""
return api_helper.ceph_volume_add(pool, volume, reqargs.get("size", None))
return api_helper.ceph_volume_add(
pool, volume, reqargs.get("size", None), reqargs.get("force", False)
)
@RequestParser([{"name": "new_size"}, {"name": "new_name"}])
@RequestParser(
[
{"name": "new_size"},
{"name": "new_name"},
{"name": "force", "required": False},
]
)
@Authenticator
def put(self, pool, volume, reqargs):
"""
@ -5873,6 +5902,12 @@ class API_Storage_Ceph_Volume_Element(Resource):
type: string
required: false
description: The new volume name
- in: query
name: force
type: boolean
required: false
default: false
description: Force action if new volume size would violate 80% full soft cap on the pool
responses:
200:
description: OK
@ -5894,7 +5929,9 @@ class API_Storage_Ceph_Volume_Element(Resource):
return {"message": "Can only perform one modification at once"}, 400
if reqargs.get("new_size", None):
return api_helper.ceph_volume_resize(pool, volume, reqargs.get("new_size"))
return api_helper.ceph_volume_resize(
pool, volume, reqargs.get("new_size"), reqargs.get("force", False)
)
if reqargs.get("new_name", None):
return api_helper.ceph_volume_rename(pool, volume, reqargs.get("new_name"))
return {"message": "At least one modification must be specified"}, 400
@ -5935,7 +5972,11 @@ class API_Storage_Ceph_Volume_Element_Clone(Resource):
"name": "new_volume",
"required": True,
"helptext": "A new volume name must be specified.",
}
},
{
"name": "force",
"required": False,
},
]
)
@Authenticator
@ -5951,6 +5992,12 @@ class API_Storage_Ceph_Volume_Element_Clone(Resource):
type: string
required: true
description: The name of the new cloned volume
- in: query
name: force
type: boolean
required: false
default: false
description: Force action if clone volume size would violate 80% full soft cap on the pool
responses:
200:
description: OK
@ -5969,7 +6016,7 @@ class API_Storage_Ceph_Volume_Element_Clone(Resource):
id: Message
"""
return api_helper.ceph_volume_clone(
pool, reqargs.get("new_volume", None), volume
pool, reqargs.get("new_volume", None), volume, reqargs.get("force", None)
)

View File

@ -1869,11 +1869,13 @@ def ceph_volume_list(zkhandler, pool=None, limit=None, is_fuzzy=True):
@ZKConnection(config)
def ceph_volume_add(zkhandler, pool, name, size):
def ceph_volume_add(zkhandler, pool, name, size, force_flag):
"""
Add a Ceph RBD volume to the PVC Ceph storage cluster.
"""
retflag, retdata = pvc_ceph.add_volume(zkhandler, pool, name, size)
retflag, retdata = pvc_ceph.add_volume(
zkhandler, pool, name, size, force_flag=force_flag
)
if retflag:
retcode = 200
@ -1885,11 +1887,13 @@ def ceph_volume_add(zkhandler, pool, name, size):
@ZKConnection(config)
def ceph_volume_clone(zkhandler, pool, name, source_volume):
def ceph_volume_clone(zkhandler, pool, name, source_volume, force_flag):
"""
Clone a Ceph RBD volume to a new volume on the PVC Ceph storage cluster.
"""
retflag, retdata = pvc_ceph.clone_volume(zkhandler, pool, source_volume, name)
retflag, retdata = pvc_ceph.clone_volume(
zkhandler, pool, source_volume, name, force_flag=force_flag
)
if retflag:
retcode = 200
@ -1901,11 +1905,13 @@ def ceph_volume_clone(zkhandler, pool, name, source_volume):
@ZKConnection(config)
def ceph_volume_resize(zkhandler, pool, name, size):
def ceph_volume_resize(zkhandler, pool, name, size, force_flag):
"""
Resize an existing Ceph RBD volume in the PVC Ceph storage cluster.
"""
retflag, retdata = pvc_ceph.resize_volume(zkhandler, pool, name, size)
retflag, retdata = pvc_ceph.resize_volume(
zkhandler, pool, name, size, force_flag=force_flag
)
if retflag:
retcode = 200

View File

@ -4100,12 +4100,26 @@ def cli_storage_volume():
@click.argument("pool")
@click.argument("name")
@click.argument("size")
def cli_storage_volume_add(pool, name, size):
@click.option(
"-f",
"--force",
"force_flag",
is_flag=True,
default=False,
help="Force creation even if volume would violate 80% full safe free space.",
)
def cli_storage_volume_add(pool, name, size, force_flag):
"""
Add a new Ceph RBD volume in pool POOL with name NAME and size SIZE (in human units, e.g. 1024M, 20G, etc.).
PVC will prevent the creation of a volume who's size is greater than the available free space on the pool. This cannot be overridden.
PVC will prevent the creation of a volume whose size is greater than the 80% full safe free space on the pool. This can be overridden with the "-f"/"--force" option but this may be dangerous!
"""
retcode, retmsg = pvc.lib.storage.ceph_volume_add(CLI_CONFIG, pool, name, size)
retcode, retmsg = pvc.lib.storage.ceph_volume_add(
CLI_CONFIG, pool, name, size, force_flag=force_flag
)
finish(retcode, retmsg)
@ -4171,14 +4185,26 @@ def cli_storage_volume_remove(pool, name):
@click.argument("pool")
@click.argument("name")
@click.argument("size")
@click.option(
"-f",
"--force",
"force_flag",
is_flag=True,
default=False,
help="Force resize even if volume would violate 80% full safe free space.",
)
@confirm_opt("Resize volume {name} in pool {pool} to size {size}")
def cli_storage_volume_resize(pool, name, size):
def cli_storage_volume_resize(pool, name, size, force_flag):
"""
Resize an existing Ceph RBD volume with name NAME in pool POOL to size SIZE (in human units, e.g. 1024M, 20G, etc.).
PVC will prevent the resize of a volume who's new size is greater than the available free space on the pool. This cannot be overridden.
PVC will prevent the resize of a volume whose new size is greater than the 80% full safe free space on the pool. This can be overridden with the "-f"/"--force" option but this may be dangerous!
"""
retcode, retmsg = pvc.lib.storage.ceph_volume_modify(
CLI_CONFIG, pool, name, new_size=size
CLI_CONFIG, pool, name, new_size=size, force_flag=force_flag
)
finish(retcode, retmsg)
@ -4211,13 +4237,25 @@ def cli_storage_volume_rename(pool, name, new_name):
@click.argument("pool")
@click.argument("name")
@click.argument("new_name")
def cli_storage_volume_clone(pool, name, new_name):
@click.option(
"-f",
"--force",
"force_flag",
is_flag=True,
default=False,
help="Force clone even if volume would violate 80% full safe free space.",
)
def cli_storage_volume_clone(pool, name, new_name, force_flag):
"""
Clone a Ceph RBD volume with name NAME in pool POOL to name NEW_NAME in pool POOL.
PVC will prevent the clone of a volume who's new size is greater than the available free space on the pool. This cannot be overridden.
PVC will prevent the clone of a volume whose new size is greater than the 80% full safe free space on the pool. This can be overridden with the "-f"/"--force" option but this may be dangerous!
"""
retcode, retmsg = pvc.lib.storage.ceph_volume_clone(
CLI_CONFIG, pool, name, new_name
CLI_CONFIG, pool, name, new_name, force_flag=force_flag
)
finish(retcode, retmsg)

View File

@ -246,6 +246,8 @@ def vm_autobackup(
Perform automatic backups of VMs based on an external config file.
"""
backup_summary = dict()
if email_report is not None:
from email.utils import formatdate
from socket import gethostname
@ -553,6 +555,8 @@ def vm_autobackup(
with open(autobackup_state_file, "w") as fh:
jdump(state_data, fh)
backup_summary[vm] = tracked_backups
if autobackup_config["auto_mount_enabled"]:
# Execute each unmount_cmds command in sequence
for cmd in autobackup_config["unmount_cmds"]:
@ -588,20 +592,6 @@ def vm_autobackup(
if email_report is not None:
echo(CLI_CONFIG, "")
echo(CLI_CONFIG, f"Sending email summary report to {email_report}")
backup_summary = dict()
for vm in backup_vms:
backup_path = f"{backup_suffixed_path}/{vm}"
autobackup_state_file = f"{backup_path}/.autobackup.json"
if not path.exists(backup_path) or not path.exists(autobackup_state_file):
# There are no new backups so the list is empty
state_data = dict()
tracked_backups = list()
else:
with open(autobackup_state_file) as fh:
state_data = jload(fh)
tracked_backups = state_data["tracked_backups"]
backup_summary[vm] = tracked_backups
current_datetime = datetime.now()
email_datetime = formatdate(float(current_datetime.strftime("%s")))

View File

@ -1172,15 +1172,15 @@ def ceph_volume_list(config, limit, pool):
return False, response.json().get("message", "")
def ceph_volume_add(config, pool, volume, size):
def ceph_volume_add(config, pool, volume, size, force_flag=False):
"""
Add new Ceph volume
API endpoint: POST /api/v1/storage/ceph/volume
API arguments: volume={volume}, pool={pool}, size={size}
API arguments: volume={volume}, pool={pool}, size={size}, force={force_flag}
API schema: {"message":"{data}"}
"""
params = {"volume": volume, "pool": pool, "size": size}
params = {"volume": volume, "pool": pool, "size": size, "force": force_flag}
response = call_api(config, "post", "/storage/ceph/volume", params=params)
if response.status_code == 200:
@ -1261,12 +1261,14 @@ def ceph_volume_remove(config, pool, volume):
return retstatus, response.json().get("message", "")
def ceph_volume_modify(config, pool, volume, new_name=None, new_size=None):
def ceph_volume_modify(
config, pool, volume, new_name=None, new_size=None, force_flag=False
):
"""
Modify Ceph volume
API endpoint: PUT /api/v1/storage/ceph/volume/{pool}/{volume}
API arguments:
API arguments: [new_name={new_name}], [new_size={new_size}], force={force_flag}
API schema: {"message":"{data}"}
"""
@ -1275,6 +1277,7 @@ def ceph_volume_modify(config, pool, volume, new_name=None, new_size=None):
params["new_name"] = new_name
if new_size:
params["new_size"] = new_size
params["force"] = force_flag
response = call_api(
config,
@ -1291,15 +1294,15 @@ def ceph_volume_modify(config, pool, volume, new_name=None, new_size=None):
return retstatus, response.json().get("message", "")
def ceph_volume_clone(config, pool, volume, new_volume):
def ceph_volume_clone(config, pool, volume, new_volume, force_flag=False):
"""
Clone Ceph volume
API endpoint: POST /api/v1/storage/ceph/volume/{pool}/{volume}
API arguments: new_volume={new_volume
API arguments: new_volume={new_volume}, force_flag={force_flag}
API schema: {"message":"{data}"}
"""
params = {"new_volume": new_volume}
params = {"new_volume": new_volume, "force_flag": force_flag}
response = call_api(
config,
"post",

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup(
name="pvc",
version="0.9.92",
version="0.9.94",
packages=["pvc.cli", "pvc.lib"],
install_requires=[
"Click",

View File

@ -553,7 +553,7 @@ def getVolumeInformation(zkhandler, pool, volume):
return volume_information
def add_volume(zkhandler, pool, name, size):
def add_volume(zkhandler, pool, name, size, force_flag=False):
# 1. Verify the size of the volume
pool_information = getPoolInformation(zkhandler, pool)
size_bytes = format_bytes_fromhuman(size)
@ -563,12 +563,27 @@ def add_volume(zkhandler, pool, name, size):
f"ERROR: Requested volume size '{size}' does not have a valid SI unit",
)
if size_bytes >= int(pool_information["stats"]["free_bytes"]):
pool_total_free_bytes = int(pool_information["stats"]["free_bytes"])
if size_bytes >= pool_total_free_bytes:
return (
False,
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')",
)
# Check if we're greater than 80% utilization after the create; error if so unless we have the force flag
pool_total_bytes = (
int(pool_information["stats"]["used_bytes"]) + pool_total_free_bytes
)
pool_safe_total_bytes = int(pool_total_bytes * 0.80)
pool_safe_free_bytes = pool_safe_total_bytes - int(
pool_information["stats"]["used_bytes"]
)
if size_bytes >= pool_safe_free_bytes and not force_flag:
return (
False,
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the safe free space in the pool ('{format_bytes_tohuman(pool_safe_free_bytes)}' for 80% full); retry with force to ignore this error",
)
# 2. Create the volume
retcode, stdout, stderr = common.run_os_command(
"rbd create --size {}B {}/{}".format(size_bytes, pool, name)
@ -596,13 +611,39 @@ def add_volume(zkhandler, pool, name, size):
)
def clone_volume(zkhandler, pool, name_src, name_new):
def clone_volume(zkhandler, pool, name_src, name_new, force_flag=False):
# 1. Verify the volume
if not verifyVolume(zkhandler, pool, name_src):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(
name_src, pool
)
# 1. Clone the volume
volume_stats_raw = zkhandler.read(("volume.stats", f"{pool}/{name_src}"))
volume_stats = dict(json.loads(volume_stats_raw))
size_bytes = volume_stats["size"]
pool_information = getPoolInformation(zkhandler, pool)
pool_total_free_bytes = int(pool_information["stats"]["free_bytes"])
if size_bytes >= pool_total_free_bytes:
return (
False,
f"ERROR: Clone volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')",
)
# Check if we're greater than 80% utilization after the clone; error if so unless we have the force flag
pool_total_bytes = (
int(pool_information["stats"]["used_bytes"]) + pool_total_free_bytes
)
pool_safe_total_bytes = int(pool_total_bytes * 0.80)
pool_safe_free_bytes = pool_safe_total_bytes - int(
pool_information["stats"]["used_bytes"]
)
if size_bytes >= pool_safe_free_bytes and not force_flag:
return (
False,
f"ERROR: Clone volume size '{format_bytes_tohuman(size_bytes)}' is greater than the safe free space in the pool ('{format_bytes_tohuman(pool_safe_free_bytes)}' for 80% full); retry with force to ignore this error",
)
# 2. Clone the volume
retcode, stdout, stderr = common.run_os_command(
"rbd copy {}/{} {}/{}".format(pool, name_src, pool, name_new)
)
@ -614,13 +655,13 @@ def clone_volume(zkhandler, pool, name_src, name_new):
),
)
# 2. Get volume stats
# 3. Get volume stats
retcode, stdout, stderr = common.run_os_command(
"rbd info --format json {}/{}".format(pool, name_new)
)
volstats = stdout
# 3. Add the new volume to Zookeeper
# 4. Add the new volume to Zookeeper
zkhandler.write(
[
(("volume", f"{pool}/{name_new}"), ""),
@ -634,7 +675,7 @@ def clone_volume(zkhandler, pool, name_src, name_new):
)
def resize_volume(zkhandler, pool, name, size):
def resize_volume(zkhandler, pool, name, size, force_flag=False):
if not verifyVolume(zkhandler, pool, name):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(
name, pool
@ -649,12 +690,27 @@ def resize_volume(zkhandler, pool, name, size):
f"ERROR: Requested volume size '{size}' does not have a valid SI unit",
)
if size_bytes >= int(pool_information["stats"]["free_bytes"]):
pool_total_free_bytes = int(pool_information["stats"]["free_bytes"])
if size_bytes >= pool_total_free_bytes:
return (
False,
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')",
)
# Check if we're greater than 80% utilization after the resize; error if so unless we have the force flag
pool_total_bytes = (
int(pool_information["stats"]["used_bytes"]) + pool_total_free_bytes
)
pool_safe_total_bytes = int(pool_total_bytes * 0.80)
pool_safe_free_bytes = pool_safe_total_bytes - int(
pool_information["stats"]["used_bytes"]
)
if size_bytes >= pool_safe_free_bytes and not force_flag:
return (
False,
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the safe free space in the pool ('{format_bytes_tohuman(pool_safe_free_bytes)}' for 80% full); retry with force to ignore this error",
)
# 2. Resize the volume
retcode, stdout, stderr = common.run_os_command(
"rbd resize --size {} {}/{}".format(

View File

@ -1230,7 +1230,7 @@ def get_resource_metrics(zkhandler):
)
output_lines.append("# TYPE pvc_vm_memory_stats_actual gauge")
for vm in vm_data:
actual_memory = vm["memory_stats"]["actual"]
actual_memory = vm["memory_stats"].get("actual", 0)
output_lines.append(
f"pvc_vm_memory_stats_actual{{vm=\"{vm['name']}\"}} {actual_memory}"
)
@ -1238,7 +1238,7 @@ def get_resource_metrics(zkhandler):
output_lines.append("# HELP pvc_vm_memory_stats_rss PVC VM RSS memory KB")
output_lines.append("# TYPE pvc_vm_memory_stats_rss gauge")
for vm in vm_data:
rss_memory = vm["memory_stats"]["rss"]
rss_memory = vm["memory_stats"].get("rss", 0)
output_lines.append(
f"pvc_vm_memory_stats_rss{{vm=\"{vm['name']}\"}} {rss_memory}"
)

View File

@ -57,10 +57,11 @@ class ZKConnection(object):
schema_version = 0
zkhandler.schema.load(schema_version, quiet=True)
ret = function(zkhandler, *args, **kwargs)
zkhandler.disconnect()
del zkhandler
try:
ret = function(zkhandler, *args, **kwargs)
finally:
zkhandler.disconnect()
del zkhandler
return ret

15
debian/changelog vendored
View File

@ -1,3 +1,18 @@
pvc (0.9.94-0) unstable; urgency=high
* [CLI Client] Fixes an incorrect ordering issue with autobackup summary emails
* [API Daemon/CLI Client] Adds an additional safety check for 80% cluster fullness when doing volume adds or resizes
* [API Daemon/CLI Client] Adds safety checks to volume clones as well
* [API Daemon] Fixes a few remaining memory bugs for stopped/disabled VMs
-- Joshua M. Boniface <joshua@boniface.me> Mon, 05 Feb 2024 09:58:07 -0500
pvc (0.9.93-0) unstable; urgency=high
* [API Daemon] Fixes a bug where stuck zkhandler threads were not cleaned up on error
-- Joshua M. Boniface <joshua@boniface.me> Tue, 30 Jan 2024 09:51:21 -0500
pvc (0.9.92-0) unstable; urgency=high
* [CLI Client] Adds the new restore state to the colours list for VM status

View File

@ -33,7 +33,7 @@ import os
import signal
# Daemon version
version = "0.9.92"
version = "0.9.94"
##########################################################

View File

@ -49,7 +49,7 @@ import re
import json
# Daemon version
version = "0.9.92"
version = "0.9.94"
##########################################################

View File

@ -44,7 +44,7 @@ from daemon_lib.vmbuilder import (
)
# Daemon version
version = "0.9.92"
version = "0.9.94"
config = cfg.get_configuration()