From efc7434143f5afd97086543d9ce78ef3b3a7d116 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Fri, 2 Feb 2024 10:13:46 -0500 Subject: [PATCH] Add safety check for 80% full size Adds a check that a volume creation or resize won't violate the 80% full rule for the storage cluster. This ensures a cluster won't get too full if a storage volume fills up. Also adds a force flag throughout the pipeline to override this check, should an administrator really want to do so. Closes #177 --- api-daemon/pvcapid/flaskapi.py | 45 +++++++++++++++++++++++++++++++--- api-daemon/pvcapid/helper.py | 12 ++++++--- client-cli/pvc/cli/cli.py | 34 ++++++++++++++++++++++--- client-cli/pvc/lib/storage.py | 13 ++++++---- daemon-common/ceph.py | 38 +++++++++++++++++++++++++--- 5 files changed, 121 insertions(+), 21 deletions(-) diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py index c312638f..87e26f85 100755 --- a/api-daemon/pvcapid/flaskapi.py +++ b/api-daemon/pvcapid/flaskapi.py @@ -5744,6 +5744,10 @@ class API_Storage_Ceph_Volume_Root(Resource): "required": True, "helptext": "A volume size in bytes (B implied or with SI suffix k/M/G/T) must be specified.", }, + { + "name": "force", + "required": False, + }, ] ) @Authenticator @@ -5769,6 +5773,12 @@ class API_Storage_Ceph_Volume_Root(Resource): type: string required: true description: The volume size, in bytes (B implied) or with a single-character SI suffix (k/M/G/T) + - in: query + name: force + type: boolean + required: false + default: false + description: Force action if volume creation would violate 80% full soft cap on the pool responses: 200: description: OK @@ -5785,6 +5795,7 @@ class API_Storage_Ceph_Volume_Root(Resource): reqargs.get("pool", None), reqargs.get("volume", None), reqargs.get("size", None), + reqargs.get("force", False), ) @@ -5819,7 +5830,11 @@ class API_Storage_Ceph_Volume_Element(Resource): "name": "size", "required": True, "helptext": "A volume size in bytes (or with k/M/G/T 
suffix) must be specified.", - } + }, + { + "name": "force", + "required": False, + }, ] ) @Authenticator @@ -5835,6 +5850,12 @@ class API_Storage_Ceph_Volume_Element(Resource): type: string required: true description: The volume size in bytes (or with a metric suffix, i.e. k/M/G/T) + - in: query + name: force + type: boolean + required: false + default: false + description: Force action if volume creation would violate 80% full soft cap on the pool responses: 200: description: OK @@ -5852,9 +5873,17 @@ class API_Storage_Ceph_Volume_Element(Resource): type: object id: Message """ - return api_helper.ceph_volume_add(pool, volume, reqargs.get("size", None)) + return api_helper.ceph_volume_add( + pool, volume, reqargs.get("size", None), reqargs.get("force", False) + ) - @RequestParser([{"name": "new_size"}, {"name": "new_name"}]) + @RequestParser( + [ + {"name": "new_size"}, + {"name": "new_name"}, + {"name": "force", "required": False}, + ] + ) @Authenticator def put(self, pool, volume, reqargs): """ @@ -5873,6 +5902,12 @@ class API_Storage_Ceph_Volume_Element(Resource): type: string required: false description: The new volume name + - in: query + name: force + type: boolean + required: false + default: false + description: Force action if new volume size would violate 80% full soft cap on the pool responses: 200: description: OK @@ -5894,7 +5929,9 @@ class API_Storage_Ceph_Volume_Element(Resource): return {"message": "Can only perform one modification at once"}, 400 if reqargs.get("new_size", None): - return api_helper.ceph_volume_resize(pool, volume, reqargs.get("new_size")) + return api_helper.ceph_volume_resize( + pool, volume, reqargs.get("new_size"), reqargs.get("force", False) + ) if reqargs.get("new_name", None): return api_helper.ceph_volume_rename(pool, volume, reqargs.get("new_name")) return {"message": "At least one modification must be specified"}, 400 diff --git a/api-daemon/pvcapid/helper.py b/api-daemon/pvcapid/helper.py index ca2ff7c1..6df257bd 
100755 --- a/api-daemon/pvcapid/helper.py +++ b/api-daemon/pvcapid/helper.py @@ -1869,11 +1869,13 @@ def ceph_volume_list(zkhandler, pool=None, limit=None, is_fuzzy=True): @ZKConnection(config) -def ceph_volume_add(zkhandler, pool, name, size): +def ceph_volume_add(zkhandler, pool, name, size, force_flag): """ Add a Ceph RBD volume to the PVC Ceph storage cluster. """ - retflag, retdata = pvc_ceph.add_volume(zkhandler, pool, name, size) + retflag, retdata = pvc_ceph.add_volume( + zkhandler, pool, name, size, force_flag=force_flag + ) if retflag: retcode = 200 @@ -1901,11 +1903,13 @@ def ceph_volume_clone(zkhandler, pool, name, source_volume): @ZKConnection(config) -def ceph_volume_resize(zkhandler, pool, name, size): +def ceph_volume_resize(zkhandler, pool, name, size, force_flag): """ Resize an existing Ceph RBD volume in the PVC Ceph storage cluster. """ - retflag, retdata = pvc_ceph.resize_volume(zkhandler, pool, name, size) + retflag, retdata = pvc_ceph.resize_volume( + zkhandler, pool, name, size, force_flag=force_flag + ) if retflag: retcode = 200 diff --git a/client-cli/pvc/cli/cli.py b/client-cli/pvc/cli/cli.py index 8cabe30e..c120e5e6 100644 --- a/client-cli/pvc/cli/cli.py +++ b/client-cli/pvc/cli/cli.py @@ -4100,12 +4100,26 @@ def cli_storage_volume(): @click.argument("pool") @click.argument("name") @click.argument("size") -def cli_storage_volume_add(pool, name, size): +@click.option( + "-f", + "--force", + "force_flag", + is_flag=True, + default=False, + help="Force creation even if volume would violate 80% full safe free space.", +) +def cli_storage_volume_add(pool, name, size, force_flag): """ Add a new Ceph RBD volume in pool POOL with name NAME and size SIZE (in human units, e.g. 1024M, 20G, etc.). + + PVC will prevent the creation of a volume whose size is greater than the available free space on the pool. This cannot be overridden. + + PVC will prevent the creation of a volume whose size is greater than the 80% full safe free space on the pool. 
This can be overridden with the "-f"/"--force" option but this may be dangerous! """ - retcode, retmsg = pvc.lib.storage.ceph_volume_add(CLI_CONFIG, pool, name, size) + retcode, retmsg = pvc.lib.storage.ceph_volume_add( + CLI_CONFIG, pool, name, size, force_flag=force_flag + ) finish(retcode, retmsg) @@ -4171,14 +4185,26 @@ def cli_storage_volume_remove(pool, name): @click.argument("pool") @click.argument("name") @click.argument("size") +@click.option( + "-f", + "--force", + "force_flag", + is_flag=True, + default=False, + help="Force resize even if volume would violate 80% full safe free space.", +) @confirm_opt("Resize volume {name} in pool {pool} to size {size}") -def cli_storage_volume_resize(pool, name, size): +def cli_storage_volume_resize(pool, name, size, force_flag): """ Resize an existing Ceph RBD volume with name NAME in pool POOL to size SIZE (in human units, e.g. 1024M, 20G, etc.). + + PVC will prevent the resize of a volume whose new size is greater than the available free space on the pool. This cannot be overridden. + + PVC will prevent the resize of a volume whose new size is greater than the 80% full safe free space on the pool. This can be overridden with the "-f"/"--force" option but this may be dangerous! 
""" retcode, retmsg = pvc.lib.storage.ceph_volume_modify( - CLI_CONFIG, pool, name, new_size=size + CLI_CONFIG, pool, name, new_size=size, force_flag=force_flag ) finish(retcode, retmsg) diff --git a/client-cli/pvc/lib/storage.py b/client-cli/pvc/lib/storage.py index acf1512e..a46d1b9d 100644 --- a/client-cli/pvc/lib/storage.py +++ b/client-cli/pvc/lib/storage.py @@ -1172,15 +1172,15 @@ def ceph_volume_list(config, limit, pool): return False, response.json().get("message", "") -def ceph_volume_add(config, pool, volume, size): +def ceph_volume_add(config, pool, volume, size, force_flag=False): """ Add new Ceph volume API endpoint: POST /api/v1/storage/ceph/volume - API arguments: volume={volume}, pool={pool}, size={size} + API arguments: volume={volume}, pool={pool}, size={size}, force={force_flag} API schema: {"message":"{data}"} """ - params = {"volume": volume, "pool": pool, "size": size} + params = {"volume": volume, "pool": pool, "size": size, "force": force_flag} response = call_api(config, "post", "/storage/ceph/volume", params=params) if response.status_code == 200: @@ -1261,12 +1261,14 @@ def ceph_volume_remove(config, pool, volume): return retstatus, response.json().get("message", "") -def ceph_volume_modify(config, pool, volume, new_name=None, new_size=None): +def ceph_volume_modify( + config, pool, volume, new_name=None, new_size=None, force_flag=False +): """ Modify Ceph volume API endpoint: PUT /api/v1/storage/ceph/volume/{pool}/{volume} - API arguments: + API arguments: [new_name={new_name}], [new_size={new_size}], force={force_flag} API schema: {"message":"{data}"} """ @@ -1275,6 +1277,7 @@ def ceph_volume_modify(config, pool, volume, new_name=None, new_size=None): params["new_name"] = new_name if new_size: params["new_size"] = new_size + params["force"] = force_flag response = call_api( config, diff --git a/daemon-common/ceph.py b/daemon-common/ceph.py index e753fe7c..388f7314 100644 --- a/daemon-common/ceph.py +++ b/daemon-common/ceph.py @@ 
-553,7 +553,7 @@ def getVolumeInformation(zkhandler, pool, volume): return volume_information -def add_volume(zkhandler, pool, name, size): +def add_volume(zkhandler, pool, name, size, force_flag=False): # 1. Verify the size of the volume pool_information = getPoolInformation(zkhandler, pool) size_bytes = format_bytes_fromhuman(size) @@ -563,12 +563,27 @@ def add_volume(zkhandler, pool, name, size): f"ERROR: Requested volume size '{size}' does not have a valid SI unit", ) - if size_bytes >= int(pool_information["stats"]["free_bytes"]): + pool_total_free_bytes = int(pool_information["stats"]["free_bytes"]) + if size_bytes >= pool_total_free_bytes: return ( False, f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')", ) + # Check if we're greater than 80% utilization after the create; error if so unless we have the force flag + pool_total_bytes = ( + int(pool_information["stats"]["used_bytes"]) + pool_total_free_bytes + ) + pool_safe_total_bytes = int(pool_total_bytes * 0.80) + pool_safe_free_bytes = pool_safe_total_bytes - int( + pool_information["stats"]["used_bytes"] + ) + if size_bytes >= pool_safe_free_bytes and not force_flag: + return ( + False, + f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the safe free space in the pool ('{format_bytes_tohuman(pool_safe_free_bytes)}' for 80% full); retry with force to ignore this error", + ) + # 2. 
Create the volume retcode, stdout, stderr = common.run_os_command( "rbd create --size {}B {}/{}".format(size_bytes, pool, name) @@ -634,7 +649,7 @@ def clone_volume(zkhandler, pool, name_src, name_new): ) -def resize_volume(zkhandler, pool, name, size): +def resize_volume(zkhandler, pool, name, size, force_flag=False): if not verifyVolume(zkhandler, pool, name): return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format( name, pool @@ -649,12 +664,27 @@ def resize_volume(zkhandler, pool, name, size): f"ERROR: Requested volume size '{size}' does not have a valid SI unit", ) - if size_bytes >= int(pool_information["stats"]["free_bytes"]): + pool_total_free_bytes = int(pool_information["stats"]["free_bytes"]) + if size_bytes >= pool_total_free_bytes: return ( False, f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')", ) + # Check if we're greater than 80% utilization after the resize; error if so unless we have the force flag + pool_total_bytes = ( + int(pool_information["stats"]["used_bytes"]) + pool_total_free_bytes + ) + pool_safe_total_bytes = int(pool_total_bytes * 0.80) + pool_safe_free_bytes = pool_safe_total_bytes - int( + pool_information["stats"]["used_bytes"] + ) + if size_bytes >= pool_safe_free_bytes and not force_flag: + return ( + False, + f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the safe free space in the pool ('{format_bytes_tohuman(pool_safe_free_bytes)}' for 80% full); retry with force to ignore this error", + ) + # 2. Resize the volume retcode, stdout, stderr = common.run_os_command( "rbd resize --size {} {}/{}".format(