From 02138974fa5c5a0b1263e8448b55939c57eae89c Mon Sep 17 00:00:00 2001
From: "Joshua M. Boniface"
Date: Tue, 28 Dec 2021 20:39:50 -0500
Subject: [PATCH] Add device class tiers to Ceph pools

Allows specifying a particular device class ("tier") for a given pool,
for instance SSD-only or NVMe-only. This is implemented with CRUSH
rules on the Ceph side, and via an additional new key in the pool
Zookeeper schema, which defaults to "default".
---
 api-daemon/pvcapid/flaskapi.py           | 29 ++++++++-
 api-daemon/pvcapid/helper.py             |  4 +-
 client-cli/pvc/cli_lib/ceph.py           | 21 +++++--
 client-cli/pvc/pvc.py                    | 21 ++++---
 daemon-common/ceph.py                    | 78 +++++++++++++++---------
 daemon-common/migrations/versions/7.json |  1 +
 daemon-common/zkhandler.py               | 16 ++++-
 docs/manuals/swagger.json                | 16 +++++
 8 files changed, 139 insertions(+), 47 deletions(-)
 create mode 100644 daemon-common/migrations/versions/7.json
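
A minimal sketch of the flow this patch implements, for reviewers (a
hypothetical standalone illustration using subprocess rather than PVC's
common.run_os_command wrapper; the pool name and PG count below are
examples, not part of this patch, and running it requires the ceph CLI
with appropriate cluster permissions):

    import subprocess

    def create_tiered_pool(name: str, pgs: int, tier: str = "default") -> None:
        if tier in ("hdd", "ssd", "nvme"):
            # One CRUSH rule per device class, rooted at "default" with a
            # host failure domain, created on demand when a pool first
            # uses that tier
            crush_rule = f"{tier}_tier"
            subprocess.run(
                ["ceph", "osd", "crush", "rule", "create-replicated",
                 crush_rule, "default", "host", tier],
                check=True,
            )
        else:
            # Fall back to the cluster's existing replicated rule
            crush_rule = "replicated"
        # The pool is then created against the chosen rule
        subprocess.run(
            ["ceph", "osd", "pool", "create", name, str(pgs), str(pgs),
             "replicated", crush_rule],
            check=True,
        )

    # Example: an SSD-only RBD pool
    # create_tiered_pool("vms-ssd", 128, tier="ssd")
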
""" - retflag, retdata = pvc_ceph.add_pool(zkhandler, name, pgs, replcfg) + retflag, retdata = pvc_ceph.add_pool(zkhandler, name, pgs, replcfg, tier) if retflag: retcode = 200 diff --git a/client-cli/pvc/cli_lib/ceph.py b/client-cli/pvc/cli_lib/ceph.py index 12a1c947..7fc4c3d1 100644 --- a/client-cli/pvc/cli_lib/ceph.py +++ b/client-cli/pvc/cli_lib/ceph.py @@ -726,15 +726,15 @@ def ceph_pool_list(config, limit): return False, response.json().get("message", "") -def ceph_pool_add(config, pool, pgs, replcfg): +def ceph_pool_add(config, pool, pgs, replcfg, tier): """ Add new Ceph OSD API endpoint: POST /api/v1/storage/ceph/pool - API arguments: pool={pool}, pgs={pgs}, replcfg={replcfg} + API arguments: pool={pool}, pgs={pgs}, replcfg={replcfg}, tier={tier} API schema: {"message":"{data}"} """ - params = {"pool": pool, "pgs": pgs, "replcfg": replcfg} + params = {"pool": pool, "pgs": pgs, "replcfg": replcfg, "tier": tier} response = call_api(config, "post", "/storage/ceph/pool", params=params) if response.status_code == 200: @@ -775,6 +775,7 @@ def format_list_pool(pool_list): pool_name_length = 5 pool_id_length = 3 + pool_tier_length = 5 pool_used_length = 5 pool_usedpct_length = 6 pool_free_length = 5 @@ -812,6 +813,11 @@ def format_list_pool(pool_list): if _pool_id_length > pool_id_length: pool_id_length = _pool_id_length + # Set the tier and length + _pool_tier_length = len(str(pool_information["tier"])) + 1 + if _pool_tier_length > pool_tier_length: + pool_tier_length = _pool_tier_length + # Set the used and length _pool_used_length = len(str(pool_information["stats"]["used_bytes"])) + 1 if _pool_used_length > pool_used_length: @@ -879,10 +885,11 @@ def format_list_pool(pool_list): end_bold=ansiprint.end(), pool_header_length=pool_id_length + pool_name_length + + pool_tier_length + pool_used_length + pool_usedpct_length + pool_free_length - + 4, + + 5, objects_header_length=pool_num_objects_length + pool_num_clones_length + pool_num_copies_length @@ -934,6 +941,7 @@ def format_list_pool(pool_list): "{bold}\ {pool_id: <{pool_id_length}} \ {pool_name: <{pool_name_length}} \ +{pool_tier: <{pool_tier_length}} \ {pool_used: <{pool_used_length}} \ {pool_usedpct: <{pool_usedpct_length}} \ {pool_free: <{pool_free_length}} \ @@ -950,6 +958,7 @@ def format_list_pool(pool_list): end_bold=ansiprint.end(), pool_id_length=pool_id_length, pool_name_length=pool_name_length, + pool_tier_length=pool_tier_length, pool_used_length=pool_used_length, pool_usedpct_length=pool_usedpct_length, pool_free_length=pool_free_length, @@ -963,6 +972,7 @@ def format_list_pool(pool_list): pool_read_data_length=pool_read_data_length, pool_id="ID", pool_name="Name", + pool_tier="Tier", pool_used="Used", pool_usedpct="Used%", pool_free="Free", @@ -983,6 +993,7 @@ def format_list_pool(pool_list): "{bold}\ {pool_id: <{pool_id_length}} \ {pool_name: <{pool_name_length}} \ +{pool_tier: <{pool_tier_length}} \ {pool_used: <{pool_used_length}} \ {pool_usedpct: <{pool_usedpct_length}} \ {pool_free: <{pool_free_length}} \ @@ -999,6 +1010,7 @@ def format_list_pool(pool_list): end_bold="", pool_id_length=pool_id_length, pool_name_length=pool_name_length, + pool_tier_length=pool_tier_length, pool_used_length=pool_used_length, pool_usedpct_length=pool_usedpct_length, pool_free_length=pool_free_length, @@ -1012,6 +1024,7 @@ def format_list_pool(pool_list): pool_read_data_length=pool_read_data_length, pool_id=pool_information["stats"]["id"], pool_name=pool_information["name"], + pool_tier=pool_information["tier"], 
             pool_used=pool_information["stats"]["used_bytes"],
             pool_usedpct=pool_information["stats"]["used_percent"],
             pool_free=pool_information["stats"]["free_bytes"],
diff --git a/client-cli/pvc/pvc.py b/client-cli/pvc/pvc.py
index 604f0ba4..d183925c 100755
--- a/client-cli/pvc/pvc.py
+++ b/client-cli/pvc/pvc.py
@@ -3507,6 +3507,17 @@ def ceph_pool():
 @click.command(name="add", short_help="Add new RBD pool.")
 @click.argument("name")
 @click.argument("pgs")
+@click.option(
+    "-t",
+    "--tier",
+    "tier",
+    default="default",
+    show_default=True,
+    type=click.Choice(["default", "hdd", "ssd", "nvme"]),
+    help="""
+    The device tier to limit the pool to. Default is all OSD tiers, and specific tiers can be specified instead. At least one full set of OSDs for a given tier must be present for the tier to be specified, or pool creation will fail.
+    """,
+)
 @click.option(
     "--replcfg",
     "replcfg",
@@ -3514,20 +3525,16 @@ def ceph_pool():
     show_default=True,
     required=False,
     help="""
-    The replication configuration, specifying both a "copies" and "mincopies" value, separated by a
-    comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas
-    and should not exceed the total number of nodes; the "mincopies" value specifies the minimum
-    number of available copies to allow writes. For additional details please see the Cluster
-    Architecture documentation.
+    The replication configuration, specifying both a "copies" and "mincopies" value, separated by a comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas and should not exceed the total number of nodes; the "mincopies" value specifies the minimum number of available copies to allow writes. For additional details please see the Cluster Architecture documentation.
     """,
 )
 @cluster_req
-def ceph_pool_add(name, pgs, replcfg):
+def ceph_pool_add(name, pgs, tier, replcfg):
     """
     Add a new Ceph RBD pool with name NAME and PGS placement groups.
     """
-    retcode, retmsg = pvc_ceph.ceph_pool_add(config, name, pgs, replcfg)
+    retcode, retmsg = pvc_ceph.ceph_pool_add(config, name, pgs, replcfg, tier)
 
     cleanup(retcode, retmsg)
 
 
diff --git a/daemon-common/ceph.py b/daemon-common/ceph.py
index 770cfe8a..e71da5cd 100644
--- a/daemon-common/ceph.py
+++ b/daemon-common/ceph.py
@@ -393,12 +393,20 @@ def getPoolInformation(zkhandler, pool):
     pool_stats_raw = zkhandler.read(("pool.stats", pool))
     pool_stats = dict(json.loads(pool_stats_raw))
     volume_count = len(getCephVolumes(zkhandler, pool))
+    tier = zkhandler.read(("pool.tier", pool))
+    if tier is None:
+        tier = "default"
 
-    pool_information = {"name": pool, "volume_count": volume_count, "stats": pool_stats}
+    pool_information = {
+        "name": pool,
+        "volume_count": volume_count,
+        "tier": tier,
+        "stats": pool_stats,
+    }
     return pool_information
 
 
-def add_pool(zkhandler, name, pgs, replcfg):
+def add_pool(zkhandler, name, pgs, replcfg, tier=None):
     # Prepare the copies/mincopies variables
     try:
         copies, mincopies = replcfg.split(",")
@@ -408,60 +416,70 @@ def add_pool(zkhandler, name, pgs, replcfg):
         copies = None
         mincopies = None
     if not copies or not mincopies:
-        return False, 'ERROR: Replication configuration "{}" is not valid.'.format(
-            replcfg
-        )
+        return False, f'ERROR: Replication configuration "{replcfg}" is not valid.'
 
-    # 1. Create the pool
+    # Prepare the tiers if applicable
+    if tier is not None and tier in ["hdd", "ssd", "nvme"]:
+        crush_rule = f"{tier}_tier"
+        # Create a CRUSH rule for the relevant tier
+        retcode, stdout, stderr = common.run_os_command(
+            f"ceph osd crush rule create-replicated {crush_rule} default host {tier}"
+        )
+        if retcode:
+            return (
+                False,
+                f"ERROR: Failed to create CRUSH rule {crush_rule} for pool {name}: {stderr}",
+            )
+    else:
+        tier = "default"
+        crush_rule = "replicated"
+
+    # Create the pool
     retcode, stdout, stderr = common.run_os_command(
-        "ceph osd pool create {} {} replicated".format(name, pgs)
+        f"ceph osd pool create {name} {pgs} {pgs} replicated {crush_rule}"
     )
     if retcode:
-        return False, 'ERROR: Failed to create pool "{}" with {} PGs: {}'.format(
-            name, pgs, stderr
-        )
+        return False, f'ERROR: Failed to create pool "{name}" with {pgs} PGs: {stderr}'
 
-    # 2. Set the size and minsize
+    # Set the size and minsize
     retcode, stdout, stderr = common.run_os_command(
-        "ceph osd pool set {} size {}".format(name, copies)
+        f"ceph osd pool set {name} size {copies}"
     )
     if retcode:
-        return False, 'ERROR: Failed to set pool "{}" size of {}: {}'.format(
-            name, copies, stderr
-        )
+        return False, f'ERROR: Failed to set pool "{name}" size of {copies}: {stderr}'
 
     retcode, stdout, stderr = common.run_os_command(
-        "ceph osd pool set {} min_size {}".format(name, mincopies)
-    )
-    if retcode:
-        return False, 'ERROR: Failed to set pool "{}" minimum size of {}: {}'.format(
-            name, mincopies, stderr
-        )
-
-    # 3. Enable RBD application
-    retcode, stdout, stderr = common.run_os_command(
-        "ceph osd pool application enable {} rbd".format(name)
+        f"ceph osd pool set {name} min_size {mincopies}"
     )
     if retcode:
         return (
             False,
-            'ERROR: Failed to enable RBD application on pool "{}" : {}'.format(
-                name, stderr
-            ),
+            f'ERROR: Failed to set pool "{name}" minimum size of {mincopies}: {stderr}',
        )
 
-    # 4. Add the new pool to Zookeeper
+    # Enable RBD application
+    retcode, stdout, stderr = common.run_os_command(
+        f"ceph osd pool application enable {name} rbd"
+    )
+    if retcode:
+        return (
+            False,
+            f'ERROR: Failed to enable RBD application on pool "{name}": {stderr}',
+        )
+
+    # Add the new pool to Zookeeper
     zkhandler.write(
         [
             (("pool", name), ""),
             (("pool.pgs", name), pgs),
+            (("pool.tier", name), tier),
             (("pool.stats", name), "{}"),
             (("volume", name), ""),
             (("snapshot", name), ""),
         ]
     )
 
-    return True, 'Created RBD pool "{}" with {} PGs'.format(name, pgs)
+    return True, f'Created RBD pool "{name}" with {pgs} PGs'
 
 
 def remove_pool(zkhandler, name):
diff --git a/daemon-common/migrations/versions/7.json b/daemon-common/migrations/versions/7.json
new file mode 100644
index 00000000..99419c1b
--- /dev/null
+++ b/daemon-common/migrations/versions/7.json
@@ -0,0 +1 @@
+{"version": "7", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
\ No newline at end of file
diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py
index ecf81264..1eea7259 100644
--- a/daemon-common/zkhandler.py
+++ b/daemon-common/zkhandler.py
@@ -540,7 +540,7 @@ class ZKHandler(object):
 #
 class ZKSchema(object):
     # Current version
-    _version = 6
+    _version = 7
 
     # Root for doing nested keys
     _schema_root = ""
@@ -703,7 +703,12 @@ class ZKSchema(object):
             "stats": "/stats",
         },
         # The schema of an individual pool entry (/ceph/pools/{pool_name})
-        "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"},  # The root key
+        "pool": {
+            "name": "",
+            "pgs": "/pgs",
+            "tier": "/tier",
+            "stats": "/stats",
+        },  # The root key
         # The schema of an individual volume entry (/ceph/volumes/{pool_name}/{volume_name})
         "volume": {"name": "", "stats": "/stats"},  # The root key
         # The schema of an individual snapshot entry (/ceph/volumes/{pool_name}/{volume_name}/{snapshot_name})
@@ -938,8 +943,13 @@ class ZKSchema(object):
                 kpath = f"{elem}.{ikey}"
                 # Validate that the key exists for that child
                 if not zkhandler.zk_conn.exists(self.path(kpath, child)):
+                    if elem == "pool" and ikey == "tier":
+                        default_data = "default"
+                    else:
+                        default_data = ""
                     zkhandler.zk_conn.create(
-                        self.path(kpath, child), "".encode(zkhandler.encoding)
+                        self.path(kpath, child),
+                        default_data.encode(zkhandler.encoding),
                     )
 
             # Continue for child keys under network (reservation, acl)
"meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index ecf81264..1eea7259 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -540,7 +540,7 @@ class ZKHandler(object): # class ZKSchema(object): # Current version - _version = 6 + _version = 7 # Root for doing nested keys _schema_root = "" @@ -703,7 +703,12 @@ class ZKSchema(object): "stats": "/stats", }, # The schema of an individual pool entry (/ceph/pools/{pool_name}) - "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, # The root key + "pool": { + "name": "", + "pgs": "/pgs", + "tier": "/tier", + "stats": "/stats", + }, # The root key # The schema of an individual volume entry (/ceph/volumes/{pool_name}/{volume_name}) "volume": {"name": "", "stats": "/stats"}, # The root key # The schema of an individual snapshot entry (/ceph/volumes/{pool_name}/{volume_name}/{snapshot_name}) @@ -938,8 +943,13 @@ class ZKSchema(object): kpath = f"{elem}.{ikey}" # Validate that the key exists for that child if not zkhandler.zk_conn.exists(self.path(kpath, child)): + if elem == "pool" and ikey == "tier": + default_data = "default" + else: + default_data = "" zkhandler.zk_conn.create( - self.path(kpath, child), "".encode(zkhandler.encoding) + self.path(kpath, child), + default_data.encode(zkhandler.encoding), ) # Continue for child keys under network (reservation, acl) diff --git a/docs/manuals/swagger.json b/docs/manuals/swagger.json index d7d8bd0e..d9677c77 100644 --- a/docs/manuals/swagger.json +++ b/docs/manuals/swagger.json @@ -729,6 +729,10 @@ }, "type": "object" }, + "tier": { + "description": "The device class/tier of the pool", + "type": "string" + }, "volume_count": { "description": "The number of volumes in the pool", "type": "integer" @@ -5272,6 +5276,12 @@ "name": "replcfg", "required": true, "type": "string" + }, + { + "description": "The device tier for the pool (hdd, ssd, nvme, or default)", + "in": "query", + "name": "tier", + "required": false } ], "responses": { @@ -5368,6 +5378,12 @@ "name": "replcfg", "required": true, "type": "string" + }, + { + "description": "The device tier for the pool (hdd, ssd, nvme, or default)", + "in": "query", + "name": "tier", + "required": false } ], "responses": {