Add device class tiers to Ceph pools

Allows specifying a particular device class ("tier") for a given pool,
for instance SSD-only or NVMe-only. This is implemented with CRUSH
rules on the Ceph side, and via a new key in the pool Zookeeper
schema, which defaults to "default".
Joshua Boniface 2021-12-28 20:39:50 -05:00
parent c3d255be65
commit 02138974fa
8 changed files with 139 additions and 47 deletions
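
In rough outline (a sketch for orientation, not code from this commit), the tier-to-CRUSH-rule mapping implemented in add_pool() below amounts to:

    # Illustrative only; mirrors the branch added to add_pool() in this commit.
    def crush_rule_for_tier(tier):
        # A specific tier gets a dedicated replicated CRUSH rule scoped to that
        # device class (created with "ceph osd crush rule create-replicated");
        # any other value falls back to the stock "replicated" rule.
        if tier in ("hdd", "ssd", "nvme"):
            return f"{tier}_tier"
        return "replicated"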

View File

@@ -4226,6 +4226,9 @@ class API_Storage_Ceph_Pool_Root(Resource):
              volume_count:
                type: integer
                description: The number of volumes in the pool
+             tier:
+               type: string
+               description: The device class/tier of the pool
              stats:
                type: object
                properties:
@@ -4307,6 +4310,12 @@ class API_Storage_Ceph_Pool_Root(Resource):
                "required": True,
                "helptext": "A valid replication configuration must be specified.",
            },
+           {
+               "name": "tier",
+               "required": False,
+               "choices": ("hdd", "ssd", "nvme", "default"),
+               "helptext": "A valid tier must be specified",
+           },
        ]
    )
    @Authenticator
@@ -4332,6 +4341,10 @@ class API_Storage_Ceph_Pool_Root(Resource):
            type: string
            required: true
            description: The replication configuration (e.g. "copies=3,mincopies=2") for the pool
+         - in: query
+           name: tier
+           required: false
+           description: The device tier for the pool (hdd, ssd, nvme, or default)
        responses:
          200:
            description: OK
@@ -4348,6 +4361,7 @@ class API_Storage_Ceph_Pool_Root(Resource):
            reqargs.get("pool", None),
            reqargs.get("pgs", None),
            reqargs.get("replcfg", None),
+           reqargs.get("tier", None),
        )
@@ -4388,6 +4402,12 @@ class API_Storage_Ceph_Pool_Element(Resource):
                "required": True,
                "helptext": "A valid replication configuration must be specified.",
            },
+           {
+               "name": "tier",
+               "required": False,
+               "choices": ("hdd", "ssd", "nvme", "default"),
+               "helptext": "A valid tier must be specified",
+           },
        ]
    )
    @Authenticator
@@ -4408,6 +4428,10 @@ class API_Storage_Ceph_Pool_Element(Resource):
            type: string
            required: true
            description: The replication configuration (e.g. "copies=3,mincopies=2") for the pool
+         - in: query
+           name: tier
+           required: false
+           description: The device tier for the pool (hdd, ssd, nvme, or default)
        responses:
          200:
            description: OK
@@ -4426,7 +4450,10 @@ class API_Storage_Ceph_Pool_Element(Resource):
                  id: Message
        """
        return api_helper.ceph_pool_add(
-           pool, reqargs.get("pgs", None), reqargs.get("replcfg", None)
+           pool,
+           reqargs.get("pgs", None),
+           reqargs.get("replcfg", None),
+           reqargs.get("tier", None),
        )

    @RequestParser(
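
For orientation, a minimal client call against the extended endpoint could look like the following; the host, port, and authentication header here are placeholders, and only the parameter names come from the diff above.

    import requests

    # Hypothetical cluster address and API key; "tier" is the new parameter.
    resp = requests.post(
        "http://pvc.example.com:7370/api/v1/storage/ceph/pool",
        params={
            "pool": "vms-ssd",
            "pgs": 128,
            "replcfg": "copies=3,mincopies=2",
            "tier": "ssd",
        },
        headers={"X-Api-Key": "secret"},
    )
    print(resp.status_code, resp.json().get("message"))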

View File

@@ -1403,11 +1403,11 @@ def ceph_pool_list(zkhandler, limit=None, is_fuzzy=True):
@ZKConnection(config)
-def ceph_pool_add(zkhandler, name, pgs, replcfg):
+def ceph_pool_add(zkhandler, name, pgs, replcfg, tier=None):
    """
    Add a Ceph RBD pool to the PVC Ceph storage cluster.
    """
-   retflag, retdata = pvc_ceph.add_pool(zkhandler, name, pgs, replcfg)
+   retflag, retdata = pvc_ceph.add_pool(zkhandler, name, pgs, replcfg, tier)
    if retflag:
        retcode = 200

View File

@@ -726,15 +726,15 @@ def ceph_pool_list(config, limit):
        return False, response.json().get("message", "")


-def ceph_pool_add(config, pool, pgs, replcfg):
+def ceph_pool_add(config, pool, pgs, replcfg, tier):
    """
    Add new Ceph OSD
    API endpoint: POST /api/v1/storage/ceph/pool
-   API arguments: pool={pool}, pgs={pgs}, replcfg={replcfg}
+   API arguments: pool={pool}, pgs={pgs}, replcfg={replcfg}, tier={tier}
    API schema: {"message":"{data}"}
    """
-   params = {"pool": pool, "pgs": pgs, "replcfg": replcfg}
+   params = {"pool": pool, "pgs": pgs, "replcfg": replcfg, "tier": tier}
    response = call_api(config, "post", "/storage/ceph/pool", params=params)

    if response.status_code == 200:
@@ -775,6 +775,7 @@ def format_list_pool(pool_list):
    pool_name_length = 5
    pool_id_length = 3
+   pool_tier_length = 5
    pool_used_length = 5
    pool_usedpct_length = 6
    pool_free_length = 5
@@ -812,6 +813,11 @@ def format_list_pool(pool_list):
        if _pool_id_length > pool_id_length:
            pool_id_length = _pool_id_length

+       # Set the tier and length
+       _pool_tier_length = len(str(pool_information["tier"])) + 1
+       if _pool_tier_length > pool_tier_length:
+           pool_tier_length = _pool_tier_length
+
        # Set the used and length
        _pool_used_length = len(str(pool_information["stats"]["used_bytes"])) + 1
        if _pool_used_length > pool_used_length:
@@ -879,10 +885,11 @@ def format_list_pool(pool_list):
            end_bold=ansiprint.end(),
            pool_header_length=pool_id_length
            + pool_name_length
+           + pool_tier_length
            + pool_used_length
            + pool_usedpct_length
            + pool_free_length
-           + 4,
+           + 5,
            objects_header_length=pool_num_objects_length
            + pool_num_clones_length
            + pool_num_copies_length
@@ -934,6 +941,7 @@ def format_list_pool(pool_list):
        "{bold}\
{pool_id: <{pool_id_length}} \
{pool_name: <{pool_name_length}} \
+{pool_tier: <{pool_tier_length}} \
{pool_used: <{pool_used_length}} \
{pool_usedpct: <{pool_usedpct_length}} \
{pool_free: <{pool_free_length}} \
@@ -950,6 +958,7 @@ def format_list_pool(pool_list):
            end_bold=ansiprint.end(),
            pool_id_length=pool_id_length,
            pool_name_length=pool_name_length,
+           pool_tier_length=pool_tier_length,
            pool_used_length=pool_used_length,
            pool_usedpct_length=pool_usedpct_length,
            pool_free_length=pool_free_length,
@@ -963,6 +972,7 @@ def format_list_pool(pool_list):
            pool_read_data_length=pool_read_data_length,
            pool_id="ID",
            pool_name="Name",
+           pool_tier="Tier",
            pool_used="Used",
            pool_usedpct="Used%",
            pool_free="Free",
@@ -983,6 +993,7 @@ def format_list_pool(pool_list):
        "{bold}\
{pool_id: <{pool_id_length}} \
{pool_name: <{pool_name_length}} \
+{pool_tier: <{pool_tier_length}} \
{pool_used: <{pool_used_length}} \
{pool_usedpct: <{pool_usedpct_length}} \
{pool_free: <{pool_free_length}} \
@@ -999,6 +1010,7 @@ def format_list_pool(pool_list):
            end_bold="",
            pool_id_length=pool_id_length,
            pool_name_length=pool_name_length,
+           pool_tier_length=pool_tier_length,
            pool_used_length=pool_used_length,
            pool_usedpct_length=pool_usedpct_length,
            pool_free_length=pool_free_length,
@@ -1012,6 +1024,7 @@ def format_list_pool(pool_list):
            pool_read_data_length=pool_read_data_length,
            pool_id=pool_information["stats"]["id"],
            pool_name=pool_information["name"],
+           pool_tier=pool_information["tier"],
            pool_used=pool_information["stats"]["used_bytes"],
            pool_usedpct=pool_information["stats"]["used_percent"],
            pool_free=pool_information["stats"]["free_bytes"],
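
The new Tier column uses the same dynamic-width pattern as the existing columns; in miniature (a standalone sketch, not code from this commit):

    # Header width (plus padding) is the floor; the widest cell value wins.
    tiers = ["default", "ssd", "nvme"]
    pool_tier_length = max(len("Tier") + 1, max(len(t) + 1 for t in tiers))
    for cell in ["Tier"] + tiers:
        print(f"{cell: <{pool_tier_length}}")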

View File

@@ -3507,6 +3507,17 @@ def ceph_pool():
@click.command(name="add", short_help="Add new RBD pool.")
@click.argument("name")
@click.argument("pgs")
+@click.option(
+    "-t",
+    "--tier",
+    "tier",
+    default="default",
+    show_default=True,
+    type=click.Choice(["default", "hdd", "ssd", "nvme"]),
+    help="""
+    The device tier to limit the pool to. Default is all OSD tiers, and specific tiers can be specified instead. At least one full set of OSDs for a given tier must be present for the tier to be specified, or the pool creation will fail.
+    """,
+)
@click.option(
    "--replcfg",
    "replcfg",
@@ -3514,20 +3525,16 @@ def ceph_pool():
    show_default=True,
    required=False,
    help="""
-   The replication configuration, specifying both a "copies" and "mincopies" value, separated by a
-   comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas
-   and should not exceed the total number of nodes; the "mincopies" value specifies the minimum
-   number of available copies to allow writes. For additional details please see the Cluster
-   Architecture documentation.
+   The replication configuration, specifying both a "copies" and "mincopies" value, separated by a comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas and should not exceed the total number of nodes; the "mincopies" value specifies the minimum number of available copies to allow writes. For additional details please see the Cluster Architecture documentation.
    """,
)
@cluster_req
-def ceph_pool_add(name, pgs, replcfg):
+def ceph_pool_add(name, pgs, tier, replcfg):
    """
    Add a new Ceph RBD pool with name NAME and PGS placement groups.
    """
-   retcode, retmsg = pvc_ceph.ceph_pool_add(config, name, pgs, replcfg)
+   retcode, retmsg = pvc_ceph.ceph_pool_add(config, name, pgs, replcfg, tier)

    cleanup(retcode, retmsg)
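
A hedged way to exercise the new flag end-to-end is Click's test runner; the import path below is hypothetical, and the subcommand spelling assumes the usual `pvc storage pool add` layout:

    from click.testing import CliRunner

    from pvc import cli  # hypothetical import path for the CLI entry point

    runner = CliRunner()
    result = runner.invoke(
        cli, ["storage", "pool", "add", "vms-ssd", "128", "--tier", "ssd"]
    )
    print(result.exit_code, result.output)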

View File

@@ -393,12 +393,20 @@ def getPoolInformation(zkhandler, pool):
    pool_stats_raw = zkhandler.read(("pool.stats", pool))
    pool_stats = dict(json.loads(pool_stats_raw))
    volume_count = len(getCephVolumes(zkhandler, pool))
+   tier = zkhandler.read(("pool.tier", pool))
+   if tier is None:
+       tier = "default"

-   pool_information = {"name": pool, "volume_count": volume_count, "stats": pool_stats}
+   pool_information = {
+       "name": pool,
+       "volume_count": volume_count,
+       "tier": tier,
+       "stats": pool_stats,
+   }
    return pool_information


-def add_pool(zkhandler, name, pgs, replcfg):
+def add_pool(zkhandler, name, pgs, replcfg, tier=None):
    # Prepare the copies/mincopies variables
    try:
        copies, mincopies = replcfg.split(",")
@@ -408,60 +416,70 @@ def add_pool(zkhandler, name, pgs, replcfg):
        copies = None
        mincopies = None
    if not copies or not mincopies:
-       return False, 'ERROR: Replication configuration "{}" is not valid.'.format(
-           replcfg
-       )
+       return False, f'ERROR: Replication configuration "{replcfg}" is not valid.'

-   # 1. Create the pool
-   retcode, stdout, stderr = common.run_os_command(
-       "ceph osd pool create {} {} replicated".format(name, pgs)
-   )
-   if retcode:
-       return False, 'ERROR: Failed to create pool "{}" with {} PGs: {}'.format(
-           name, pgs, stderr
-       )
-
-   # 2. Set the size and minsize
-   retcode, stdout, stderr = common.run_os_command(
-       "ceph osd pool set {} size {}".format(name, copies)
-   )
-   if retcode:
-       return False, 'ERROR: Failed to set pool "{}" size of {}: {}'.format(
-           name, copies, stderr
-       )
-
-   retcode, stdout, stderr = common.run_os_command(
-       "ceph osd pool set {} min_size {}".format(name, mincopies)
-   )
-   if retcode:
-       return False, 'ERROR: Failed to set pool "{}" minimum size of {}: {}'.format(
-           name, mincopies, stderr
-       )
-
-   # 3. Enable RBD application
-   retcode, stdout, stderr = common.run_os_command(
-       "ceph osd pool application enable {} rbd".format(name)
-   )
-   if retcode:
-       return (
-           False,
-           'ERROR: Failed to enable RBD application on pool "{}" : {}'.format(
-               name, stderr
-           ),
-       )
+   # Prepare the tiers if applicable
+   if tier is not None and tier in ["hdd", "ssd", "nvme"]:
+       crush_rule = f"{tier}_tier"
+       # Create a CRUSH rule for the relevant tier
+       retcode, stdout, stderr = common.run_os_command(
+           f"ceph osd crush rule create-replicated {crush_rule} default host {tier}"
+       )
+       if retcode:
+           return (
+               False,
+               f"ERROR: Failed to create CRUSH rule {tier} for pool {name}: {stderr}",
+           )
+   else:
+       tier = "default"
+       crush_rule = "replicated"
+
+   # Create the pool
+   retcode, stdout, stderr = common.run_os_command(
+       f"ceph osd pool create {name} {pgs} {pgs} {crush_rule}"
+   )
+   if retcode:
+       return False, f'ERROR: Failed to create pool "{name}" with {pgs} PGs: {stderr}'
+
+   # Set the size and minsize
+   retcode, stdout, stderr = common.run_os_command(
+       f"ceph osd pool set {name} size {copies}"
+   )
+   if retcode:
+       return False, f'ERROR: Failed to set pool "{name}" size of {copies}: {stderr}'
+
+   retcode, stdout, stderr = common.run_os_command(
+       f"ceph osd pool set {name} min_size {mincopies}"
+   )
+   if retcode:
+       return (
+           False,
+           f'ERROR: Failed to set pool "{name}" minimum size of {mincopies}: {stderr}',
+       )
+
+   # Enable RBD application
+   retcode, stdout, stderr = common.run_os_command(
+       f"ceph osd pool application enable {name} rbd"
+   )
+   if retcode:
+       return (
+           False,
+           f'ERROR: Failed to enable RBD application on pool "{name}" : {stderr}',
+       )

-   # 4. Add the new pool to Zookeeper
+   # Add the new pool to Zookeeper
    zkhandler.write(
        [
            (("pool", name), ""),
            (("pool.pgs", name), pgs),
+           (("pool.tier", name), tier),
            (("pool.stats", name), "{}"),
            (("volume", name), ""),
            (("snapshot", name), ""),
        ]
    )

-   return True, 'Created RBD pool "{}" with {} PGs'.format(name, pgs)
+   return True, f'Created RBD pool "{name}" with {pgs} PGs'


def remove_pool(zkhandler, name):

View File

@@ -0,0 +1 @@
+{"version": "7", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}

View File

@@ -540,7 +540,7 @@ class ZKHandler(object):
#
class ZKSchema(object):
    # Current version
-   _version = 6
+   _version = 7

    # Root for doing nested keys
    _schema_root = ""
@@ -703,7 +703,12 @@ class ZKSchema(object):
            "stats": "/stats",
        },
        # The schema of an individual pool entry (/ceph/pools/{pool_name})
-       "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"},  # The root key
+       "pool": {
+           "name": "",
+           "pgs": "/pgs",
+           "tier": "/tier",
+           "stats": "/stats",
+       },  # The root key
        # The schema of an individual volume entry (/ceph/volumes/{pool_name}/{volume_name})
        "volume": {"name": "", "stats": "/stats"},  # The root key
        # The schema of an individual snapshot entry (/ceph/volumes/{pool_name}/{volume_name}/{snapshot_name})
@@ -938,8 +943,13 @@ class ZKSchema(object):
                    kpath = f"{elem}.{ikey}"
                    # Validate that the key exists for that child
                    if not zkhandler.zk_conn.exists(self.path(kpath, child)):
+                       if elem == "pool" and ikey == "tier":
+                           default_data = "default"
+                       else:
+                           default_data = ""
                        zkhandler.zk_conn.create(
-                           self.path(kpath, child), "".encode(zkhandler.encoding)
+                           self.path(kpath, child),
+                           default_data.encode(zkhandler.encoding),
                        )

        # Continue for child keys under network (reservation, acl)
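
The migration rule added above, in miniature (a sketch, not the commit's code): every missing child key is backfilled with an empty string, except the new pool tier key, which must backfill to "default" so existing pools keep their current behavior.

    # Mirrors the conditional added to the schema-migration loop above.
    def default_for(elem, ikey):
        return "default" if (elem, ikey) == ("pool", "tier") else ""

    assert default_for("pool", "tier") == "default"
    assert default_for("pool", "pgs") == ""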

View File

@@ -729,6 +729,10 @@
                },
                "type": "object"
            },
+           "tier": {
+               "description": "The device class/tier of the pool",
+               "type": "string"
+           },
            "volume_count": {
                "description": "The number of volumes in the pool",
                "type": "integer"
@@ -5272,6 +5276,12 @@
                "name": "replcfg",
                "required": true,
                "type": "string"
+           },
+           {
+               "description": "The device tier for the pool (hdd, ssd, nvme, or default)",
+               "in": "query",
+               "name": "tier",
+               "required": false
            }
        ],
        "responses": {
@@ -5368,6 +5378,12 @@
                "name": "replcfg",
                "required": true,
                "type": "string"
+           },
+           {
+               "description": "The device tier for the pool (hdd, ssd, nvme, or default)",
+               "in": "query",
+               "name": "tier",
+               "required": false
            }
        ],
        "responses": {