Compare commits

..

No commits in common. "9f47da67775ac024f8f84f7e5b36217f0282b6e4" and "fc550468121c78dfd9444a398c5ae32a74743bf4" have entirely different histories.

3 changed files with 43 additions and 99 deletions

View File

@ -597,7 +597,7 @@ class API_Status(Resource):
Set the cluster maintenance mode
---
tags:
- root
- node
parameters:
- in: query
name: state
@ -622,45 +622,6 @@ class API_Status(Resource):
api.add_resource(API_Status, "/status")
# /status/primary_node
class API_Status_Primary(Resource):
    # NOTE(review): the docstring of get() looks like a Swagger specification
    # (summary line, then a YAML document after the "---" separator) and is
    # presumably parsed at runtime, so its wording is kept exactly as it was.
    def get(self):
        """
        Return the name of the current primary node.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
            schema:
              type: object
              properties:
                primary_node:
                  type: string
                  description: The name of the current primary node
          204:
            description: No content
            schema:
              type: object
              properties:
                primary_node:
                  type: string
                  description: An empty response; there is not currently a primary node, try again later
        """
        current_primary = get_primary_node()

        # No primary node is known right now: answer with an empty body and
        # HTTP 204 so the caller can retry later.
        if current_primary is None:
            return None, 204

        # Otherwise report the node name under the documented "primary_node" key.
        return {"primary_node": current_primary}, 200
api.add_resource(API_Status_Primary, "/status/primary_node")
# /metrics
class API_Metrics(Resource):
def get(self):
@ -877,39 +838,6 @@ class API_Faults(Resource):
---
tags:
- faults
definitions:
- schema:
type: object
id: fault
properties:
id:
type: string
description: The ID of the fault
example: "10ae144b78b4cc5fdf09e2ebbac51235"
first_reported:
type: date
description: The first time the fault was reported
example: "2023-12-01 16:47:59.849742"
last_reported:
type: date
description: The last time the fault was reported
example: "2023-12-01 17:39:45.188398"
acknowledged_at:
type: date
description: The time the fault was acknowledged, or empty if not acknowledged
example: "2023-12-01 17:50:00.000000"
status:
type: string
description: The current state of the fault, either "new" or "ack" (acknowledged)
example: "new"
health_delta:
type: integer
description: The health delta (amount it reduces cluster health from 100%) of the fault
example: 25
message:
type: string
description: The textual description of the fault
example: "Node hv1 was at 40% (psur@-10%, psql@-50%) <= 50% health"
parameters:
- in: query
name: sort_key
@ -929,7 +857,37 @@ class API_Faults(Resource):
schema:
type: array
items:
$ref: '#/definitions/fault'
type: object
id: fault
properties:
id:
type: string
description: The ID of the fault
example: "10ae144b78b4cc5fdf09e2ebbac51235"
first_reported:
type: date
description: The first time the fault was reported
example: "2023-12-01 16:47:59.849742"
last_reported:
type: date
description: The last time the fault was reported
example: "2023-12-01 17:39:45.188398"
acknowledged_at:
type: date
description: The time the fault was acknowledged, or empty if not acknowledged
example: "2023-12-01 17:50:00.000000"
status:
type: string
description: The current state of the fault, either "new" or "ack" (acknowledged)
example: "new"
health_delta:
type: integer
description: The health delta (amount it reduces cluster health from 100%) of the fault
example: 25
message:
type: string
description: The textual description of the fault
example: "Node hv1 was at 40% (psur@-10%, psql@-50%) <= 50% health"
"""
return api_helper.fault_list(sort_key=reqargs.get("sort_key", "last_reported"))
@ -990,6 +948,8 @@ class API_Faults_Element(Resource):
schema:
type: array
items:
type: object
id: fault
$ref: '#/definitions/fault'
"""
return api_helper.fault_list(limit=fault_id)
@ -1665,7 +1625,7 @@ class API_VM_Root(Resource):
description: Unix timestamp of the snapshot
age:
type: string
description: Human-readable age of the snapshot in the largest viable unit (seconds, minutes, hours, days)
description: Human-readable age of the snapshot in the largest viable unit: seconds, minutes, hours, days
rbd_snapshots:
type: array
items:

View File

@ -158,10 +158,9 @@ def call_api(
if response.status_code in retry_on_code:
failed = True
continue
break
except requests.exceptions.ConnectionError:
failed = True
continue
pass
if failed:
error = f"Code {response.status_code}" if response else "Timeout"
raise requests.exceptions.ConnectionError(

View File

@ -1015,27 +1015,23 @@ def add_snapshot(zkhandler, pool, volume, name, zk_only=False):
),
)
# 2. Get snapshot stats
retcode, stdout, stderr = common.run_os_command(
"rbd info --format json {}/{}@{}".format(pool, volume, name)
)
snapstats = stdout
# 3. Add the snapshot to Zookeeper
# 2. Add the snapshot to Zookeeper
zkhandler.write(
[
(("snapshot", f"{pool}/{volume}/{name}"), ""),
(("snapshot.stats", f"{pool}/{volume}/{name}"), snapstats),
(("snapshot.stats", f"{pool}/{volume}/{name}"), "{}"),
]
)
# 4. Update the count of snapshots on this volume
# 3. Update the count of snapshots on this volume
volume_stats_raw = zkhandler.read(("volume.stats", f"{pool}/{volume}"))
volume_stats = dict(json.loads(volume_stats_raw))
# Format the size to something nicer
volume_stats["snapshot_count"] = volume_stats["snapshot_count"] + 1
volume_stats_raw = json.dumps(volume_stats)
zkhandler.write(
[
(("volume.stats", f"{pool}/{volume}"), json.dumps(volume_stats)),
(("volume.stats", f"{pool}/{volume}"), volume_stats_raw),
]
)
@ -1178,11 +1174,6 @@ def get_list_snapshot(zkhandler, target_pool, target_volume, limit=None, is_fuzz
continue
if target_volume and volume_name != target_volume:
continue
snapshot_stats = json.loads(
zkhandler.read(
("snapshot.stats", f"{pool_name}/{volume_name}/{snapshot_name}")
)
)
if limit:
try:
if re.fullmatch(limit, snapshot_name):
@ -1191,19 +1182,13 @@ def get_list_snapshot(zkhandler, target_pool, target_volume, limit=None, is_fuzz
"pool": pool_name,
"volume": volume_name,
"snapshot": snapshot_name,
"stats": snapshot_stats,
}
)
except Exception as e:
return False, "Regex Error: {}".format(e)
else:
snapshot_list.append(
{
"pool": pool_name,
"volume": volume_name,
"snapshot": snapshot_name,
"stats": snapshot_stats,
}
{"pool": pool_name, "volume": volume_name, "snapshot": snapshot_name}
)
return True, sorted(snapshot_list, key=lambda x: str(x["snapshot"]))