diff --git a/api-daemon/migrations/versions/3bc6117ea44d_pvc_version_0_7.py b/api-daemon/migrations/versions/3bc6117ea44d_pvc_version_0_7.py
new file mode 100644
index 00000000..2273eed9
--- /dev/null
+++ b/api-daemon/migrations/versions/3bc6117ea44d_pvc_version_0_7.py
@@ -0,0 +1,33 @@
+"""PVC version 0.7
+
+Revision ID: 3bc6117ea44d
+Revises: 88c8514684f7
+Create Date: 2020-08-24 14:34:36.919308
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '3bc6117ea44d'
+down_revision = '88c8514684f7'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('storage_benchmarks',
+        sa.Column('id', sa.Integer(), nullable=False),
+        sa.Column('job', sa.Text(), nullable=False),
+        sa.Column('result', sa.Text(), nullable=False),
+        sa.PrimaryKeyConstraint('id')
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('storage_benchmarks')
+    # ### end Alembic commands ###
diff --git a/api-daemon/pvcapid/benchmark.py b/api-daemon/pvcapid/benchmark.py
new file mode 100755
index 00000000..03e1672f
--- /dev/null
+++ b/api-daemon/pvcapid/benchmark.py
@@ -0,0 +1,478 @@
+#!/usr/bin/env python3
+
+# benchmark.py - PVC API Benchmark functions
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+#    Copyright (C) 2018-2020 Joshua M. Boniface <joshua@boniface.me>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+import json
+import psycopg2
+import psycopg2.extras
+import time
+
+from distutils.util import strtobool as dustrtobool
+
+import daemon_lib.common as pvc_common
+import daemon_lib.ceph as pvc_ceph
+
+def strtobool(stringv):
+    if stringv is None:
+        return False
+    if isinstance(stringv, bool):
+        return bool(stringv)
+    try:
+        return bool(dustrtobool(stringv))
+    except Exception:
+        return False
+
+#
+# Exceptions (used by Celery tasks)
+#
+class BenchmarkError(Exception):
+    """
+    An exception that results from the Benchmark job.
+ """ + def __init__(self, message, cur_time=None, db_conn=None, db_cur=None, zk_conn=None): + self.message = message + if cur_time is not None: + # Clean up our dangling result + query = "DELETE FROM storage_benchmarks WHERE job = %s;" + args = (cur_time,) + db_cur.execute(query, args) + db_conn.commit() + # Close the database connections cleanly + close_database(db_conn, db_cur) + pvc_common.stopZKConnection(zk_conn) + + def __str__(self): + return str(self.message) + +# +# Common functions +# + +# Database connections +def open_database(config): + conn = psycopg2.connect( + host=config['database_host'], + port=config['database_port'], + dbname=config['database_name'], + user=config['database_user'], + password=config['database_password'] + ) + cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + return conn, cur + +def close_database(conn, cur, failed=False): + if not failed: + conn.commit() + cur.close() + conn.close() + +def list_benchmarks(job=None): + if limit is not None: + query = "SELECT * FROM {} WHERE job = %s;".format('storage_benchmarks') + args = (job, ) + else: + query = "SELECT * FROM {} ORDER BY id DESC;".format('storage_benchmarks') + args = () + + conn, cur = open_database(config) + cur.execute(query, args) + orig_data = cur.fetchall() + data = list() + for benchmark in orig_data: + benchmark_data = dict() + benchmark_data['id'] = benchmark['id'] + benchmark_data['job'] = benchmark['job'] + benchmark_data['benchmark_result'] = benchmark['result'] + # Append the new data to our actual output structure + data.append(benchmark_data) + close_database(conn, cur) + if data: + return data, 200 + else: + return { 'message': 'No benchmark found.' }, 404 + +def run_benchmark(self, pool): + # Runtime imports + import time + import json + from datetime import datetime + + time.sleep(2) + + cur_time = datetime.now().isoformat(timespec='seconds') + + print("Starting storage benchmark '{}' on pool '{}'".format(cur_time, pool)) + + # Phase 0 - connect to databases + try: + db_conn, db_cur = open_database(config) + except: + print('FATAL - failed to connect to Postgres') + raise Exception + + try: + zk_conn = pvc_common.startZKConnection(config['coordinators']) + except: + print('FATAL - failed to connect to Zookeeper') + raise Exception + + + print("Storing running status for job '{}' in database".format(cur_time)) + try: + query = "INSERT INTO storage_benchmarks (job, result) VALUES (%s, %s);" + args = (cur_time, "Running",) + db_cur.execute(query, args) + db_conn.commit() + except Exception as e: + raise BenchmarkError("Failed to store running status: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn) + + # Phase 1 - volume preparation + self.update_state(state='RUNNING', meta={'current': 1, 'total': 3, 'status': 'Creating benchmark volume'}) + time.sleep(1) + + volume = 'pvcbenchmark' + + # Create the RBD volume + retcode, retmsg = pvc_ceph.add_volume(zk_conn, pool, volume, "4G") + if not retcode: + raise BenchmarkError('Failed to create volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn) + else: + print(retmsg) + + # Phase 2 - benchmark run + self.update_state(state='RUNNING', meta={'current': 2, 'total': 3, 'status': 'Running fio benchmarks on volume'}) + time.sleep(1) + + # We run a total of 8 tests, to give a generalized idea of performance on the cluster: + # 1. A sequential read test of 4GB with a 4M block size + # 2. A sequential write test of 4GB with a 4M block size + # 3. 
+
+    # Phase 2 - benchmark run
+    self.update_state(state='RUNNING', meta={'current': 2, 'total': 3, 'status': 'Running fio benchmarks on volume'})
+    time.sleep(1)
+
+    # We run a total of 8 tests, to give a generalized idea of performance on the cluster:
+    #   1. A sequential read test with a 4M block size
+    #   2. A sequential write test with a 4M block size
+    #   3. A random read test with a 4M block size
+    #   4. A random write test with a 4M block size
+    #   5. A random read test with a 256k block size
+    #   6. A random write test with a 256k block size
+    #   7. A random read test with a 4k block size
+    #   8. A random write test with a 4k block size
+    # Each test passes 128MB of data through the 4GB test volume.
+    # Taken together, these 8 results should give a very good indication of the overall
+    # storage performance for a variety of workloads.
+    test_matrix = {
+        'seq_read': {
+            'direction': 'read',
+            'bs': '4M',
+            'rw': 'read'
+        },
+        'seq_write': {
+            'direction': 'write',
+            'bs': '4M',
+            'rw': 'write'
+        },
+        'rand_read_4M': {
+            'direction': 'read',
+            'bs': '4M',
+            'rw': 'randread'
+        },
+        'rand_write_4M': {
+            'direction': 'write',
+            'bs': '4M',
+            'rw': 'randwrite'
+        },
+        'rand_read_256K': {
+            'direction': 'read',
+            'bs': '256K',
+            'rw': 'randread'
+        },
+        'rand_write_256K': {
+            'direction': 'write',
+            'bs': '256K',
+            'rw': 'randwrite'
+        },
+        'rand_read_4K': {
+            'direction': 'read',
+            'bs': '4K',
+            'rw': 'randread'
+        },
+        'rand_write_4K': {
+            'direction': 'write',
+            'bs': '4K',
+            'rw': 'randwrite'
+        }
+    }
+    parsed_results = dict()
+    for test in test_matrix:
+        print("Running test '{}'".format(test))
+        fio_cmd = """
+            fio \
+                --output-format=terse \
+                --terse-version=5 \
+                --ioengine=rbd \
+                --pool={pool} \
+                --rbdname={volume} \
+                --direct=1 \
+                --randrepeat=1 \
+                --gtod_reduce=1 \
+                --iodepth=64 \
+                --size=128M \
+                --name={test} \
+                --bs={bs} \
+                --readwrite={rw}
+        """.format(
+            pool=pool,
+            volume=volume,
+            test=test,
+            bs=test_matrix[test]['bs'],
+            rw=test_matrix[test]['rw']
+        )
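+        # For illustration, the command rendered from the template above for the
+        # 'seq_read' test against a hypothetical pool named 'vms' would be:
+        #   fio --output-format=terse --terse-version=5 --ioengine=rbd --pool=vms \
+        #       --rbdname=pvcbenchmark --direct=1 --randrepeat=1 --gtod_reduce=1 \
+        #       --iodepth=64 --size=128M --name=seq_read --bs=4M --readwrite=read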
+
+        retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
+        if retcode:
+            raise BenchmarkError("Failed to run fio test: {}".format(stderr), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
+
+        # Parse the terse results to avoid storing tons of junk
+        # Reference: https://fio.readthedocs.io/en/latest/fio_doc.html#terse-output
+        # This is broken out line-by-line here, since the man page doesn't do so, and is kept for posterity.
+        # Example Read test (line breaks to match man ref):
+        # I 5;fio-3.12;test;0;0; (5) [0, 1, 2, 3, 4]
+        # R 8388608;2966268;724;2828; (4) [5, 6, 7, 8]
+        #   0;0;0.000000;0.000000; (4) [9, 10, 11, 12]
+        #   0;0;0.000000;0.000000; (4) [13, 14, 15, 16]
+        #   0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]
+        #   0;0;0.000000;0.000000; (4) [37, 38, 39, 40]
+        #   2842624;3153920;100.000000%;2967142.400000;127226.797479;5; (6) [41, 42, 43, 44, 45, 46]
+        #   694;770;724.400000;31.061230;5; (5) [47, 48, 49, 50, 51]
+        # W 0;0;0;0; (4) [52, 53, 54, 55]
+        #   0;0;0.000000;0.000000; (4) [56, 57, 58, 59]
+        #   0;0;0.000000;0.000000; (4) [60, 61, 62, 63]
+        #   0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83]
+        #   0;0;0.000000;0.000000; (4) [84, 85, 86, 87]
+        #   0;0;0.000000%;0.000000;0.000000;0; (6) [88, 89, 90, 91, 92, 93]
+        #   0;0;0.000000;0.000000;0; (5) [94, 95, 96, 97, 98]
+        # T 0;0;0;0; (4) [99, 100, 101, 102]
+        #   0;0;0.000000;0.000000; (4) [103, 104, 105, 106]
+        #   0;0;0.000000;0.000000; (4) [107, 108, 109, 110]
+        #   0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130]
+        #   0;0;0.000000;0.000000; (4) [131, 132, 133, 134]
+        #   0;0;0.000000%;0.000000;0.000000;0; (6) [135, 136, 137, 138, 139, 140]
+        #   0;0;0.000000;0.000000;0; (5) [141, 142, 143, 144, 145]
+        # C 0.495225%;0.000000%;2083;0;13; (5) [146, 147, 148, 149, 150]
+        # D 0.1%;0.1%;0.2%;0.4%;0.8%;1.6%;96.9%; (7) [151, 152, 153, 154, 155, 156, 157]
+        # U 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (10) [158, 159, 160, 161, 162, 163, 164, 165, 166, 167]
+        # M 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (12) [168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179]
+        # B dm-0;0;110;0;0;0;4;4;0.15%; (9) [180, 181, 182, 183, 184, 185, 186, 187, 188]
+        #   slaves;0;118;0;28;0;23;0;0.00%; (9) [189, 190, 191, 192, 193, 194, 195, 196, 197]
+        #   sde;0;118;0;28;0;23;0;0.00% (9) [198, 199, 200, 201, 202, 203, 204, 205, 206]
+        # Example Write test:
+        # I 5;fio-3.12;test;0;0; (5)
+        # R 0;0;0;0; (4)
+        #   0;0;0.000000;0.000000; (4)
+        #   0;0;0.000000;0.000000; (4)
+        #   0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
+        #   0;0;0.000000;0.000000; (4)
+        #   0;0;0.000000%;0.000000;0.000000;0; (6)
+        #   0;0;0.000000;0.000000;0; (5)
+        # W 8388608;1137438;277;7375; (4)
+        #   0;0;0.000000;0.000000; (4)
+        #   0;0;0.000000;0.000000; (4)
+        #   0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
+        #   0;0;0.000000;0.000000; (4)
+        #   704512;1400832;99.029573%;1126400.000000;175720.860374;14; (6)
+        #   172;342;275.000000;42.900601;14; (5)
+        # T 0;0;0;0; (4)
+        #   0;0;0.000000;0.000000; (4)
+        #   0;0;0.000000;0.000000; (4)
+        #   0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
+        #   0;0;0.000000;0.000000; (4)
+        #   0;0;0.000000%;0.000000;0.000000;0; (6)
+        #   0;0;0.000000;0.000000;0; (5)
+        # C 12.950909%;1.912124%;746;0;95883; (5)
+        # D 0.1%;0.1%;0.2%;0.4%;0.8%;1.6%;96.9%; (7)
+        # U 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (10)
+        # M 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (12)
+        # B dm-0;0;196;0;0;0;12;12;0.16%; (9)
+        #   slaves;0;207;0;95;0;39;16;0.21%; (9)
+        #   sde;0;207;0;95;0;39;16;0.21% (9)
+        results = stdout.split(';')
+        if test_matrix[test]['direction'] == 'read':
+            # Stats
+            #   5: Total IO (KiB)
+            #   6: bandwidth (KiB/sec)
+            #   7: IOPS
+            #   8: runtime (msec)
+            # Total latency
+            #   37: min
+            #   38: max
+            #   39: mean
+            #   40: stdev
+            # Bandwidth
+            #   41: min
+            #   42: max
+            #   43: agg % of total
+            #   44: mean
+            #   45: stdev
+            #   46: # samples
+            # IOPS
+            #   47: min
+            #   48: max
+            #   49: mean
+            #   50: stdev
+            #   51: # samples
+            # CPU
+            #   146: user
+            #   147: system
+            #   148: ctx switches
+            #   149: maj faults
+            #   150: min faults
+            parsed_results[test] = {
+                "overall": {
+                    "iosize": results[5],
+                    "bandwidth": results[6],
+                    "iops": results[7],
+                    "runtime": results[8]
+                },
+                "latency": {
+                    "min": results[37],
+                    "max": results[38],
+                    "mean": results[39],
"stdev": results[40] + }, + "bandwidth": { + "min": results[41], + "max": results[42], + "aggrpct": results[43], + "mean": results[44], + "stdev": results[45], + "numsamples": results[46], + }, + "iops": { + "min": results[47], + "max": results[48], + "mean": results[49], + "stdev": results[50], + "numsamples": results[51] + }, + "cpu": { + "user": results[146], + "system": results[147], + "ctxsw": results[148], + "majfault": results[149], + "minfault": results[150] + } + } + + if test_matrix[test]['direction'] == 'write': + # Stats + # 52: Total IO (KiB) + # 53: bandwidth (KiB/sec) + # 54: IOPS + # 55: runtime (msec) + # Total latency + # 84: min + # 85: max + # 86: mean + # 87: stdev + # Bandwidth + # 88: min + # 89: max + # 90: agg % of total + # 91: mean + # 92: stdev + # 93: # samples + # IOPS + # 94: min + # 95: max + # 96: mean + # 97: stdev + # 98: # samples + # CPU + # 146: user + # 147: system + # 148: ctx switches + # 149: maj faults + # 150: min faults + parsed_results[test] = { + "overall": { + "iosize": results[52], + "bandwidth": results[53], + "iops": results[54], + "runtime": results[55] + }, + "latency": { + "min": results[84], + "max": results[85], + "mean": results[86], + "stdev": results[87] + }, + "bandwidth": { + "min": results[88], + "max": results[89], + "aggrpct": results[90], + "mean": results[91], + "stdev": results[92], + "numsamples": results[93], + }, + "iops": { + "min": results[94], + "max": results[95], + "mean": results[96], + "stdev": results[97], + "numsamples": results[98] + }, + "cpu": { + "user": results[146], + "system": results[147], + "ctxsw": results[148], + "majfault": results[149], + "minfault": results[150] + } + } + print(parsed_results) + + # Phase 3 - cleanup + self.update_state(state='RUNNING', meta={'current': 3, 'total': 3, 'status': 'Cleaning up and storing results'}) + time.sleep(1) + + # Remove the RBD volume + retcode, retmsg = pvc_ceph.remove_volume(zk_conn, pool, volume) + if not retcode: + raise BenchmarkError('Failed to remove volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn) + else: + print(retmsg) + + print("Storing result of tests for job '{}' in database".format(cur_time)) + try: + query = "UPDATE storage_benchmarks SET result = %s WHERE job = %s;" + args = (json.dumps(parsed_results), cur_time) + db_cur.execute(query, args) + db_conn.commit() + except Exception as e: + raise BenchmarkError("Failed to store test results: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn) + + close_database(db_conn, db_cur) + pvc_common.stopZKConnection(zk_conn) + return { 'status': "Storage benchmark '{}' completed successfully.", 'current': 3, 'total': 3 } diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py index c9e083f1..d1852818 100755 --- a/api-daemon/pvcapid/flaskapi.py +++ b/api-daemon/pvcapid/flaskapi.py @@ -39,6 +39,7 @@ from celery.task.control import inspect import pvcapid.helper as api_helper import pvcapid.provisioner as api_provisioner +import pvcapid.benchmark as api_benchmark import pvcapid.ova as api_ova from flask_sqlalchemy import SQLAlchemy @@ -107,6 +108,8 @@ try: api_helper.config = config # Set the config object in the api_provisioner namespace api_provisioner.config = config + # Set the config object in the api_benchmark namespace + api_benchmark.config = config # Set the config object in the api_ova namespace api_ova.config = config except Exception as e: @@ -198,6 +201,10 @@ def Authenticator(function): 
diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py
index c9e083f1..d1852818 100755
--- a/api-daemon/pvcapid/flaskapi.py
+++ b/api-daemon/pvcapid/flaskapi.py
@@ -39,6 +39,7 @@ from celery.task.control import inspect
 
 import pvcapid.helper as api_helper
 import pvcapid.provisioner as api_provisioner
+import pvcapid.benchmark as api_benchmark
 import pvcapid.ova as api_ova
 
 from flask_sqlalchemy import SQLAlchemy
@@ -107,6 +108,8 @@ try:
     api_helper.config = config
     # Set the config object in the api_provisioner namespace
     api_provisioner.config = config
+    # Set the config object in the api_benchmark namespace
+    api_benchmark.config = config
     # Set the config object in the api_ova namespace
     api_ova.config = config
 except Exception as e:
@@ -198,6 +201,10 @@ def Authenticator(function):
 def create_vm(self, vm_name, profile_name, define_vm=True, start_vm=True, script_run_args=[]):
     return api_provisioner.create_vm(self, vm_name, profile_name, define_vm=define_vm, start_vm=start_vm, script_run_args=script_run_args)
 
+@celery.task(bind=True)
+def run_benchmark(self, pool):
+    return api_benchmark.run_benchmark(self, pool)
+
 
 ##########################################################
 # API Root/Authentication
@@ -2600,6 +2607,64 @@ class API_Storage_Ceph_Utilization(Resource):
         return api_helper.ceph_util()
 api.add_resource(API_Storage_Ceph_Utilization, '/storage/ceph/utilization')
 
+# /storage/ceph/benchmark
+class API_Storage_Ceph_Benchmark(Resource):
+    @RequestParser([
+        { 'name': 'job' }
+    ])
+    @Authenticator
+    def get(self, reqargs):
+        """
+        List results from benchmark jobs
+        ---
+        tags:
+          - storage / ceph
+        responses:
+          200:
+            description: OK
+            schema:
+              type: object
+              properties:
+                tbd:
+                  type: object
+                  description: TBD
+        """
+        return api_benchmark.list_benchmarks(reqargs.get('job', None))
+
+    @RequestParser([
+        { 'name': 'pool', 'required': True, 'helpmsg': "A valid pool must be specified." },
+    ])
+    @Authenticator
+    def post(self, reqargs):
+        """
+        Execute a storage benchmark against a storage pool
+        ---
+        tags:
+          - storage / ceph
+        parameters:
+          - in: query
+            name: pool
+            type: string
+            required: true
+            description: The PVC storage pool to benchmark
+        responses:
+          202:
+            description: Accepted
+            schema:
+              type: string
+              description: The job ID of the benchmark
+        """
+        # Verify that the pool is valid
+        _list, code = api_helper.ceph_pool_list(reqargs.get('pool', None), is_fuzzy=False)
+        if code != 200:
+            return { 'message': 'Pool "{}" is not valid.'.format(reqargs.get('pool')) }, 400
+
+        task = run_benchmark.delay(
+            reqargs.get('pool', None)
+        )
+        return { "task_id": task.id }, 202, { 'Location': Api.url_for(api, API_Storage_Ceph_Benchmark, task_id=task.id) }
+api.add_resource(API_Storage_Ceph_Benchmark, '/storage/ceph/benchmark')
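+# For illustration, a benchmark can be submitted directly against this endpoint;
+# the cluster address, API key, and pool name below are hypothetical, and 7370 is
+# assumed to be the configured API listen port:
+#   curl -X POST -H "X-Api-Key: <key>" \
+#       "http://pvc.local:7370/api/v1/storage/ceph/benchmark?pool=vms"
+#   => { "task_id": "..." } with HTTP status 202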
 
 # /storage/ceph/option
 class API_Storage_Ceph_Option(Resource):
     @RequestParser([
diff --git a/api-daemon/pvcapid/models.py b/api-daemon/pvcapid/models.py
index 2c6349e1..008090ec 100755
--- a/api-daemon/pvcapid/models.py
+++ b/api-daemon/pvcapid/models.py
@@ -213,3 +213,17 @@ class DBProfile(db.Model):
 
     def __repr__(self):
         return '<DBProfile {}>'.format(self.id)
+
+class DBStorageBenchmarks(db.Model):
+    __tablename__ = 'storage_benchmarks'
+
+    id = db.Column(db.Integer, primary_key=True)
+    job = db.Column(db.Text, nullable=False)
+    result = db.Column(db.Text, nullable=False)
+
+    def __init__(self, job, result):
+        self.job = job
+        self.result = result
+
+    def __repr__(self):
+        return '<DBStorageBenchmarks {}>'.format(self.id)
diff --git a/build-and-deploy.sh b/build-and-deploy.sh
index 65eed608..f013e597 100755
--- a/build-and-deploy.sh
+++ b/build-and-deploy.sh
@@ -32,9 +32,9 @@ for HOST in ${HOSTS[@]}; do
     ssh $HOST $SUDO dpkg -i /tmp/pvc/{pvc-client-cli,pvc-daemon-common,pvc-daemon-api,pvc-daemon-node}*.deb
     ssh $HOST rm -rf /tmp/pvc
     echo "Restarting PVC node daemon..."
-    ssh $HOST $SUDO systemctl restart pvcnoded
-    echo "****"
-    echo "Waiting 15s for host ${HOST} to stabilize"
-    echo "****"
-    sleep 15
+    ssh $HOST $SUDO systemctl restart pvcapid
+#    echo "****"
+#    echo "Waiting 15s for host ${HOST} to stabilize"
+#    echo "****"
+#    sleep 15
 done
diff --git a/client-cli/cli_lib/ceph.py b/client-cli/cli_lib/ceph.py
index c81f52e6..bdd9ff4e 100644
--- a/client-cli/cli_lib/ceph.py
+++ b/client-cli/cli_lib/ceph.py
@@ -1249,3 +1249,198 @@ def format_list_snapshot(snapshot_list):
         )
 
     return '\n'.join(sorted(snapshot_list_output))
+
+#
+# Benchmark functions
+#
+def ceph_benchmark_run(config, pool):
+    """
+    Run a storage benchmark against {pool}
+
+    API endpoint: POST /api/v1/storage/ceph/benchmark
+    API arguments: pool={pool}
+    API schema: {message}
+    """
+    params = {
+        'pool': pool
+    }
+    response = call_api(config, 'post', '/storage/ceph/benchmark', params=params)
+
+    if response.status_code == 202:
+        retvalue = True
+        retdata = 'Task ID: {}'.format(response.json()['task_id'])
+    else:
+        retvalue = False
+        retdata = response.json().get('message', '')
+
+    return retvalue, retdata
+
+def ceph_benchmark_list(config, job):
+    """
+    View results of one or more previous benchmark runs
+
+    API endpoint: GET /api/v1/storage/ceph/benchmark
+    API arguments: job={job}
+    API schema: {results}
+    """
+    if job is not None:
+        params = {
+            'job': job
+        }
+    else:
+        params = {}
+
+    response = call_api(config, 'get', '/storage/ceph/benchmark', params=params)
+
+    if response.status_code == 200:
+        retvalue = True
+        retdata = response.json()
+    else:
+        retvalue = False
+        retdata = response.json()
+
+    return retvalue, retdata
+
+def format_list_benchmark(benchmark_information, detail=False):
+    if detail:
+        return format_list_benchmark_detailed(benchmark_information)
+
+    benchmark_list_output = []
+
+    benchmark_id_length = 3
+    benchmark_job_length = 20
+    benchmark_bandwidth_length = dict()
+    benchmark_iops_length = dict()
+
+    for test in [ "seq_read", "seq_write", "rand_read_4K", "rand_write_4K" ]:
+        benchmark_bandwidth_length[test] = 7
+        benchmark_iops_length[test] = 6
+
+    # For this output, we show only the sequential (seq_read and seq_write) and 4K
+    # random (rand_read_4K and rand_write_4K) results, since showing all eight
+    # results for every job would make the list unreadably wide.
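+    # For reference, a completed 'benchmark_result' field holds the JSON document
+    # stored by the API's run_benchmark task, shaped roughly like (abridged):
+    #   { "seq_read": { "overall": { "bandwidth": ..., "iops": ..., ... },
+    #                   "latency": { ... }, "bandwidth": { ... },
+    #                   "iops": { ... }, "cpu": { ... } },
+    #     ..., "rand_write_4K": { ... } }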
+
+    for benchmark in benchmark_information:
+        benchmark_id = benchmark['id']
+        _benchmark_id_length = len(str(benchmark_id))
+        if _benchmark_id_length > benchmark_id_length:
+            benchmark_id_length = _benchmark_id_length
+
+        benchmark_job = benchmark['job']
+        _benchmark_job_length = len(benchmark_job)
+        if _benchmark_job_length > benchmark_job_length:
+            benchmark_job_length = _benchmark_job_length
+
+        if benchmark['benchmark_result'] == 'Running':
+            continue
+        benchmark_data = json.loads(benchmark['benchmark_result'])
+
+        benchmark_bandwidth = dict()
+        benchmark_iops = dict()
+        for test in [ "seq_read", "seq_write", "rand_read_4K", "rand_write_4K" ]:
+            benchmark_bandwidth[test] = format_bytes_tohuman(int(benchmark_data[test]['overall']['bandwidth']) * 1024)
+            benchmark_iops[test] = format_ops_tohuman(int(benchmark_data[test]['overall']['iops']))
+
+            _benchmark_bandwidth_length = len(benchmark_bandwidth[test]) + 1
+            if _benchmark_bandwidth_length > benchmark_bandwidth_length[test]:
+                benchmark_bandwidth_length[test] = _benchmark_bandwidth_length
+
+            _benchmark_iops_length = len(benchmark_iops[test]) + 1
+            if _benchmark_iops_length > benchmark_iops_length[test]:
+                benchmark_iops_length[test] = _benchmark_iops_length
+
+    # Format the output header line 1
+    benchmark_list_output.append('{bold}\
+{benchmark_id: <{benchmark_id_length}} \
+{benchmark_job: <{benchmark_job_length}} \
+    {seq_header: <{seq_header_length}} \
+{rand_header: <{rand_header_length}} \
+{end_bold}'.format(
+        bold=ansiprint.bold(),
+        end_bold=ansiprint.end(),
+        benchmark_id_length=benchmark_id_length,
+        benchmark_job_length=benchmark_job_length,
+        seq_header_length=benchmark_bandwidth_length['seq_read'] + benchmark_bandwidth_length['seq_write'] + benchmark_iops_length['seq_read'] + benchmark_iops_length['seq_write'] + 2,
+        rand_header_length=benchmark_bandwidth_length['rand_read_4K'] + benchmark_bandwidth_length['rand_write_4K'] + benchmark_iops_length['rand_read_4K'] + benchmark_iops_length['rand_write_4K'] + 2,
+        benchmark_id='ID',
+        benchmark_job='Benchmark Job',
+        seq_header='Sequential (4M blocks):',
+        rand_header='Random (4K blocks):'
+    )
+    )
+
+    # Format the output header line 2
+    benchmark_list_output.append('{bold}\
+{benchmark_id: <{benchmark_id_length}} \
+{benchmark_job: <{benchmark_job_length}} \
+    {seq_benchmark_bandwidth: <{seq_benchmark_bandwidth_length}} \
+{seq_benchmark_iops: <{seq_benchmark_iops_length}} \
+{rand_benchmark_bandwidth: <{rand_benchmark_bandwidth_length}} \
+{rand_benchmark_iops: <{rand_benchmark_iops_length}} \
+{end_bold}'.format(
+        bold=ansiprint.bold(),
+        end_bold=ansiprint.end(),
+        benchmark_id_length=benchmark_id_length,
+        benchmark_job_length=benchmark_job_length,
+        seq_benchmark_bandwidth_length=benchmark_bandwidth_length['seq_read'] + benchmark_bandwidth_length['seq_write'] + 1,
+        seq_benchmark_iops_length=benchmark_iops_length['seq_read'] + benchmark_iops_length['seq_write'],
+        rand_benchmark_bandwidth_length=benchmark_bandwidth_length['rand_read_4K'] + benchmark_bandwidth_length['rand_write_4K'] + 1,
+        rand_benchmark_iops_length=benchmark_iops_length['rand_read_4K'] + benchmark_iops_length['rand_write_4K'],
+        benchmark_id='',
+        benchmark_job='',
+        seq_benchmark_bandwidth='Bandwidth (R/W)',
+        seq_benchmark_iops='IOPS (R/W)',
+        rand_benchmark_bandwidth='Bandwidth (R/W)',
+        rand_benchmark_iops='IOPS (R/W)'
+    )
+    )
+
+    for benchmark in benchmark_information:
+        benchmark_id = benchmark['id']
+        benchmark_job = benchmark['job']
+
+        if benchmark['benchmark_result'] == 'Running':
+            seq_benchmark_bandwidth = 'Running'
+            seq_benchmark_iops = 'Running'
+            rand_benchmark_bandwidth = 'Running'
+            rand_benchmark_iops = 'Running'
+        else:
+            benchmark_data = json.loads(benchmark['benchmark_result'])
+            benchmark_bandwidth = dict()
+            benchmark_iops = dict()
+            for test in [ "seq_read", "seq_write", "rand_read_4K", "rand_write_4K" ]:
+                benchmark_bandwidth[test] = format_bytes_tohuman(int(benchmark_data[test]['overall']['bandwidth']) * 1024)
+                benchmark_iops[test] = format_ops_tohuman(int(benchmark_data[test]['overall']['iops']))
+
+            seq_benchmark_bandwidth = "{}/{}".format(benchmark_bandwidth['seq_read'], benchmark_bandwidth['seq_write'])
+            seq_benchmark_iops = "{}/{}".format(benchmark_iops['seq_read'], benchmark_iops['seq_write'])
+            rand_benchmark_bandwidth = "{}/{}".format(benchmark_bandwidth['rand_read_4K'], benchmark_bandwidth['rand_write_4K'])
+            rand_benchmark_iops = "{}/{}".format(benchmark_iops['rand_read_4K'], benchmark_iops['rand_write_4K'])
+
+        benchmark_list_output.append('{bold}\
+{benchmark_id: <{benchmark_id_length}} \
+{benchmark_job: <{benchmark_job_length}} \
+    {seq_benchmark_bandwidth: <{seq_benchmark_bandwidth_length}} \
+{seq_benchmark_iops: <{seq_benchmark_iops_length}} \
+{rand_benchmark_bandwidth: <{rand_benchmark_bandwidth_length}} \
+{rand_benchmark_iops: <{rand_benchmark_iops_length}} \
+{end_bold}'.format(
+            bold='',
+            end_bold='',
+            benchmark_id_length=benchmark_id_length,
+            benchmark_job_length=benchmark_job_length,
+            seq_benchmark_bandwidth_length=benchmark_bandwidth_length['seq_read'] + benchmark_bandwidth_length['seq_write'] + 1,
+            seq_benchmark_iops_length=benchmark_iops_length['seq_read'] + benchmark_iops_length['seq_write'],
+            rand_benchmark_bandwidth_length=benchmark_bandwidth_length['rand_read_4K'] + benchmark_bandwidth_length['rand_write_4K'] + 1,
+            rand_benchmark_iops_length=benchmark_iops_length['rand_read_4K'] + benchmark_iops_length['rand_write_4K'],
+            benchmark_id=benchmark_id,
+            benchmark_job=benchmark_job,
+            seq_benchmark_bandwidth=seq_benchmark_bandwidth,
+            seq_benchmark_iops=seq_benchmark_iops,
+            rand_benchmark_bandwidth=rand_benchmark_bandwidth,
+            rand_benchmark_iops=rand_benchmark_iops
+        )
+        )
+
+    return '\n'.join(benchmark_list_output)
diff --git a/client-cli/pvc.py b/client-cli/pvc.py
index 50a668ea..78fb26f9 100755
--- a/client-cli/pvc.py
+++ b/client-cli/pvc.py
@@ -1576,6 +1576,49 @@ def ceph_util():
         retdata = pvc_ceph.format_raw_output(retdata)
     cleanup(retcode, retdata)
 
+###############################################################################
+# pvc storage benchmark
+###############################################################################
+@click.group(name='benchmark', short_help='Run or view cluster storage benchmarks.')
+@cluster_req
+def ceph_benchmark():
+    """
+    Run or view benchmarks of the storage cluster.
+    """
+
+###############################################################################
+# pvc storage benchmark run
+###############################################################################
+@click.command(name='run', short_help='Run a storage benchmark.')
+@click.argument(
+    'pool'
+)
+@cluster_req
+def ceph_benchmark_run(pool):
+    """
+    Run a storage benchmark on POOL in the background.
+    """
+    retcode, retmsg = pvc_ceph.ceph_benchmark_run(config, pool)
+    cleanup(retcode, retmsg)
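+# Example usage of the above command, with a hypothetical pool name and task ID:
+#   $ pvc storage benchmark run vms
+#   Task ID: 4c05f834-1234-...
+# The job can then be reviewed once complete:
+#   $ pvc storage benchmark list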
+ """ + retcode, retmsg = pvc_ceph.ceph_benchmark_run(config, pool) + cleanup(retcode, retmsg) + +############################################################################### +# pvc storage benchmark list +############################################################################### +@click.command(name='list', short_help='List storage benchmark results.') +@click.argument( + 'job', default=None, required=False +) +@cluster_req +def ceph_benchmark_list(job): + """ + List all Ceph storage benchmarks; optionally only match JOB. + """ + + retcode, retdata = pvc_ceph.ceph_benchmark_list(config, job) + if retcode: + retdata = pvc_ceph.format_list_benchmark(retdata) + cleanup(retcode, retdata) + ############################################################################### # pvc storage osd ############################################################################### @@ -3619,6 +3662,9 @@ net_acl.add_command(net_acl_add) net_acl.add_command(net_acl_remove) net_acl.add_command(net_acl_list) +ceph_benchmark.add_command(ceph_benchmark_run) +ceph_benchmark.add_command(ceph_benchmark_list) + ceph_osd.add_command(ceph_osd_add) ceph_osd.add_command(ceph_osd_remove) ceph_osd.add_command(ceph_osd_in) @@ -3647,6 +3693,7 @@ ceph_volume_snapshot.add_command(ceph_volume_snapshot_list) cli_storage.add_command(ceph_status) cli_storage.add_command(ceph_util) +cli_storage.add_command(ceph_benchmark) cli_storage.add_command(ceph_osd) cli_storage.add_command(ceph_pool) cli_storage.add_command(ceph_volume) diff --git a/debian/control b/debian/control index 8b707d17..ec40221b 100644 --- a/debian/control +++ b/debian/control @@ -17,7 +17,7 @@ Description: Parallel Virtual Cluster node daemon (Python 3) Package: pvc-daemon-api Architecture: all -Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-gevent, python3-celery, python-celery-common, python3-distutils, redis, python3-redis, python3-lxml, python3-flask-migrate, python3-flask-script +Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-gevent, python3-celery, python-celery-common, python3-distutils, redis, python3-redis, python3-lxml, python3-flask-migrate, python3-flask-script, fio Description: Parallel Virtual Cluster API daemon (Python 3) A KVM/Zookeeper/Ceph-based VM and private cloud manager .