Add storage benchmarking to API

2020-08-24 14:57:52 -04:00
parent e4891831ce
commit 887e14a4e2
8 changed files with 838 additions and 6 deletions
--- a/api-daemon/migrations/versions/3bc6117ea44d_pvc_version_0_7.py
+++ b/api-daemon/migrations/versions/3bc6117ea44d_pvc_version_0_7.py
@ -0,0 +1,33 @@
+"""PVC version 0.7
+
+Revision ID: 3bc6117ea44d
+Revises: 88c8514684f7
+Create Date: 2020-08-24 14:34:36.919308
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '3bc6117ea44d'
+down_revision = '88c8514684f7'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('storage_benchmarks',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('job', sa.Text(), nullable=False),
+    sa.Column('result', sa.Text(), nullable=False),
+    sa.PrimaryKeyConstraint('id')
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('storage_benchmarks')
+    # ### end Alembic commands ###
--- a/api-daemon/pvcapid/benchmark.py
+++ b/api-daemon/pvcapid/benchmark.py
@ -0,0 +1,478 @@
+#!/usr/bin/env python3
+
+# benchmark.py - PVC API Benchmark functions
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+#    Copyright (C) 2018-2020 Joshua M. Boniface <joshua@boniface.me>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+import flask
+import json
+import psycopg2
+import psycopg2.extras
+import os
+import re
+import time
+import shlex
+import subprocess
+
+from distutils.util import strtobool as dustrtobool
+
+import daemon_lib.common as pvc_common
+import daemon_lib.node as pvc_node
+import daemon_lib.ceph as pvc_ceph
+
+import pvcapid.libvirt_schema as libvirt_schema
+
+from pvcapid.ova import list_ova
+
+def strtobool(stringv):
+    if stringv is None:
+        return False
+    if isinstance(stringv, bool):
+        return bool(stringv)
+    try:
+        return bool(dustrtobool(stringv))
+    except:
+        return False
+
+#
+# Exceptions (used by Celery tasks)
+#
+class BenchmarkError(Exception):
+    """
+    An exception that results from the Benchmark job.
+    """
+    def __init__(self, message, cur_time=None, db_conn=None, db_cur=None, zk_conn=None):
+        self.message = message
+        if cur_time is not None:
+            # Clean up our dangling result
+            query = "DELETE FROM storage_benchmarks WHERE job = %s;"
+            args = (cur_time,)
+            db_cur.execute(query, args)
+            db_conn.commit()
+            # Close the database connections cleanly
+            close_database(db_conn, db_cur)
+            pvc_common.stopZKConnection(zk_conn)
+
+    def __str__(self):
+        return str(self.message)
+
+#
+# Common functions
+#
+
+# Database connections
+def open_database(config):
+    conn = psycopg2.connect(
+        host=config['database_host'],
+        port=config['database_port'],
+        dbname=config['database_name'],
+        user=config['database_user'],
+        password=config['database_password']
+    )
+    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+    return conn, cur
+
+def close_database(conn, cur, failed=False):
+    if not failed:
+        conn.commit()
+    cur.close()
+    conn.close()
+
+def list_benchmarks(job=None):
+    if limit is not None:
+        query = "SELECT * FROM {} WHERE job = %s;".format('storage_benchmarks')
+        args = (job, )
+    else:
+        query = "SELECT * FROM {} ORDER BY id DESC;".format('storage_benchmarks')
+        args = ()
+
+    conn, cur = open_database(config)
+    cur.execute(query, args)
+    orig_data = cur.fetchall()
+    data = list()
+    for benchmark in orig_data:
+        benchmark_data = dict()
+        benchmark_data['id'] = benchmark['id']
+        benchmark_data['job'] = benchmark['job']
+        benchmark_data['benchmark_result'] = benchmark['result']
+        # Append the new data to our actual output structure
+        data.append(benchmark_data)
+    close_database(conn, cur)
+    if data:
+        return data, 200
+    else:
+        return { 'message': 'No benchmark found.' }, 404
+
+def run_benchmark(self, pool):
+    # Runtime imports
+    import time
+    import json
+    from datetime import datetime
+
+    time.sleep(2)
+
+    cur_time = datetime.now().isoformat(timespec='seconds')
+
+    print("Starting storage benchmark '{}' on pool '{}'".format(cur_time, pool))
+
+    # Phase 0 - connect to databases
+    try:
+        db_conn, db_cur = open_database(config)
+    except:
+        print('FATAL - failed to connect to Postgres')
+        raise Exception
+
+    try:
+        zk_conn = pvc_common.startZKConnection(config['coordinators'])
+    except:
+        print('FATAL - failed to connect to Zookeeper')
+        raise Exception
+
+
+    print("Storing running status for job '{}' in database".format(cur_time))
+    try:
+        query = "INSERT INTO storage_benchmarks (job, result) VALUES (%s, %s);"
+        args = (cur_time, "Running",)
+        db_cur.execute(query, args)
+        db_conn.commit()
+    except Exception as e:
+        raise BenchmarkError("Failed to store running status: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
+
+    # Phase 1 - volume preparation
+    self.update_state(state='RUNNING', meta={'current': 1, 'total': 3, 'status': 'Creating benchmark volume'})
+    time.sleep(1)
+
+    volume = 'pvcbenchmark'
+
+    # Create the RBD volume
+    retcode, retmsg = pvc_ceph.add_volume(zk_conn, pool, volume, "4G")
+    if not retcode:
+        raise BenchmarkError('Failed to create volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
+    else:
+        print(retmsg)
+
+    # Phase 2 - benchmark run
+    self.update_state(state='RUNNING', meta={'current': 2, 'total': 3, 'status': 'Running fio benchmarks on volume'})
+    time.sleep(1)
+
+    # We run a total of 8 tests, to give a generalized idea of performance on the cluster:
+    #   1. A sequential read test of 4GB with a 4M block size
+    #   2. A sequential write test of 4GB with a 4M block size
+    #   3. A random read test of 4GB with a 4M block size
+    #   4. A random write test of 4GB with a 4M block size
+    #   5. A random read test of 4GB with a 256k block size
+    #   6. A random write test of 4GB with a 256k block size
+    #   7. A random read test of 4GB with a 4k block size
+    #   8. A random write test of 4GB with a 4k block size
+    # Taken together, these 8 results should give a very good indication of the overall storage performance
+    # for a variety of workloads.
+    test_matrix = {
+        'seq_read': {
+            'direction': 'read',
+            'bs': '4M',
+            'rw': 'read'
+        },
+        'seq_write': {
+            'direction': 'write',
+            'bs': '4M',
+            'rw': 'write'
+        },
+        'rand_read_4M': {
+            'direction': 'read',
+            'bs': '4M',
+            'rw': 'randread'
+        },
+        'rand_write_4M': {
+            'direction': 'write',
+            'bs': '4M',
+            'rw': 'randwrite'
+        },
+        'rand_read_256K': {
+            'direction': 'read',
+            'bs': '256K',
+            'rw': 'randread'
+        },
+        'rand_write_256K': {
+            'direction': 'write',
+            'bs': '256K',
+            'rw': 'randwrite'
+        },
+        'rand_read_4K': {
+            'direction': 'read',
+            'bs': '4K',
+            'rw': 'randread'
+        },
+        'rand_write_4K': {
+            'direction': 'write',
+            'bs': '4K',
+            'rw': 'randwrite'
+        }
+    }
+    parsed_results = dict()
+    for test in test_matrix:
+        print("Running test '{}'".format(test))
+        fio_cmd = """
+            fio \
+                --output-format=terse \
+                --terse-version=5 \
+                --ioengine=rbd \
+                --pool={pool} \
+                --rbdname={volume} \
+                --direct=1 \
+                --randrepeat=1 \
+                --gtod_reduce=1 \
+                --iodepth=64 \
+                --size=128M \
+                --name={test} \
+                --bs={bs} \
+                --readwrite={rw}
+        """.format(
+                pool=pool,
+                volume=volume,
+                test=test,
+                bs=test_matrix[test]['bs'],
+                rw=test_matrix[test]['rw']
+            )
+
+        retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
+        if retcode:
+            raise BenchmarkError("Failed to run fio test: {}".format(stderr), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
+
+        # Parse the terse results to avoid storing tons of junk
+        # Reference: https://fio.readthedocs.io/en/latest/fio_doc.html#terse-output
+        # This is written out broken up because the man page didn't bother to do this, and I'm putting it here for posterity.
+        # Example Read test (line breaks to match man ref):
+        #    I 5;fio-3.12;test;0;0; (5) [0, 1, 2, 3, 4]
+        #    R 8388608;2966268;724;2828; (4) [5, 6, 7, 8]
+        #      0;0;0.000000;0.000000; (4) [9, 10, 11, 12]
+        #      0;0;0.000000;0.000000; (4) [13, 14, 15, 16]
+        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,33, 34, 35, 36]
+        #      0;0;0.000000;0.000000; (4) [37, 38, 39, 40]
+        #      2842624;3153920;100.000000%;2967142.400000;127226.797479;5; (6) [41, 42, 43, 44, 45, 46]
+        #      694;770;724.400000;31.061230;5; (5) [47, 48, 49, 50, 51]
+        #    W 0;0;0;0; (4) [52, 53, 54, 55]
+        #      0;0;0.000000;0.000000; (4) [56, 57, 58, 59]
+        #      0;0;0.000000;0.000000; (4) [60, 61, 62, 63]
+        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [64, 65, 66, 67, 68. 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83]
+        #      0;0;0.000000;0.000000; (4) [84, 85, 86, 87]
+        #      0;0;0.000000%;0.000000;0.000000;0; (6) [88, 89, 90, 91, 92, 93]
+        #      0;0;0.000000;0.000000;0; (5) [94, 95, 96, 97, 98]
+        #    T 0;0;0;0; (4) [99, 100, 101, 102]
+        #      0;0;0.000000;0.000000; (4) [103, 104, 105, 106]
+        #      0;0;0.000000;0.000000; (4) [107, 108, 109, 110]
+        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130]
+        #      0;0;0.000000;0.000000; (4) [131, 132, 133, 134]
+        #      0;0;0.000000%;0.000000;0.000000;0; (6) [135, 136, 137, 138, 139, 140]
+        #      0;0;0.000000;0.000000;0; (5) [141, 142, 143, 144, 145]
+        #    C 0.495225%;0.000000%;2083;0;13; (5) [146, 147, 148, 149, 150]
+        #    D 0.1%;0.1%;0.2%;0.4%;0.8%;1.6%;96.9%; (7) [151, 152, 153, 154, 155, 156, 157]
+        #    U 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (10) [158, 159, 160, 161, 162, 163, 164, 165, 166, 167]
+        #    M 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (12) [168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178. 179]
+        #    B dm-0;0;110;0;0;0;4;4;0.15%; (9) [180, 181, 182, 183, 184, 185, 186, 187, 188]
+        #      slaves;0;118;0;28;0;23;0;0.00%; (9) [189, 190, 191, 192, 193, 194, 195, 196, 197]
+        #      sde;0;118;0;28;0;23;0;0.00% (9) [198, 199, 200, 201, 202, 203, 204, 205, 206]
+        # Example Write test:
+        #    I 5;fio-3.12;test;0;0; (5)
+        #    R 0;0;0;0; (4)
+        #      0;0;0.000000;0.000000; (4)
+        #      0;0;0.000000;0.000000; (4)
+        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
+        #      0;0;0.000000;0.000000; (4)
+        #      0;0;0.000000%;0.000000;0.000000;0; (6)
+        #      0;0;0.000000;0.000000;0; (5)
+        #    W 8388608;1137438;277;7375; (4)
+        #      0;0;0.000000;0.000000; (4)
+        #      0;0;0.000000;0.000000; (4)
+        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
+        #      0;0;0.000000;0.000000; (4)
+        #      704512;1400832;99.029573%;1126400.000000;175720.860374;14; (6)
+        #      172;342;275.000000;42.900601;14; (5)
+        #    T 0;0;0;0; (4)
+        #      0;0;0.000000;0.000000; (4)
+        #      0;0;0.000000;0.000000; (4)
+        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
+        #      0;0;0.000000;0.000000; (4)
+        #      0;0;0.000000%;0.000000;0.000000;0; (6)
+        #      0;0;0.000000;0.000000;0; (5)
+        #    C 12.950909%;1.912124%;746;0;95883; (5)
+        #    D 0.1%;0.1%;0.2%;0.4%;0.8%;1.6%;96.9%; (7)
+        #    U 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (10)
+        #    M 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (12)
+        #    B dm-0;0;196;0;0;0;12;12;0.16%; (9)
+        #      slaves;0;207;0;95;0;39;16;0.21%; (9)
+        #      sde;0;207;0;95;0;39;16;0.21% (9)
+        results = stdout.split(';')
+        if test_matrix[test]['direction'] == 'read':
+            # Stats
+            #         5:   Total IO (KiB)
+            #         6:   bandwidth (KiB/sec)
+            #         7:   IOPS
+            #         8:   runtime (msec)
+            # Total latency
+            #         37:  min 
+            #         38:  max 
+            #         39:  mean
+            #         40:  stdev
+            # Bandwidth
+            #         41:  min 
+            #         42:  max 
+            #         43:  agg % of total
+            #         44:  mean
+            #         45:  stdev
+            #         46:  # samples
+            # IOPS
+            #         47:  min 
+            #         48:  max 
+            #         49:  mean
+            #         50:  stdev
+            #         51:  # samples
+            # CPU
+            #         146: user
+            #         147: system
+            #         148: ctx switches
+            #         149: maj faults
+            #         150: min faults
+            parsed_results[test] = {
+                "overall": {
+                    "iosize": results[5],
+                    "bandwidth": results[6],
+                    "iops": results[7],
+                    "runtime": results[8]
+                },
+                "latency": {
+                    "min": results[37],
+                    "max": results[38],
+                    "mean": results[39],
+                    "stdev": results[40]
+                },
+                "bandwidth": {
+                    "min": results[41],
+                    "max": results[42],
+                    "aggrpct": results[43],
+                    "mean": results[44],
+                    "stdev": results[45],
+                    "numsamples": results[46],
+                },
+                "iops": {
+                    "min": results[47],
+                    "max": results[48],
+                    "mean": results[49],
+                    "stdev": results[50],
+                    "numsamples": results[51]
+                },
+                "cpu": {
+                    "user": results[146],
+                    "system": results[147],
+                    "ctxsw": results[148],
+                    "majfault": results[149],
+                    "minfault": results[150]
+                }
+            }
+
+        if test_matrix[test]['direction'] == 'write':
+            # Stats
+            #         52:  Total IO (KiB)
+            #         53:  bandwidth (KiB/sec)
+            #         54:  IOPS
+            #         55:  runtime (msec)
+            # Total latency
+            #         84:  min
+            #         85:  max
+            #         86:  mean
+            #         87:  stdev
+            # Bandwidth
+            #         88:  min
+            #         89:  max
+            #         90:  agg % of total
+            #         91:  mean
+            #         92:  stdev
+            #         93:  # samples
+            # IOPS
+            #         94:  min
+            #         95:  max
+            #         96:  mean
+            #         97:  stdev
+            #         98:  # samples
+            # CPU     
+            #         146: user
+            #         147: system
+            #         148: ctx switches
+            #         149: maj faults
+            #         150: min faults
+            parsed_results[test] = {
+                "overall": {
+                    "iosize": results[52],
+                    "bandwidth": results[53],
+                    "iops": results[54],
+                    "runtime": results[55]
+                },
+                "latency": {
+                    "min": results[84],
+                    "max": results[85],
+                    "mean": results[86],
+                    "stdev": results[87]
+                },
+                "bandwidth": {
+                    "min": results[88],
+                    "max": results[89],
+                    "aggrpct": results[90],
+                    "mean": results[91],
+                    "stdev": results[92],
+                    "numsamples": results[93],
+                },
+                "iops": {
+                    "min": results[94],
+                    "max": results[95],
+                    "mean": results[96],
+                    "stdev": results[97],
+                    "numsamples": results[98]
+                },
+                "cpu": {
+                    "user": results[146],
+                    "system": results[147],
+                    "ctxsw": results[148],
+                    "majfault": results[149],
+                    "minfault": results[150]
+                }
+            }
+        print(parsed_results)
+        
+    # Phase 3 - cleanup
+    self.update_state(state='RUNNING', meta={'current': 3, 'total': 3, 'status': 'Cleaning up and storing results'})
+    time.sleep(1)
+
+    # Remove the RBD volume
+    retcode, retmsg = pvc_ceph.remove_volume(zk_conn, pool, volume)
+    if not retcode:
+        raise BenchmarkError('Failed to remove volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
+    else:
+        print(retmsg)
+
+    print("Storing result of tests for job '{}' in database".format(cur_time))
+    try:
+        query = "UPDATE storage_benchmarks SET result = %s WHERE job = %s;"
+        args = (json.dumps(parsed_results), cur_time)
+        db_cur.execute(query, args)
+        db_conn.commit()
+    except Exception as e:
+        raise BenchmarkError("Failed to store test results: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
+
+    close_database(db_conn, db_cur)
+    pvc_common.stopZKConnection(zk_conn)
+    return { 'status': "Storage benchmark '{}' completed successfully.", 'current': 3, 'total': 3 }
--- a/api-daemon/pvcapid/flaskapi.py
+++ b/api-daemon/pvcapid/flaskapi.py
@ -39,6 +39,7 @@ from celery.task.control import inspect

 import pvcapid.helper as api_helper
 import pvcapid.provisioner as api_provisioner
+import pvcapid.benchmark as api_benchmark
 import pvcapid.ova as api_ova

 from flask_sqlalchemy import SQLAlchemy
@ -107,6 +108,8 @@ try:
    api_helper.config = config
    # Set the config object in the api_provisioner namespace
    api_provisioner.config = config
+    # Set the config object in the api_benchmark namespace
+    api_benchmark.config = config
    # Set the config object in the api_ova namespace
    api_ova.config = config
 except Exception as e:
@ -198,6 +201,10 @@ def Authenticator(function):
 def create_vm(self, vm_name, profile_name, define_vm=True, start_vm=True, script_run_args=[]):
    return api_provisioner.create_vm(self, vm_name, profile_name, define_vm=define_vm, start_vm=start_vm, script_run_args=script_run_args)

+@celery.task(bind=True)
+def run_benchmark(self, pool):
+    return api_benchmark.run_benchmark(self, pool)
+

 ##########################################################
 # API Root/Authentication
@ -2600,6 +2607,64 @@ class API_Storage_Ceph_Utilization(Resource):
        return api_helper.ceph_util()
 api.add_resource(API_Storage_Ceph_Utilization, '/storage/ceph/utilization')

+# /storage/ceph/benchmark
+class API_Storage_Ceph_Benchmark(Resource):
+    @RequestParser([
+        { 'name': 'job' }
+    ])
+    @Authenticator
+    def get(self, reqargs):
+        """
+        List results from benchmark jobs
+        ---
+        tags:
+          - storage / ceph
+        responses:
+          200:
+            description: OK
+            schema:
+              type: object
+              properties:
+                tbd:
+                  type: object
+                  description: TBD
+        """
+        return api_benchmark.list_benchmarks(reqargs.get('job', None))
+
+    @RequestParser([
+        { 'name': 'pool', 'required': True, 'helpmsg': "A valid pool must be specified." },
+    ])
+    @Authenticator
+    def post(self, reqargs):
+        """
+        Execute a storage benchmark against a storage pool
+        ---
+        tags:
+          - storage / ceph
+        parameters:
+          - in: query
+            name: pool
+            type: string
+            required: true
+            description: The PVC storage pool to benchmark
+        responses:
+          200:
+            description: OK
+            schema:
+                type: string
+                description: The job ID of the benchmark
+        """
+        # Verify that the pool is valid
+        _list, code = api_helper.ceph_pool_list(reqargs.get('pool', None), is_fuzzy=False)
+        if code != 200:
+            return { 'message': 'Pool "{}" is not valid.'.format(reqargs.get('pool')) }, 400
+
+        task = run_benchmark.delay(
+            reqargs.get('pool', None)
+        )
+        return { "task_id": task.id }, 202, { 'Location': Api.url_for(api, API_Storage_Ceph_Benchmark, task_id=task.id) }
+api.add_resource(API_Storage_Ceph_Benchmark, '/storage/ceph/benchmark')
+
 # /storage/ceph/option
 class API_Storage_Ceph_Option(Resource):
    @RequestParser([
--- a/api-daemon/pvcapid/models.py
+++ b/api-daemon/pvcapid/models.py
@ -213,3 +213,17 @@ class DBProfile(db.Model):

    def __repr__(self):
        return '<id {}>'.format(self.id)
+
+class DBStorageBenchmarks(db.Model):
+    __tablename__ = 'storage_benchmarks'
+
+    id = db.Column(db.Integer, primary_key=True)
+    job = db.Column(db.Text, nullable=False)
+    result = db.Column(db.Text, nullable=False)
+
+    def __init__(self, job, result):
+        self.job = job
+        self.result = result
+
+    def __repr__(self):
+        return '<id {}>'.format(self.id)
--- a/build-and-deploy.sh
+++ b/build-and-deploy.sh
@ -32,9 +32,9 @@ for HOST in ${HOSTS[@]}; do
    ssh $HOST $SUDO dpkg -i /tmp/pvc/{pvc-client-cli,pvc-daemon-common,pvc-daemon-api,pvc-daemon-node}*.deb
    ssh $HOST rm -rf /tmp/pvc
    echo "Restarting PVC node daemon..."
-    ssh $HOST $SUDO systemctl restart pvcnoded
-    echo "****"
-    echo "Waiting 15s for host ${HOST} to stabilize"
-    echo "****"
-    sleep 15
+    ssh $HOST $SUDO systemctl restart pvcapid
+#    echo "****"
+#   echo "Waiting 15s for host ${HOST} to stabilize"
+#    echo "****"
+#    sleep 15
 done
--- a/client-cli/cli_lib/ceph.py
+++ b/client-cli/cli_lib/ceph.py
@ -1249,3 +1249,198 @@ def format_list_snapshot(snapshot_list):
        )

    return '\n'.join(sorted(snapshot_list_output))
+
+#
+# Benchmark functions
+#
+def ceph_benchmark_run(config, pool):
+    """
+    Run a storage benchmark against {pool}
+
+    API endpoint: POST /api/v1/storage/ceph/benchmark
+    API arguments: pool={pool}
+    API schema: {message}
+    """
+    params = {
+        'pool': pool
+    }
+    response = call_api(config, 'post', '/storage/ceph/benchmark', params=params)
+
+    if response.status_code == 202:
+        retvalue = True
+        retdata = 'Task ID: {}'.format(response.json()['task_id'])
+    else:
+        retvalue = False
+        retdata = response.json().get('message', '')
+        
+    return retvalue, retdata
+
+def ceph_benchmark_list(config, job):
+    """
+    View results of one or more previous benchmark runs
+
+    API endpoint: GET /api/v1/storage/ceph/benchmark
+    API arguments: job={job}
+    API schema: {results}
+    """
+    if job is not None:
+        params = {
+            'job': job
+        }
+    else:
+        params = {}
+
+    response = call_api(config, 'get', '/storage/ceph/benchmark', params=params)
+
+    if response.status_code == 200:
+        retvalue = True
+        retdata = response.json()
+    else:
+        retvalue = False
+        retdata = response.json()
+
+    return retvalue, retdata
+
+def format_list_benchmark(benchmark_information, detail=False):
+    if detail:
+        return format_list_benchmark_detailed(benchmark_information)
+
+    benchmark_list_output = []
+    
+    benchmark_id_length = 3
+    benchmark_job_length = 20
+    benchmark_bandwidth_length = dict()
+    benchmark_iops_length = dict()
+
+    for test in [ "seq_read", "seq_write", "rand_read_4K", "rand_write_4K" ]:
+        benchmark_bandwidth_length[test] = 7
+        benchmark_iops_length[test] = 6
+
+    # For this output, we're only showing the Sequential (seq_read and seq_write) and 4k Random (rand_read_4K and rand_write_4K) results since we're showing them for each test result.
+
+#    print(benchmark_information)
+
+    for benchmark in benchmark_information:
+        benchmark_id = benchmark['id']
+        _benchmark_id_length = len(str(benchmark_id))
+        if _benchmark_id_length > benchmark_id_length:
+            benchmark_id_length = _benchmark_id_length
+
+        benchmark_job = benchmark['job']
+        _benchmark_job_length = len(benchmark_job)
+        if _benchmark_job_length > benchmark_job_length:
+            benchmark_job_length = _benchmark_job_length
+
+        if benchmark['benchmark_result'] == 'Running':
+            continue
+        benchmark_data = json.loads(benchmark['benchmark_result'])
+
+        benchmark_bandwidth = dict()
+        benchmark_iops = dict()
+        for test in [ "seq_read", "seq_write", "rand_read_4K", "rand_write_4K" ]:
+            benchmark_bandwidth[test] = format_bytes_tohuman(int(benchmark_data[test]['overall']['bandwidth']) * 1024)
+            benchmark_iops[test] = format_ops_tohuman(int(benchmark_data[test]['overall']['iops']))
+
+            _benchmark_bandwidth_length = len(benchmark_bandwidth[test]) + 1
+            if _benchmark_bandwidth_length > benchmark_bandwidth_length[test]:
+                benchmark_bandwidth_length[test] = _benchmark_bandwidth_length
+
+            _benchmark_iops_length = len(benchmark_iops[test]) + 1
+            if _benchmark_iops_length > benchmark_bandwidth_length[test]:
+                benchmark_iops_length[test] = _benchmark_iops_length
+
+    # Format the output header line 1
+    benchmark_list_output.append('{bold}\
+{benchmark_id: <{benchmark_id_length}} \
+{benchmark_job: <{benchmark_job_length}} \
+ {seq_header: <{seq_header_length}} \
+{rand_header: <{rand_header_length}} \
+{end_bold}'.format(
+            bold=ansiprint.bold(),
+            end_bold=ansiprint.end(),
+            benchmark_id_length=benchmark_id_length,
+            benchmark_job_length=benchmark_job_length,
+            seq_header_length=benchmark_bandwidth_length['seq_read'] + benchmark_bandwidth_length['seq_write'] + benchmark_iops_length['seq_read'] + benchmark_iops_length['seq_write'] + 2,
+            rand_header_length=benchmark_bandwidth_length['rand_read_4K'] + benchmark_bandwidth_length['rand_write_4K'] + benchmark_iops_length['rand_read_4K'] + benchmark_iops_length['rand_write_4K'] + 2,
+            benchmark_id='ID',
+            benchmark_job='Benchmark Job',
+            seq_header='Sequential (4M blocks):',
+            rand_header='Random (4K blocks):'
+        )
+    )
+
+    benchmark_list_output.append('{bold}\
+{benchmark_id: <{benchmark_id_length}} \
+{benchmark_job: <{benchmark_job_length}} \
+ {seq_benchmark_bandwidth: <{seq_benchmark_bandwidth_length}} \
+{seq_benchmark_iops: <{seq_benchmark_iops_length}} \
+{rand_benchmark_bandwidth: <{rand_benchmark_bandwidth_length}} \
+{rand_benchmark_iops: <{rand_benchmark_iops_length}} \
+{end_bold}'.format(
+            bold=ansiprint.bold(),
+            end_bold=ansiprint.end(),
+            benchmark_id_length=benchmark_id_length,
+            benchmark_job_length=benchmark_job_length,
+            seq_benchmark_bandwidth_length=benchmark_bandwidth_length['seq_read'] + benchmark_bandwidth_length['seq_write'] + 1,
+            seq_benchmark_iops_length=benchmark_iops_length['seq_read'] + benchmark_iops_length['seq_write'],
+            rand_benchmark_bandwidth_length=benchmark_bandwidth_length['rand_read_4K'] + benchmark_bandwidth_length['rand_write_4K'] + 1,
+            rand_benchmark_iops_length=benchmark_iops_length['rand_read_4K'] + benchmark_iops_length['rand_write_4K'],
+            benchmark_id='',
+            benchmark_job='',
+            seq_benchmark_bandwidth='Bandwith (R/W)',
+            seq_benchmark_iops='IOPS (R/W)',
+            rand_benchmark_bandwidth='Bandwith (R/W)',
+            rand_benchmark_iops='IOPS (R/W)'
+        )
+    )
+
+    for benchmark in benchmark_information:
+        benchmark_id = benchmark['id']
+        benchmark_job = benchmark['job']
+
+        if benchmark['benchmark_result'] == 'Running':
+            seq_benchmark_bandwidth = 'Running'
+            seq_benchmark_iops = 'Running'
+            rand_benchmark_bandwidth = 'Running'
+            rand_benchmark_iops = 'Running'
+        else:
+            benchmark_bandwidth = dict()
+            benchmark_iops = dict()
+            for test in [ "seq_read", "seq_write", "rand_read_4K", "rand_write_4K" ]:
+                benchmark_data = json.loads(benchmark['benchmark_result'])
+                benchmark_bandwidth[test] = format_bytes_tohuman(int(benchmark_data[test]['overall']['bandwidth']) * 1024)
+                benchmark_iops[test] = format_ops_tohuman(int(benchmark_data[test]['overall']['iops']))
+    
+            seq_benchmark_bandwidth = "{}/{}".format(benchmark_bandwidth['seq_read'], benchmark_bandwidth['seq_write'])
+            seq_benchmark_iops = "{}/{}".format(benchmark_iops['seq_read'], benchmark_iops['seq_write'])
+            rand_benchmark_bandwidth = "{}/{}".format(benchmark_bandwidth['rand_read_4K'], benchmark_bandwidth['rand_write_4K'])
+            rand_benchmark_iops = "{}/{}".format(benchmark_iops['rand_read_4K'], benchmark_iops['rand_write_4K'])
+            
+    
+        benchmark_list_output.append('{bold}\
+{benchmark_id: <{benchmark_id_length}} \
+{benchmark_job: <{benchmark_job_length}} \
+ {seq_benchmark_bandwidth: <{seq_benchmark_bandwidth_length}} \
+{seq_benchmark_iops: <{seq_benchmark_iops_length}} \
+{rand_benchmark_bandwidth: <{rand_benchmark_bandwidth_length}} \
+{rand_benchmark_iops: <{rand_benchmark_iops_length}} \
+{end_bold}'.format(
+                bold='',
+                end_bold='',
+                benchmark_id_length=benchmark_id_length,
+                benchmark_job_length=benchmark_job_length,
+                seq_benchmark_bandwidth_length=benchmark_bandwidth_length['seq_read'] + benchmark_bandwidth_length['seq_write'] + 1,
+                seq_benchmark_iops_length=benchmark_iops_length['seq_read'] + benchmark_iops_length['seq_write'],
+                rand_benchmark_bandwidth_length=benchmark_bandwidth_length['rand_read_4K'] + benchmark_bandwidth_length['rand_write_4K'] + 1,
+                rand_benchmark_iops_length=benchmark_iops_length['rand_read_4K'] + benchmark_iops_length['rand_write_4K'],
+                benchmark_id=benchmark_id,
+                benchmark_job=benchmark_job,
+                seq_benchmark_bandwidth=seq_benchmark_bandwidth,
+                seq_benchmark_iops=seq_benchmark_iops,
+                rand_benchmark_bandwidth=rand_benchmark_bandwidth,
+                rand_benchmark_iops=rand_benchmark_iops
+            )
+        )
+
+    return '\n'.join(benchmark_list_output)
+
--- a/client-cli/pvc.py
+++ b/client-cli/pvc.py
@ -1576,6 +1576,49 @@ def ceph_util():
        retdata = pvc_ceph.format_raw_output(retdata)
    cleanup(retcode, retdata)

+###############################################################################
+# pvc storage benchmark
+###############################################################################
+@click.group(name='benchmark', short_help='Run or view cluster storage benchmarks.')
+@cluster_req
+def ceph_benchmark():
+    """
+    Run or view benchmarks of the storage cluster.
+    """
+
+###############################################################################
+# pvc storage benchmark run
+###############################################################################
+@click.command(name='run', short_help='Run a storage benchmark.')
+@click.argument(
+    'pool'
+)
+@cluster_req
+def ceph_benchmark_run(pool):
+    """
+    Run a storage benchmark on POOL in the background.
+    """
+    retcode, retmsg = pvc_ceph.ceph_benchmark_run(config, pool)
+    cleanup(retcode, retmsg)
+
+###############################################################################
+# pvc storage benchmark list
+###############################################################################
+@click.command(name='list', short_help='List storage benchmark results.')
+@click.argument(
+    'job', default=None, required=False
+)
+@cluster_req
+def ceph_benchmark_list(job):
+    """
+    List all Ceph storage benchmarks; optionally only match JOB.
+    """
+
+    retcode, retdata = pvc_ceph.ceph_benchmark_list(config, job)
+    if retcode:
+        retdata = pvc_ceph.format_list_benchmark(retdata)
+    cleanup(retcode, retdata)
+
 ###############################################################################
 # pvc storage osd
 ###############################################################################
@ -3619,6 +3662,9 @@ net_acl.add_command(net_acl_add)
 net_acl.add_command(net_acl_remove)
 net_acl.add_command(net_acl_list)

+ceph_benchmark.add_command(ceph_benchmark_run)
+ceph_benchmark.add_command(ceph_benchmark_list)
+
 ceph_osd.add_command(ceph_osd_add)
 ceph_osd.add_command(ceph_osd_remove)
 ceph_osd.add_command(ceph_osd_in)
@ -3647,6 +3693,7 @@ ceph_volume_snapshot.add_command(ceph_volume_snapshot_list)

 cli_storage.add_command(ceph_status)
 cli_storage.add_command(ceph_util)
+cli_storage.add_command(ceph_benchmark)
 cli_storage.add_command(ceph_osd)
 cli_storage.add_command(ceph_pool)
 cli_storage.add_command(ceph_volume)
--- a/debian/control
+++ b/debian/control
@ -17,7 +17,7 @@ Description: Parallel Virtual Cluster node daemon (Python 3)

 Package: pvc-daemon-api
 Architecture: all
-Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-gevent, python3-celery, python-celery-common, python3-distutils, redis, python3-redis, python3-lxml, python3-flask-migrate, python3-flask-script
+Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-gevent, python3-celery, python-celery-common, python3-distutils, redis, python3-redis, python3-lxml, python3-flask-migrate, python3-flask-script, fio
 Description: Parallel Virtual Cluster API daemon (Python 3)
 A KVM/Zookeeper/Ceph-based VM and private cloud manager
 .