Update to storage benchmark format 1

1. Runs `fio` with the `--format=json` option and removes all terse format parsing from the results. 2. Adds a 15-second ramp time to minimize wonky ramp-up results. 3. Sets group_reporting, which isn't necessary with only a single job, but is here for consistency.
2021-10-02 01:36:27 -04:00
parent 0058f19d88
commit e06b114c48
1 changed files with 11 additions and 202 deletions
--- a/api-daemon/pvcapid/benchmark.py
+++ b/api-daemon/pvcapid/benchmark.py
@ -22,6 +22,8 @@
 import psycopg2
 import psycopg2.extras

+from json import loads, dumps
+
 from pvcapid.Daemon import config

 from daemon_lib.zkhandler import ZKHandler
@ -78,8 +80,6 @@ def close_database(conn, cur, failed=False):


 def list_benchmarks(job=None):
-    from json import loads
-
    if job is not None:
        query = "SELECT * FROM {} WHERE job = %s;".format('storage_benchmarks')
        args = (job, )
@ -109,11 +109,10 @@ def list_benchmarks(job=None):
 def run_benchmark(self, pool):
    # Runtime imports
    import time
-    import json
    from datetime import datetime

    # Define the current test format
-    TEST_FORMAT = 0
+    TEST_FORMAT = 1

    time.sleep(2)

@ -224,7 +223,8 @@ def run_benchmark(self, pool):
            'rw': 'randwrite'
        },
    }
-    parsed_results = dict()
+
+    results = dict()
    for test in test_matrix:
        print("Running test '{}'".format(test))
        fio_cmd = """
@ -233,14 +233,15 @@ def run_benchmark(self, pool):
                --ioengine=rbd \
                --pool={pool} \
                --rbdname={volume} \
-                --output-format=terse \
-                --terse-version=5 \
+                --output-format=json \
                --direct=1 \
                --randrepeat=1 \
-                --iodepth={iodepth} \
                --numjobs=1 \
                --time_based \
                --runtime=60 \
+                --ramp_time=15 \
+                --group_reporting \
+                --iodepth={iodepth} \
                --bs={bs} \
                --readwrite={rw}
        """.format(
@ -255,199 +256,7 @@ def run_benchmark(self, pool):
        if retcode:
            raise BenchmarkError("Failed to run fio test: {}".format(stderr), job_name=job_name, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)

-        # Parse the terse results to avoid storing tons of junk
-        # Reference: https://fio.readthedocs.io/en/latest/fio_doc.html#terse-output
-        # This is written out broken up because the man page didn't bother to do this, and I'm putting it here for posterity.
-        # Example Read test (line breaks to match man ref):
-        #    I 5;fio-3.12;test;0;0; (5) [0, 1, 2, 3, 4]
-        #    R 8388608;2966268;724;2828; (4) [5, 6, 7, 8]
-        #      0;0;0.000000;0.000000; (4) [9, 10, 11, 12]
-        #      0;0;0.000000;0.000000; (4) [13, 14, 15, 16]
-        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,33, 34, 35, 36]
-        #      0;0;0.000000;0.000000; (4) [37, 38, 39, 40]
-        #      2842624;3153920;100.000000%;2967142.400000;127226.797479;5; (6) [41, 42, 43, 44, 45, 46]
-        #      694;770;724.400000;31.061230;5; (5) [47, 48, 49, 50, 51]
-        #    W 0;0;0;0; (4) [52, 53, 54, 55]
-        #      0;0;0.000000;0.000000; (4) [56, 57, 58, 59]
-        #      0;0;0.000000;0.000000; (4) [60, 61, 62, 63]
-        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [64, 65, 66, 67, 68. 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83]
-        #      0;0;0.000000;0.000000; (4) [84, 85, 86, 87]
-        #      0;0;0.000000%;0.000000;0.000000;0; (6) [88, 89, 90, 91, 92, 93]
-        #      0;0;0.000000;0.000000;0; (5) [94, 95, 96, 97, 98]
-        #    T 0;0;0;0; (4) [99, 100, 101, 102]
-        #      0;0;0.000000;0.000000; (4) [103, 104, 105, 106]
-        #      0;0;0.000000;0.000000; (4) [107, 108, 109, 110]
-        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130]
-        #      0;0;0.000000;0.000000; (4) [131, 132, 133, 134]
-        #      0;0;0.000000%;0.000000;0.000000;0; (6) [135, 136, 137, 138, 139, 140]
-        #      0;0;0.000000;0.000000;0; (5) [141, 142, 143, 144, 145]
-        #    C 0.495225%;0.000000%;2083;0;13; (5) [146, 147, 148, 149, 150]
-        #    D 0.1%;0.1%;0.2%;0.4%;0.8%;1.6%;96.9%; (7) [151, 152, 153, 154, 155, 156, 157]
-        #    U 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (10) [158, 159, 160, 161, 162, 163, 164, 165, 166, 167]
-        #    M 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (12) [168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178. 179]
-        #    B dm-0;0;110;0;0;0;4;4;0.15%; (9) [180, 181, 182, 183, 184, 185, 186, 187, 188]
-        #      slaves;0;118;0;28;0;23;0;0.00%; (9) [189, 190, 191, 192, 193, 194, 195, 196, 197]
-        #      sde;0;118;0;28;0;23;0;0.00% (9) [198, 199, 200, 201, 202, 203, 204, 205, 206]
-        # Example Write test:
-        #    I 5;fio-3.12;test;0;0; (5)
-        #    R 0;0;0;0; (4)
-        #      0;0;0.000000;0.000000; (4)
-        #      0;0;0.000000;0.000000; (4)
-        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
-        #      0;0;0.000000;0.000000; (4)
-        #      0;0;0.000000%;0.000000;0.000000;0; (6)
-        #      0;0;0.000000;0.000000;0; (5)
-        #    W 8388608;1137438;277;7375; (4)
-        #      0;0;0.000000;0.000000; (4)
-        #      0;0;0.000000;0.000000; (4)
-        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
-        #      0;0;0.000000;0.000000; (4)
-        #      704512;1400832;99.029573%;1126400.000000;175720.860374;14; (6)
-        #      172;342;275.000000;42.900601;14; (5)
-        #    T 0;0;0;0; (4)
-        #      0;0;0.000000;0.000000; (4)
-        #      0;0;0.000000;0.000000; (4)
-        #      0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
-        #      0;0;0.000000;0.000000; (4)
-        #      0;0;0.000000%;0.000000;0.000000;0; (6)
-        #      0;0;0.000000;0.000000;0; (5)
-        #    C 12.950909%;1.912124%;746;0;95883; (5)
-        #    D 0.1%;0.1%;0.2%;0.4%;0.8%;1.6%;96.9%; (7)
-        #    U 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (10)
-        #    M 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (12)
-        #    B dm-0;0;196;0;0;0;12;12;0.16%; (9)
-        #      slaves;0;207;0;95;0;39;16;0.21%; (9)
-        #      sde;0;207;0;95;0;39;16;0.21% (9)
-        results = stdout.split(';')
-        if test_matrix[test]['direction'] == 'read':
-            # Stats
-            #         5:   Total IO (KiB)
-            #         6:   bandwidth (KiB/sec)
-            #         7:   IOPS
-            #         8:   runtime (msec)
-            # Total latency
-            #         37:  min
-            #         38:  max
-            #         39:  mean
-            #         40:  stdev
-            # Bandwidth
-            #         41:  min
-            #         42:  max
-            #         44:  mean
-            #         45:  stdev
-            #         46:  # samples
-            # IOPS
-            #         47:  min
-            #         48:  max
-            #         49:  mean
-            #         50:  stdev
-            #         51:  # samples
-            # CPU
-            #         146: user
-            #         147: system
-            #         148: ctx switches
-            #         149: maj faults
-            #         150: min faults
-            parsed_results[test] = {
-                "overall": {
-                    "iosize": results[5],
-                    "bandwidth": results[6],
-                    "iops": results[7],
-                    "runtime": results[8]
-                },
-                "latency": {
-                    "min": results[37],
-                    "max": results[38],
-                    "mean": results[39],
-                    "stdev": results[40]
-                },
-                "bandwidth": {
-                    "min": results[41],
-                    "max": results[42],
-                    "mean": results[44],
-                    "stdev": results[45],
-                    "numsamples": results[46],
-                },
-                "iops": {
-                    "min": results[47],
-                    "max": results[48],
-                    "mean": results[49],
-                    "stdev": results[50],
-                    "numsamples": results[51]
-                },
-                "cpu": {
-                    "user": results[146],
-                    "system": results[147],
-                    "ctxsw": results[148],
-                    "majfault": results[149],
-                    "minfault": results[150]
-                }
-            }
-
-        if test_matrix[test]['direction'] == 'write':
-            # Stats
-            #         52:  Total IO (KiB)
-            #         53:  bandwidth (KiB/sec)
-            #         54:  IOPS
-            #         55:  runtime (msec)
-            # Total latency
-            #         84:  min
-            #         85:  max
-            #         86:  mean
-            #         87:  stdev
-            # Bandwidth
-            #         88:  min
-            #         89:  max
-            #         91:  mean
-            #         92:  stdev
-            #         93:  # samples
-            # IOPS
-            #         94:  min
-            #         95:  max
-            #         96:  mean
-            #         97:  stdev
-            #         98:  # samples
-            # CPU
-            #         146: user
-            #         147: system
-            #         148: ctx switches
-            #         149: maj faults
-            #         150: min faults
-            parsed_results[test] = {
-                "overall": {
-                    "iosize": results[52],
-                    "bandwidth": results[53],
-                    "iops": results[54],
-                    "runtime": results[55]
-                },
-                "latency": {
-                    "min": results[84],
-                    "max": results[85],
-                    "mean": results[86],
-                    "stdev": results[87]
-                },
-                "bandwidth": {
-                    "min": results[88],
-                    "max": results[89],
-                    "mean": results[91],
-                    "stdev": results[92],
-                    "numsamples": results[93],
-                },
-                "iops": {
-                    "min": results[94],
-                    "max": results[95],
-                    "mean": results[96],
-                    "stdev": results[97],
-                    "numsamples": results[98]
-                },
-                "cpu": {
-                    "user": results[146],
-                    "system": results[147],
-                    "ctxsw": results[148],
-                    "majfault": results[149],
-                    "minfault": results[150]
-                }
-            }
+        results[test] = loads(stdout)

    # Phase 3 - cleanup
    self.update_state(state='RUNNING', meta={'current': 3, 'total': 3, 'status': 'Cleaning up and storing results'})
@ -463,7 +272,7 @@ def run_benchmark(self, pool):
    print("Storing result of tests for job '{}' in database".format(job_name))
    try:
        query = "UPDATE storage_benchmarks SET result = %s WHERE job = %s;"
-        args = (json.dumps(parsed_results), job_name)
+        args = (dumps(results), job_name)
        db_cur.execute(query, args)
        db_conn.commit()
    except Exception as e: