Make benchmarker function as a module

1. Move the test_matrix, volume name, and size to module-level variables so they can be accessed externally if this is imported. 2. Separate the volume creation and volume cleanup into functions. 3. Separate the individual benchmark runs into a function. This should enable easier calling of the various subcomponents externally, e.g. for external benchmark scripts.
2022-11-03 11:59:37 -04:00
parent 59f97ebbfb
commit ea7a4b2b85
1 changed files with 175 additions and 116 deletions
--- a/api-daemon/pvcapid/benchmark.py
+++ b/api-daemon/pvcapid/benchmark.py
@@ -32,6 +32,74 @@ import daemon_lib.common as pvc_common
 import daemon_lib.ceph as pvc_ceph
 # We run a total of 8 tests, to give a generalized idea of performance on the cluster:
 #   1. A sequential read test of 8GB with a 4M block size
 #   2. A sequential write test of 8GB with a 4M block size
 #   3. A random read test of 8GB with a 4M block size
 #   4. A random write test of 8GB with a 4M block size
 #   5. A random read test of 8GB with a 4K block size
 #   6. A random write test of 8GB with a 4K block size
 #   7. A random read test of 8GB with a 4K block size at low queue depth (iodepth 1)
 #   8. A random write test of 8GB with a 4K block size at low queue depth (iodepth 1)
 # Tests 1-6 run with an iodepth of 64; tests 7-8 repeat the 4K random tests with an iodepth of 1.
 # Taken together, these 8 results should give a very good indication of the overall storage performance
 # for a variety of workloads.
 # Maps each benchmark test name to the parameters for that test. Each entry has:
 #   direction: "read" or "write"
 #   iodepth:   value passed to fio as --iodepth by run_benchmark_job
 #   bs:        value passed to fio as --bs by run_benchmark_job
 #   rw:        value passed to fio as --readwrite by run_benchmark_job
 # All values are strings. The test name itself is passed to fio as --name.
 test_matrix = {
    # Sequential tests, 4M blocks, iodepth 64
    "seq_read": {
        "direction": "read",
        "iodepth": "64",
        "bs": "4M",
        "rw": "read",
    },
    "seq_write": {
        "direction": "write",
        "iodepth": "64",
        "bs": "4M",
        "rw": "write",
    },
    # Random tests, 4M blocks, iodepth 64
    "rand_read_4M": {
        "direction": "read",
        "iodepth": "64",
        "bs": "4M",
        "rw": "randread",
    },
    "rand_write_4M": {
        "direction": "write",
        "iodepth": "64",
        "bs": "4M",
        "rw": "randwrite",
    },
    # Random tests, 4K blocks, iodepth 64
    "rand_read_4K": {
        "direction": "read",
        "iodepth": "64",
        "bs": "4K",
        "rw": "randread",
    },
    "rand_write_4K": {
        "direction": "write",
        "iodepth": "64",
        "bs": "4K",
        "rw": "randwrite",
    },
    # Random tests, 4K blocks, iodepth 1 ("lowdepth")
    "rand_read_4K_lowdepth": {
        "direction": "read",
        "iodepth": "1",
        "bs": "4K",
        "rw": "randread",
    },
    "rand_write_4K_lowdepth": {
        "direction": "write",
        "iodepth": "1",
        "bs": "4K",
        "rw": "randwrite",
    },
 }
 # Specify the benchmark volume name and size
 # The name is used when creating and removing the volume and as fio's --rbdname;
 # the size is passed to pvc_ceph.add_volume when the volume is created.
 benchmark_volume_name = "pvcbenchmark"
 benchmark_volume_size = "8G"
 #
 # Exceptions (used by Celery tasks)
 #
@@ -44,7 +112,7 @@ class BenchmarkError(Exception):
        self, message, job_name=None, db_conn=None, db_cur=None, zkhandler=None
    ):
        self.message = message
-        if job_name is not None:
+        if job_name is not None and db_conn is not None and db_cur is not None:
            # Clean up our dangling result
            query = "DELETE FROM storage_benchmarks WHERE job = %s;"
            args = (job_name,)
@@ -52,6 +120,7 @@ class BenchmarkError(Exception):
            db_conn.commit()
            # Close the database connections cleanly
            close_database(db_conn, db_cur)
        if job_name is not None and zkhandler is not None:
            zkhandler.disconnect()
    def __str__(self):
@@ -116,6 +185,90 @@ def list_benchmarks(job=None):
        return {"message": "No benchmark found."}, 404
 def prepare_benchmark_volume(
    pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
 ):
    """
    Create the benchmark RBD volume on the given pool.

    The volume is created via pvc_ceph.add_volume using the module-level
    benchmark_volume_name and benchmark_volume_size. On success the message
    returned by add_volume is printed.

    Raises BenchmarkError if add_volume reports failure; job_name, db_conn,
    db_cur and zkhandler are handed through to the exception unchanged.
    """
    created, message = pvc_ceph.add_volume(
        zkhandler, pool, benchmark_volume_name, benchmark_volume_size
    )

    # Success path first: report what add_volume told us and finish
    if created:
        print(message)
        return

    failure = 'Failed to create volume "{}" on pool "{}": {}'.format(
        benchmark_volume_name, pool, message
    )
    raise BenchmarkError(
        failure,
        job_name=job_name,
        db_conn=db_conn,
        db_cur=db_cur,
        zkhandler=zkhandler,
    )
 def cleanup_benchmark_volume(
    pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
 ):
    """
    Remove the benchmark RBD volume from the given pool.

    The volume named by the module-level benchmark_volume_name is removed via
    pvc_ceph.remove_volume. On success the message returned by remove_volume
    is printed.

    Raises BenchmarkError if remove_volume reports failure; job_name, db_conn,
    db_cur and zkhandler are handed through to the exception unchanged.
    """
    removed, message = pvc_ceph.remove_volume(zkhandler, pool, benchmark_volume_name)

    # Success path first: report what remove_volume told us and finish
    if removed:
        print(message)
        return

    failure = 'Failed to remove volume "{}" on pool "{}": {}'.format(
        benchmark_volume_name, pool, message
    )
    raise BenchmarkError(
        failure,
        job_name=job_name,
        db_conn=db_conn,
        db_cur=db_cur,
        zkhandler=zkhandler,
    )
 def run_benchmark_job(
    test, pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
 ):
    """
    Run a single fio benchmark test against the benchmark volume.

    test must be a key of the module-level test_matrix; its "iodepth", "bs"
    and "rw" values are substituted into the fio command line, together with
    the pool and the module-level benchmark_volume_name. The command is run
    through pvc_common.run_os_command.

    Returns the fio output (requested as JSON) parsed with loads().

    Raises BenchmarkError if the command returns a non-zero code; job_name,
    db_conn, db_cur and zkhandler are handed through to the exception
    unchanged.
    """
    params = test_matrix[test]
    print("Running test '{}'".format(test))

    # Command template; the trailing backslashes join it into one logical line
    fio_template = """
            fio \
                --name={test} \
                --ioengine=rbd \
                --pool={pool} \
                --rbdname={volume} \
                --output-format=json \
                --direct=1 \
                --randrepeat=1 \
                --numjobs=1 \
                --time_based \
                --runtime=75 \
                --group_reporting \
                --iodepth={iodepth} \
                --bs={bs} \
                --readwrite={rw}
        """
    fio_cmd = fio_template.format(
        test=test,
        pool=pool,
        volume=benchmark_volume_name,
        iodepth=params["iodepth"],
        bs=params["bs"],
        rw=params["rw"],
    )

    # Log the command with its whitespace collapsed to single spaces
    flattened = " ".join(fio_cmd.split())
    print("Running fio job: {}".format(flattened))

    retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
    if not retcode:
        return loads(stdout)

    raise BenchmarkError(
        "Failed to run fio test: {}".format(stderr),
        job_name=job_name,
        db_conn=db_conn,
        db_cur=db_cur,
        zkhandler=zkhandler,
    )
 def run_benchmark(self, pool):
    # Runtime imports
    import time
@@ -172,20 +325,13 @@ def run_benchmark(self, pool):
    )
    time.sleep(1)
-    volume = "pvcbenchmark"
+    prepare_benchmark_volume(
-
+        pool,
-    # Create the RBD volume
+        job_name=job_name,
-    retcode, retmsg = pvc_ceph.add_volume(zkhandler, pool, volume, "8G")
+        db_conn=db_conn,
-    if not retcode:
+        db_cur=db_cur,
-        raise BenchmarkError(
+        zkhandler=zkhandler,
-            'Failed to create volume "{}": {}'.format(volume, retmsg),
+    )
            job_name=job_name,
            db_conn=db_conn,
            db_cur=db_cur,
            zkhandler=zkhandler,
        )
    else:
        print(retmsg)
    # Phase 2 - benchmark run
    self.update_state(
@@ -194,99 +340,17 @@ def run_benchmark(self, pool):
    )
    time.sleep(1)
    # We run a total of 8 tests, to give a generalized idea of performance on the cluster:
    #   1. A sequential read test of 8GB with a 4M block size
    #   2. A sequential write test of 8GB with a 4M block size
    #   3. A random read test of 8GB with a 4M block size
    #   4. A random write test of 8GB with a 4M block size
    #   5. A random read test of 8GB with a 256k block size
    #   6. A random write test of 8GB with a 256k block size
    #   7. A random read test of 8GB with a 4k block size
    #   8. A random write test of 8GB with a 4k block size
    # Taken together, these 8 results should give a very good indication of the overall storage performance
    # for a variety of workloads.
    test_matrix = {
        "seq_read": {"direction": "read", "iodepth": "64", "bs": "4M", "rw": "read"},
        "seq_write": {"direction": "write", "iodepth": "64", "bs": "4M", "rw": "write"},
        "rand_read_4M": {
            "direction": "read",
            "iodepth": "64",
            "bs": "4M",
            "rw": "randread",
        },
        "rand_write_4M": {
            "direction": "write",
            "iodepth": "64",
            "bs": "4M",
            "rw": "randwrite",
        },
        "rand_read_4K": {
            "direction": "read",
            "iodepth": "64",
            "bs": "4K",
            "rw": "randread",
        },
        "rand_write_4K": {
            "direction": "write",
            "iodepth": "64",
            "bs": "4K",
            "rw": "randwrite",
        },
        "rand_read_4K_lowdepth": {
            "direction": "read",
            "iodepth": "1",
            "bs": "4K",
            "rw": "randread",
        },
        "rand_write_4K_lowdepth": {
            "direction": "write",
            "iodepth": "1",
            "bs": "4K",
            "rw": "randwrite",
        },
    }
    results = dict()
    for test in test_matrix:
-        print("Running test '{}'".format(test))
+        results[test] = run_benchmark_job(
-        fio_cmd = """
+            test,
-            fio \
+            pool,
-                --name={test} \
+            job_name=job_name,
-                --ioengine=rbd \
+            db_conn=db_conn,
-                --pool={pool} \
+            db_cur=db_cur,
-                --rbdname={volume} \
+            zkhandler=zkhandler,
                --output-format=json \
                --direct=1 \
                --randrepeat=1 \
                --numjobs=1 \
                --time_based \
                --runtime=75 \
                --group_reporting \
                --iodepth={iodepth} \
                --bs={bs} \
                --readwrite={rw}
        """.format(
            test=test,
            pool=pool,
            volume=volume,
            iodepth=test_matrix[test]["iodepth"],
            bs=test_matrix[test]["bs"],
            rw=test_matrix[test]["rw"],
        )
        print("Running fio job: {}".format(" ".join(fio_cmd.split())))
        retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
        if retcode:
            raise BenchmarkError(
                "Failed to run fio test: {}".format(stderr),
                job_name=job_name,
                db_conn=db_conn,
                db_cur=db_cur,
                zkhandler=zkhandler,
            )
        results[test] = loads(stdout)
    # Phase 3 - cleanup
    self.update_state(
        state="RUNNING",
@@ -294,18 +358,13 @@ def run_benchmark(self, pool):
    )
    time.sleep(1)
-    # Remove the RBD volume
+    cleanup_benchmark_volume(
-    retcode, retmsg = pvc_ceph.remove_volume(zkhandler, pool, volume)
+        pool,
-    if not retcode:
+        job_name=job_name,
-        raise BenchmarkError(
+        db_conn=db_conn,
-            'Failed to remove volume "{}": {}'.format(volume, retmsg),
+        db_cur=db_cur,
-            job_name=job_name,
+        zkhandler=zkhandler,
-            db_conn=db_conn,
+    )
            db_cur=db_cur,
            zkhandler=zkhandler,
        )
    else:
        print(retmsg)
    print("Storing result of tests for job '{}' in database".format(job_name))
    try: