diff --git a/api-daemon/pvcapid/benchmark.py b/api-daemon/pvcapid/benchmark.py index 0dd3ddd2..a419f972 100755 --- a/api-daemon/pvcapid/benchmark.py +++ b/api-daemon/pvcapid/benchmark.py @@ -32,6 +32,74 @@ import daemon_lib.common as pvc_common import daemon_lib.ceph as pvc_ceph +# We run a total of 8 tests, to give a generalized idea of performance on the cluster: +# 1. A sequential read test of 8GB with a 4M block size +# 2. A sequential write test of 8GB with a 4M block size +# 3. A random read test of 8GB with a 4M block size +# 4. A random write test of 8GB with a 4M block size +# 5. A random read test of 8GB with a 4k block size and 64 queue depth +# 6. A random write test of 8GB with a 4k block size and 64 queue depth +# 7. A random read test of 8GB with a 4k block size and 1 queue depth +# 8. A random write test of 8GB with a 4k block size and 1 queue depth +# Taken together, these 8 results should give a very good indication of the overall storage performance +# for a variety of workloads. +test_matrix = { + "seq_read": { + "direction": "read", + "iodepth": "64", + "bs": "4M", + "rw": "read", + }, + "seq_write": { + "direction": "write", + "iodepth": "64", + "bs": "4M", + "rw": "write", + }, + "rand_read_4M": { + "direction": "read", + "iodepth": "64", + "bs": "4M", + "rw": "randread", + }, + "rand_write_4M": { + "direction": "write", + "iodepth": "64", + "bs": "4M", + "rw": "randwrite", + }, + "rand_read_4K": { + "direction": "read", + "iodepth": "64", + "bs": "4K", + "rw": "randread", + }, + "rand_write_4K": { + "direction": "write", + "iodepth": "64", + "bs": "4K", + "rw": "randwrite", + }, + "rand_read_4K_lowdepth": { + "direction": "read", + "iodepth": "1", + "bs": "4K", + "rw": "randread", + }, + "rand_write_4K_lowdepth": { + "direction": "write", + "iodepth": "1", + "bs": "4K", + "rw": "randwrite", + }, +} + + +# Specify the benchmark volume name and size +benchmark_volume_name = "pvcbenchmark" +benchmark_volume_size = "8G" + + # # Exceptions (used by Celery tasks) # @@ -44,7 +112,7 @@ class BenchmarkError(Exception):
self, message, job_name=None, db_conn=None, db_cur=None, zkhandler=None ): self.message = message - if job_name is not None: + if job_name is not None and db_conn is not None and db_cur is not None: # Clean up our dangling result query = "DELETE FROM storage_benchmarks WHERE job = %s;" args = (job_name,) @@ -52,6 +120,7 @@ class BenchmarkError(Exception): db_conn.commit() # Close the database connections cleanly close_database(db_conn, db_cur) + if job_name is not None and zkhandler is not None: zkhandler.disconnect() def __str__(self): @@ -116,6 +185,90 @@ def list_benchmarks(job=None): return {"message": "No benchmark found."}, 404 +def prepare_benchmark_volume( + pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None +): + # Create the RBD volume + retcode, retmsg = pvc_ceph.add_volume( + zkhandler, pool, benchmark_volume_name, benchmark_volume_size + ) + if not retcode: + raise BenchmarkError( + 'Failed to create volume "{}" on pool "{}": {}'.format( + benchmark_volume_name, pool, retmsg + ), + job_name=job_name, + db_conn=db_conn, + db_cur=db_cur, + zkhandler=zkhandler, + ) + else: + print(retmsg) + + +def cleanup_benchmark_volume( + pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None +): + # Remove the RBD volume + retcode, retmsg = pvc_ceph.remove_volume(zkhandler, pool, benchmark_volume_name) + if not retcode: + raise BenchmarkError( + 'Failed to remove volume "{}" on pool "{}": {}'.format( + benchmark_volume_name, pool, retmsg + ), + job_name=job_name, + db_conn=db_conn, + db_cur=db_cur, + zkhandler=zkhandler, + ) + else: + print(retmsg) + + +def run_benchmark_job( + test, pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None +): + test_spec = test_matrix[test] + print("Running test '{}'".format(test)) + fio_cmd = """ + fio \ + --name={test} \ + --ioengine=rbd \ + --pool={pool} \ + --rbdname={volume} \ + --output-format=json \ + --direct=1 \ + --randrepeat=1 \ + --numjobs=1 \ + --time_based \ + --runtime=75 \ + --group_reporting 
\ + --iodepth={iodepth} \ + --bs={bs} \ + --readwrite={rw} + """.format( + test=test, + pool=pool, + volume=benchmark_volume_name, + iodepth=test_spec["iodepth"], + bs=test_spec["bs"], + rw=test_spec["rw"], + ) + + print("Running fio job: {}".format(" ".join(fio_cmd.split()))) + retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd) + if retcode: + raise BenchmarkError( + "Failed to run fio test: {}".format(stderr), + job_name=job_name, + db_conn=db_conn, + db_cur=db_cur, + zkhandler=zkhandler, + ) + + return loads(stdout) + + def run_benchmark(self, pool): # Runtime imports import time @@ -172,20 +325,13 @@ def run_benchmark(self, pool): ) time.sleep(1) - volume = "pvcbenchmark" - - # Create the RBD volume - retcode, retmsg = pvc_ceph.add_volume(zkhandler, pool, volume, "8G") - if not retcode: - raise BenchmarkError( - 'Failed to create volume "{}": {}'.format(volume, retmsg), - job_name=job_name, - db_conn=db_conn, - db_cur=db_cur, - zkhandler=zkhandler, - ) - else: - print(retmsg) + prepare_benchmark_volume( + pool, + job_name=job_name, + db_conn=db_conn, + db_cur=db_cur, + zkhandler=zkhandler, + ) # Phase 2 - benchmark run self.update_state( @@ -194,99 +340,17 @@ def run_benchmark(self, pool): ) time.sleep(1) - # We run a total of 8 tests, to give a generalized idea of performance on the cluster: - # 1. A sequential read test of 8GB with a 4M block size - # 2. A sequential write test of 8GB with a 4M block size - # 3. A random read test of 8GB with a 4M block size - # 4. A random write test of 8GB with a 4M block size - # 5. A random read test of 8GB with a 256k block size - # 6. A random write test of 8GB with a 256k block size - # 7. A random read test of 8GB with a 4k block size - # 8. A random write test of 8GB with a 4k block size - # Taken together, these 8 results should give a very good indication of the overall storage performance - # for a variety of workloads. 
- test_matrix = { - "seq_read": {"direction": "read", "iodepth": "64", "bs": "4M", "rw": "read"}, - "seq_write": {"direction": "write", "iodepth": "64", "bs": "4M", "rw": "write"}, - "rand_read_4M": { - "direction": "read", - "iodepth": "64", - "bs": "4M", - "rw": "randread", - }, - "rand_write_4M": { - "direction": "write", - "iodepth": "64", - "bs": "4M", - "rw": "randwrite", - }, - "rand_read_4K": { - "direction": "read", - "iodepth": "64", - "bs": "4K", - "rw": "randread", - }, - "rand_write_4K": { - "direction": "write", - "iodepth": "64", - "bs": "4K", - "rw": "randwrite", - }, - "rand_read_4K_lowdepth": { - "direction": "read", - "iodepth": "1", - "bs": "4K", - "rw": "randread", - }, - "rand_write_4K_lowdepth": { - "direction": "write", - "iodepth": "1", - "bs": "4K", - "rw": "randwrite", - }, - } - results = dict() for test in test_matrix: - print("Running test '{}'".format(test)) - fio_cmd = """ - fio \ - --name={test} \ - --ioengine=rbd \ - --pool={pool} \ - --rbdname={volume} \ - --output-format=json \ - --direct=1 \ - --randrepeat=1 \ - --numjobs=1 \ - --time_based \ - --runtime=75 \ - --group_reporting \ - --iodepth={iodepth} \ - --bs={bs} \ - --readwrite={rw} - """.format( - test=test, - pool=pool, - volume=volume, - iodepth=test_matrix[test]["iodepth"], - bs=test_matrix[test]["bs"], - rw=test_matrix[test]["rw"], + results[test] = run_benchmark_job( + test, + pool, + job_name=job_name, + db_conn=db_conn, + db_cur=db_cur, + zkhandler=zkhandler, ) - print("Running fio job: {}".format(" ".join(fio_cmd.split()))) - retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd) - if retcode: - raise BenchmarkError( - "Failed to run fio test: {}".format(stderr), - job_name=job_name, - db_conn=db_conn, - db_cur=db_cur, - zkhandler=zkhandler, - ) - - results[test] = loads(stdout) - # Phase 3 - cleanup self.update_state( state="RUNNING", @@ -294,18 +358,13 @@ def run_benchmark(self, pool): ) time.sleep(1) - # Remove the RBD volume - retcode, retmsg = 
pvc_ceph.remove_volume(zkhandler, pool, volume) - if not retcode: - raise BenchmarkError( - 'Failed to remove volume "{}": {}'.format(volume, retmsg), - job_name=job_name, - db_conn=db_conn, - db_cur=db_cur, - zkhandler=zkhandler, - ) - else: - print(retmsg) + cleanup_benchmark_volume( + pool, + job_name=job_name, + db_conn=db_conn, + db_cur=db_cur, + zkhandler=zkhandler, + ) print("Storing result of tests for job '{}' in database".format(job_name)) try: