Make benchmarker function as a module

1. Move the test_matrix, volume name, and size to module-level variables
so they can be accessed externally if this is imported.
2. Separate the volume creation and volume cleanup into functions.
3. Separate the individual benchmark runs into a function.

This should enable easier calling of the various subcomponents
externally, e.g. for external benchmark scripts.
This commit is contained in:
Joshua Boniface 2022-11-03 11:59:37 -04:00
parent 59f97ebbfb
commit ea7a4b2b85
1 changed files with 175 additions and 116 deletions

View File

@ -32,6 +32,74 @@ import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph import daemon_lib.ceph as pvc_ceph
# We run a total of 8 tests, to give a generalized idea of performance on the cluster.
# Every test runs against the benchmark volume named and sized at the end of this section:
# 1. A sequential read test with a 4M block size at an I/O depth of 64
# 2. A sequential write test with a 4M block size at an I/O depth of 64
# 3. A random read test with a 4M block size at an I/O depth of 64
# 4. A random write test with a 4M block size at an I/O depth of 64
# 5. A random read test with a 4K block size at an I/O depth of 64
# 6. A random write test with a 4K block size at an I/O depth of 64
# 7. A random read test with a 4K block size at an I/O depth of 1 ("lowdepth")
# 8. A random write test with a 4K block size at an I/O depth of 1 ("lowdepth")
# Taken together, these 8 results should give a very good indication of the overall storage performance
# for a variety of workloads.
#
# Each entry is keyed by test name; "iodepth", "bs" and "rw" are substituted into the
# fio command line as --iodepth, --bs and --readwrite by run_benchmark_job().
# NOTE(review): "direction" is not read by run_benchmark_job(); presumably it is used
# when results are processed elsewhere - confirm.
test_matrix = {
"seq_read": {
"direction": "read",
"iodepth": "64",
"bs": "4M",
"rw": "read",
},
"seq_write": {
"direction": "write",
"iodepth": "64",
"bs": "4M",
"rw": "write",
},
"rand_read_4M": {
"direction": "read",
"iodepth": "64",
"bs": "4M",
"rw": "randread",
},
"rand_write_4M": {
"direction": "write",
"iodepth": "64",
"bs": "4M",
"rw": "randwrite",
},
"rand_read_4K": {
"direction": "read",
"iodepth": "64",
"bs": "4K",
"rw": "randread",
},
"rand_write_4K": {
"direction": "write",
"iodepth": "64",
"bs": "4K",
"rw": "randwrite",
},
"rand_read_4K_lowdepth": {
"direction": "read",
"iodepth": "1",
"bs": "4K",
"rw": "randread",
},
"rand_write_4K_lowdepth": {
"direction": "write",
"iodepth": "1",
"bs": "4K",
"rw": "randwrite",
},
}
# Specify the benchmark volume name and size
benchmark_volume_name = "pvcbenchmark"
benchmark_volume_size = "8G"
# #
# Exceptions (used by Celery tasks) # Exceptions (used by Celery tasks)
# #
@ -44,7 +112,7 @@ class BenchmarkError(Exception):
self, message, job_name=None, db_conn=None, db_cur=None, zkhandler=None self, message, job_name=None, db_conn=None, db_cur=None, zkhandler=None
): ):
self.message = message self.message = message
if job_name is not None: if job_name is not None and db_conn is not None and db_cur is not None:
# Clean up our dangling result # Clean up our dangling result
query = "DELETE FROM storage_benchmarks WHERE job = %s;" query = "DELETE FROM storage_benchmarks WHERE job = %s;"
args = (job_name,) args = (job_name,)
@ -52,6 +120,7 @@ class BenchmarkError(Exception):
db_conn.commit() db_conn.commit()
# Close the database connections cleanly # Close the database connections cleanly
close_database(db_conn, db_cur) close_database(db_conn, db_cur)
if job_name is not None and zkhandler is not None:
zkhandler.disconnect() zkhandler.disconnect()
def __str__(self): def __str__(self):
@ -116,6 +185,90 @@ def list_benchmarks(job=None):
return {"message": "No benchmark found."}, 404 return {"message": "No benchmark found."}, 404
def prepare_benchmark_volume(
    pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
):
    """Create the benchmark RBD volume in ``pool``.

    The volume is created through ``pvc_ceph.add_volume`` using the
    module-level ``benchmark_volume_name`` and ``benchmark_volume_size``.
    On success the message returned by ``add_volume`` is printed.

    Args:
        pool: Name of the pool in which to create the volume.
        job_name: Forwarded to ``BenchmarkError`` on failure.
        db_conn: Forwarded to ``BenchmarkError`` on failure.
        db_cur: Forwarded to ``BenchmarkError`` on failure.
        zkhandler: Passed to ``pvc_ceph.add_volume`` and forwarded to
            ``BenchmarkError`` on failure.

    Raises:
        BenchmarkError: If ``add_volume`` reports a falsy status.
    """
    created, message = pvc_ceph.add_volume(
        zkhandler, pool, benchmark_volume_name, benchmark_volume_size
    )

    # Success path first: report what the storage layer said and stop here.
    if created:
        print(message)
        return

    failure_text = 'Failed to create volume "{}" on pool "{}": {}'.format(
        benchmark_volume_name, pool, message
    )
    raise BenchmarkError(
        failure_text,
        job_name=job_name,
        db_conn=db_conn,
        db_cur=db_cur,
        zkhandler=zkhandler,
    )
def cleanup_benchmark_volume(
    pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
):
    """Remove the benchmark RBD volume from ``pool``.

    The volume named by the module-level ``benchmark_volume_name`` is removed
    through ``pvc_ceph.remove_volume``. On success the message returned by
    ``remove_volume`` is printed.

    Args:
        pool: Name of the pool that holds the volume.
        job_name: Forwarded to ``BenchmarkError`` on failure.
        db_conn: Forwarded to ``BenchmarkError`` on failure.
        db_cur: Forwarded to ``BenchmarkError`` on failure.
        zkhandler: Passed to ``pvc_ceph.remove_volume`` and forwarded to
            ``BenchmarkError`` on failure.

    Raises:
        BenchmarkError: If ``remove_volume`` reports a falsy status.
    """
    removed, message = pvc_ceph.remove_volume(zkhandler, pool, benchmark_volume_name)

    # Success path first: report what the storage layer said and stop here.
    if removed:
        print(message)
        return

    failure_text = 'Failed to remove volume "{}" on pool "{}": {}'.format(
        benchmark_volume_name, pool, message
    )
    raise BenchmarkError(
        failure_text,
        job_name=job_name,
        db_conn=db_conn,
        db_cur=db_cur,
        zkhandler=zkhandler,
    )
# Run one fio test from test_matrix against the benchmark volume in the given
# pool and return fio's output parsed by loads().
#
# Parameters:
#   test      - key into the module-level test_matrix; the lookup below raises
#               KeyError for a name that is not in the table.
#   pool      - pool name, passed to fio as --pool.
#   job_name, db_conn, db_cur, zkhandler
#             - not used directly; forwarded to BenchmarkError when fio fails.
#
# Returns: the result of loads(stdout).
#   NOTE(review): loads is presumably json.loads (fio is run with
#   --output-format=json); the import is outside this view - confirm.
#
# Raises: BenchmarkError when run_os_command() reports a truthy return code.
def run_benchmark_job(
test, pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
):
test_spec = test_matrix[test]
print("Running test '{}'".format(test))
# The trailing backslashes sit inside a non-raw string literal, so each
# backslash-newline pair is dropped and the options form one logical line.
# No --size is given; the run is bounded by --time_based with --runtime=75.
fio_cmd = """
fio \
--name={test} \
--ioengine=rbd \
--pool={pool} \
--rbdname={volume} \
--output-format=json \
--direct=1 \
--randrepeat=1 \
--numjobs=1 \
--time_based \
--runtime=75 \
--group_reporting \
--iodepth={iodepth} \
--bs={bs} \
--readwrite={rw}
""".format(
test=test,
pool=pool,
volume=benchmark_volume_name,
iodepth=test_spec["iodepth"],
bs=test_spec["bs"],
rw=test_spec["rw"],
)
# Whitespace is collapsed only for this log line; the command handed to
# run_os_command() below is the unmodified fio_cmd string.
print("Running fio job: {}".format(" ".join(fio_cmd.split())))
retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
# A truthy return code is treated as failure and reported with fio's stderr.
if retcode:
raise BenchmarkError(
"Failed to run fio test: {}".format(stderr),
job_name=job_name,
db_conn=db_conn,
db_cur=db_cur,
zkhandler=zkhandler,
)
return loads(stdout)
def run_benchmark(self, pool): def run_benchmark(self, pool):
# Runtime imports # Runtime imports
import time import time
@ -172,20 +325,13 @@ def run_benchmark(self, pool):
) )
time.sleep(1) time.sleep(1)
volume = "pvcbenchmark" prepare_benchmark_volume(
pool,
# Create the RBD volume job_name=job_name,
retcode, retmsg = pvc_ceph.add_volume(zkhandler, pool, volume, "8G") db_conn=db_conn,
if not retcode: db_cur=db_cur,
raise BenchmarkError( zkhandler=zkhandler,
'Failed to create volume "{}": {}'.format(volume, retmsg), )
job_name=job_name,
db_conn=db_conn,
db_cur=db_cur,
zkhandler=zkhandler,
)
else:
print(retmsg)
# Phase 2 - benchmark run # Phase 2 - benchmark run
self.update_state( self.update_state(
@ -194,99 +340,17 @@ def run_benchmark(self, pool):
) )
time.sleep(1) time.sleep(1)
# We run a total of 8 tests, to give a generalized idea of performance on the cluster:
# 1. A sequential read test of 8GB with a 4M block size
# 2. A sequential write test of 8GB with a 4M block size
# 3. A random read test of 8GB with a 4M block size
# 4. A random write test of 8GB with a 4M block size
# 5. A random read test of 8GB with a 256k block size
# 6. A random write test of 8GB with a 256k block size
# 7. A random read test of 8GB with a 4k block size
# 8. A random write test of 8GB with a 4k block size
# Taken together, these 8 results should give a very good indication of the overall storage performance
# for a variety of workloads.
test_matrix = {
"seq_read": {"direction": "read", "iodepth": "64", "bs": "4M", "rw": "read"},
"seq_write": {"direction": "write", "iodepth": "64", "bs": "4M", "rw": "write"},
"rand_read_4M": {
"direction": "read",
"iodepth": "64",
"bs": "4M",
"rw": "randread",
},
"rand_write_4M": {
"direction": "write",
"iodepth": "64",
"bs": "4M",
"rw": "randwrite",
},
"rand_read_4K": {
"direction": "read",
"iodepth": "64",
"bs": "4K",
"rw": "randread",
},
"rand_write_4K": {
"direction": "write",
"iodepth": "64",
"bs": "4K",
"rw": "randwrite",
},
"rand_read_4K_lowdepth": {
"direction": "read",
"iodepth": "1",
"bs": "4K",
"rw": "randread",
},
"rand_write_4K_lowdepth": {
"direction": "write",
"iodepth": "1",
"bs": "4K",
"rw": "randwrite",
},
}
results = dict() results = dict()
for test in test_matrix: for test in test_matrix:
print("Running test '{}'".format(test)) results[test] = run_benchmark_job(
fio_cmd = """ test,
fio \ pool,
--name={test} \ job_name=job_name,
--ioengine=rbd \ db_conn=db_conn,
--pool={pool} \ db_cur=db_cur,
--rbdname={volume} \ zkhandler=zkhandler,
--output-format=json \
--direct=1 \
--randrepeat=1 \
--numjobs=1 \
--time_based \
--runtime=75 \
--group_reporting \
--iodepth={iodepth} \
--bs={bs} \
--readwrite={rw}
""".format(
test=test,
pool=pool,
volume=volume,
iodepth=test_matrix[test]["iodepth"],
bs=test_matrix[test]["bs"],
rw=test_matrix[test]["rw"],
) )
print("Running fio job: {}".format(" ".join(fio_cmd.split())))
retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
if retcode:
raise BenchmarkError(
"Failed to run fio test: {}".format(stderr),
job_name=job_name,
db_conn=db_conn,
db_cur=db_cur,
zkhandler=zkhandler,
)
results[test] = loads(stdout)
# Phase 3 - cleanup # Phase 3 - cleanup
self.update_state( self.update_state(
state="RUNNING", state="RUNNING",
@ -294,18 +358,13 @@ def run_benchmark(self, pool):
) )
time.sleep(1) time.sleep(1)
# Remove the RBD volume cleanup_benchmark_volume(
retcode, retmsg = pvc_ceph.remove_volume(zkhandler, pool, volume) pool,
if not retcode: job_name=job_name,
raise BenchmarkError( db_conn=db_conn,
'Failed to remove volume "{}": {}'.format(volume, retmsg), db_cur=db_cur,
job_name=job_name, zkhandler=zkhandler,
db_conn=db_conn, )
db_cur=db_cur,
zkhandler=zkhandler,
)
else:
print(retmsg)
print("Storing result of tests for job '{}' in database".format(job_name)) print("Storing result of tests for job '{}' in database".format(job_name))
try: try: