#!/usr/bin/env python3

# benchmark.py - PVC API Benchmark functions
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2024 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import os
import psutil
import psycopg2
import psycopg2.extras
import subprocess

from datetime import datetime
from json import loads, dumps
from time import sleep

from daemon_lib.celery import start, fail, log_info, update, finish

import daemon_lib.ceph as pvc_ceph


# Define the current test format
TEST_FORMAT = 2


# We run a total of 8 tests, to give a generalized idea of performance on the cluster:
# 1. A sequential read test of 64GB with a 4M block size
# 2. A sequential write test of 64GB with a 4M block size
# 3. A random read test of 64GB with a 4M block size
# 4. A random write test of 64GB with a 4M block size
# 5. A random read test of 64GB with a 4K block size
# 6. A random write test of 64GB with a 4K block size
# 7. A random read test of 64GB with a 4K block size and an I/O depth of 1
# 8. A random write test of 64GB with a 4K block size and an I/O depth of 1
# Taken together, these 8 results should give a very good indication of the overall storage performance
# for a variety of workloads.
test_matrix = {
    "seq_read": {
        "direction": "read",
        "iodepth": "64",
        "bs": "4M",
        "rw": "read",
    },
    "seq_write": {
        "direction": "write",
        "iodepth": "64",
        "bs": "4M",
        "rw": "write",
    },
    "rand_read_4M": {
        "direction": "read",
        "iodepth": "64",
        "bs": "4M",
        "rw": "randread",
    },
    "rand_write_4M": {
        "direction": "write",
        "iodepth": "64",
        "bs": "4M",
        "rw": "randwrite",
    },
    "rand_read_4K": {
        "direction": "read",
        "iodepth": "64",
        "bs": "4K",
        "rw": "randread",
    },
    "rand_write_4K": {
        "direction": "write",
        "iodepth": "64",
        "bs": "4K",
        "rw": "randwrite",
    },
    "rand_read_4K_lowdepth": {
        "direction": "read",
        "iodepth": "1",
        "bs": "4K",
        "rw": "randread",
    },
    "rand_write_4K_lowdepth": {
        "direction": "write",
        "iodepth": "1",
        "bs": "4K",
        "rw": "randwrite",
    },
}
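# Each entry above maps directly onto the fio invocation in run_benchmark_job()
# below: "iodepth" -> --iodepth, "bs" -> --bs, and "rw" -> --readwrite. The
# "direction" key is informational only and is not passed to fio by this module.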


# Specify the benchmark volume name and size
benchmark_volume_name = "pvcbenchmark"
benchmark_volume_size = "64G"


#
# Exceptions (used by Celery tasks)
#
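# BenchmarkError is handed to fail() (as its `exception` argument) when database
# operations in the worker below fail.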
class BenchmarkError(Exception):
    pass


#
# Common functions
#

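# Tear down a benchmark run: for non-final runs, delete the dangling "Running"
# row from storage_benchmarks, then close the database connection and release
# the Zookeeper handler if one was provided.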
def cleanup(job_name, db_conn=None, db_cur=None, zkhandler=None, final=False):
    if db_conn is not None and db_cur is not None:
        if not final:
            # Clean up our dangling result (non-final runs only)
            query = "DELETE FROM storage_benchmarks WHERE job = %s;"
            args = (job_name,)
            db_cur.execute(query, args)
            db_conn.commit()
        # Close the database connections cleanly
        close_database(db_conn, db_cur)
    if zkhandler is not None:
        zkhandler.disconnect()
        del zkhandler


# Database connections
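# open_database() hands back both the connection and a RealDictCursor, so query
# results are returned as dictionaries keyed by column name (list_benchmarks()
# below depends on this).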
def open_database(config):
    conn = psycopg2.connect(
        host=config["api_postgresql_host"],
        port=config["api_postgresql_port"],
        dbname=config["api_postgresql_dbname"],
        user=config["api_postgresql_user"],
        password=config["api_postgresql_password"],
    )
    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    return conn, cur


def close_database(conn, cur, failed=False):
    if not failed:
        conn.commit()
    cur.close()
    conn.close()


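# List benchmark results, optionally filtered to a single job name.
# Returns a (body, status-code) tuple: (list-of-results, 200) on success, or
# ({"message": ...}, 404) when no benchmarks match.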
def list_benchmarks(config, job=None):
    if job is not None:
        query = "SELECT * FROM {} WHERE job = %s;".format("storage_benchmarks")
        args = (job,)
    else:
        query = "SELECT * FROM {} ORDER BY id DESC;".format("storage_benchmarks")
        args = ()

    conn, cur = open_database(config)
    cur.execute(query, args)
    orig_data = cur.fetchall()
    data = list()
    for benchmark in orig_data:
        benchmark_data = dict()
        benchmark_data["id"] = benchmark["id"]
        benchmark_data["job"] = benchmark["job"]
        benchmark_data["test_format"] = benchmark["test_format"]
        if benchmark["result"] == "Running":
            benchmark_data["benchmark_result"] = "Running"
        else:
            try:
                benchmark_data["benchmark_result"] = loads(benchmark["result"])
            except Exception:
                benchmark_data["benchmark_result"] = {}
        # Append the new data to our actual output structure
        data.append(benchmark_data)
    close_database(conn, cur)
    if data:
        return data, 200
    else:
        return {"message": "No benchmark found."}, 404


def prepare_benchmark_volume(
    pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
):
    # Create the RBD volume
    retcode, retmsg = pvc_ceph.add_volume(
        zkhandler, pool, benchmark_volume_name, benchmark_volume_size
    )
    if not retcode:
        cleanup(
            job_name,
            db_conn=db_conn,
            db_cur=db_cur,
            zkhandler=zkhandler,
        )
        fail(
            None,
            f'Failed to create volume "{benchmark_volume_name}" on pool "{pool}": {retmsg}',
        )
    else:
        log_info(None, retmsg)


def cleanup_benchmark_volume(
    pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
):
    # Remove the RBD volume
    retcode, retmsg = pvc_ceph.remove_volume(zkhandler, pool, benchmark_volume_name)
    if not retcode:
        cleanup(
            job_name,
            db_conn=db_conn,
            db_cur=db_cur,
            zkhandler=zkhandler,
        )
        fail(
            None,
            f'Failed to remove volume "{benchmark_volume_name}" from pool "{pool}": {retmsg}',
        )
    else:
        log_info(None, retmsg)


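# Run a single fio test from test_matrix against the benchmark volume while
# sampling local resource usage. Returns (resource_data, fio_json) on success,
# or (None, None) if fio fails or its JSON output cannot be parsed.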
def run_benchmark_job(
    config, test, pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
):
    test_spec = test_matrix[test]
    log_info(None, f"Running test '{test}'")
    fio_cmd = """
        fio \
            --name={test} \
            --ioengine=rbd \
            --pool={pool} \
            --rbdname={volume} \
            --output-format=json \
            --direct=1 \
            --randrepeat=1 \
            --numjobs=1 \
            --time_based \
            --runtime=75 \
            --group_reporting \
            --iodepth={iodepth} \
            --bs={bs} \
            --readwrite={rw}
    """.format(
        test=test,
        pool=pool,
        volume=benchmark_volume_name,
        iodepth=test_spec["iodepth"],
        bs=test_spec["bs"],
        rw=test_spec["rw"],
    )

    log_info(None, "Running fio job: {}".format(" ".join(fio_cmd.split())))

    # Run the fio command manually instead of using our run_os_command wrapper
    # This will help us gather statistics about this node while it's running
    process = subprocess.Popen(
        fio_cmd.split(),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    # Wait 15 seconds for the test to start
    log_info(None, "Waiting 15 seconds for test resource stabilization")
    sleep(15)

    # Set up function to get process CPU utilization by name
    def get_cpu_utilization_by_name(process_name):
        cpu_usage = 0
        for proc in psutil.process_iter(["name", "cpu_percent"]):
            if proc.info["name"] == process_name:
                cpu_usage += proc.info["cpu_percent"]
        return cpu_usage

    # Set up function to get process memory utilization by name
    def get_memory_utilization_by_name(process_name):
        memory_usage = 0
        for proc in psutil.process_iter(["name", "memory_percent"]):
            if proc.info["name"] == process_name:
                memory_usage += proc.info["memory_percent"]
        return memory_usage

    # Set up function to get network traffic utilization in bps
    def get_network_traffic_bps(interface, duration=1):
        # Get initial network counters
        net_io_start = psutil.net_io_counters(pernic=True)
        if interface not in net_io_start:
            # Return a 3-tuple here as well, so callers can always unpack three values
            return None, None, None

        stats_start = net_io_start[interface]
        bytes_sent_start = stats_start.bytes_sent
        bytes_recv_start = stats_start.bytes_recv

        # Wait for the specified duration
        sleep(duration)

        # Get final network counters
        net_io_end = psutil.net_io_counters(pernic=True)
        stats_end = net_io_end[interface]
        bytes_sent_end = stats_end.bytes_sent
        bytes_recv_end = stats_end.bytes_recv

        # Calculate bytes per second
        bytes_sent_per_sec = (bytes_sent_end - bytes_sent_start) / duration
        bytes_recv_per_sec = (bytes_recv_end - bytes_recv_start) / duration

        # Convert to bits per second (bps)
        bits_sent_per_sec = bytes_sent_per_sec * 8
        bits_recv_per_sec = bytes_recv_per_sec * 8
        bits_total_per_sec = bits_sent_per_sec + bits_recv_per_sec

        return bits_sent_per_sec, bits_recv_per_sec, bits_total_per_sec

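    # Poll resource usage while fio runs; each loop iteration below spends about
    # one second in psutil.cpu_percent() and another in get_network_traffic_bps(),
    # so samples accumulate roughly every two seconds.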
log_info(None, f"Starting system resource polling for test '{test}'")
|
|
storage_interface = config["storage_dev"]
|
|
total_cpus = psutil.cpu_count(logical=True)
|
|
ticks = 1
|
|
osd_cpu_utilization = 0
|
|
osd_memory_utilization = 0
|
|
mon_cpu_utilization = 0
|
|
mon_memory_utilization = 0
|
|
total_cpu_utilization = 0
|
|
total_memory_utilization = 0
|
|
storage_sent_bps = 0
|
|
storage_recv_bps = 0
|
|
storage_total_bps = 0
|
|
|
|
    while process.poll() is None:
        # Do collection of statistics like network bandwidth and cpu utilization
        current_osd_cpu_utilization = get_cpu_utilization_by_name("ceph-osd")
        current_osd_memory_utilization = get_memory_utilization_by_name("ceph-osd")
        current_mon_cpu_utilization = get_cpu_utilization_by_name("ceph-mon")
        current_mon_memory_utilization = get_memory_utilization_by_name("ceph-mon")
        current_total_cpu_utilization = psutil.cpu_percent(interval=1)
        current_total_memory_utilization = psutil.virtual_memory().percent
        (
            current_storage_sent_bps,
            current_storage_recv_bps,
            current_storage_total_bps,
        ) = get_network_traffic_bps(storage_interface)
        # Recheck if the process is done yet; if it's not, we add the values and increase the ticks
        # This helps ensure that if the process finishes earlier than the longer polls above,
        # this particular tick isn't counted which can skew the average
        if process.poll() is None:
            osd_cpu_utilization += current_osd_cpu_utilization
            osd_memory_utilization += current_osd_memory_utilization
            mon_cpu_utilization += current_mon_cpu_utilization
            mon_memory_utilization += current_mon_memory_utilization
            total_cpu_utilization += current_total_cpu_utilization
            total_memory_utilization += current_total_memory_utilization
            storage_sent_bps += current_storage_sent_bps
            storage_recv_bps += current_storage_recv_bps
            storage_total_bps += current_storage_total_bps
            ticks += 1

    # Get the 1-minute load average, which covers the test duration
    load1, _, _ = os.getloadavg()
    load1 = round(load1, 2)

    # Calculate the average utilization values over the runtime
    # Divide the OSD and MON CPU utilization by the total number of CPU cores, so they are
    # normalized across all cores the same way the total CPU utilization is
    avg_osd_cpu_utilization = round(osd_cpu_utilization / ticks / total_cpus, 2)
    avg_osd_memory_utilization = round(osd_memory_utilization / ticks, 2)
    avg_mon_cpu_utilization = round(mon_cpu_utilization / ticks / total_cpus, 2)
    avg_mon_memory_utilization = round(mon_memory_utilization / ticks, 2)
    avg_total_cpu_utilization = round(total_cpu_utilization / ticks, 2)
    avg_total_memory_utilization = round(total_memory_utilization / ticks, 2)
    avg_storage_sent_bps = round(storage_sent_bps / ticks, 2)
    avg_storage_recv_bps = round(storage_recv_bps / ticks, 2)
    avg_storage_total_bps = round(storage_total_bps / ticks, 2)

    stdout, stderr = process.communicate()
    retcode = process.returncode

    resource_data = {
        "avg_cpu_util_percent": {
            "total": avg_total_cpu_utilization,
            "ceph-mon": avg_mon_cpu_utilization,
            "ceph-osd": avg_osd_cpu_utilization,
        },
        "avg_memory_util_percent": {
            "total": avg_total_memory_utilization,
            "ceph-mon": avg_mon_memory_utilization,
            "ceph-osd": avg_osd_memory_utilization,
        },
        "avg_network_util_bps": {
            "sent": avg_storage_sent_bps,
            "recv": avg_storage_recv_bps,
            "total": avg_storage_total_bps,
        },
    }

    try:
        jstdout = loads(stdout)
        if retcode:
            # A non-zero return code means fio failed; treat it like unparseable output
            raise RuntimeError(f"fio exited with code {retcode}")
    except Exception:
        return None, None

    return resource_data, jstdout


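# Celery worker entrypoint for a full benchmark run. The 13 progress stages are
# two setup stages (status row, volume creation), the eight fio tests from
# test_matrix, and three teardown stages (volume removal, result storage, finish).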
def worker_run_benchmark(zkhandler, celery, config, pool, name):
    # Phase 0 - connect to databases
    if not name:
        cur_time = datetime.now().isoformat(timespec="seconds")
        cur_primary = zkhandler.read("base.config.primary_node")
        job_name = f"{cur_time}_{cur_primary}"
    else:
        job_name = name

    current_stage = 0
    total_stages = 13
    start(
        celery,
        f"Running storage benchmark '{job_name}' on pool '{pool}'",
        current=current_stage,
        total=total_stages,
    )

    try:
        db_conn, db_cur = open_database(config)
    except Exception:
        cleanup(
            job_name,
            db_conn=None,
            db_cur=None,
            zkhandler=zkhandler,
        )
        fail(
            celery,
            "Failed to connect to Postgres",
        )

    current_stage += 1
    update(
        celery,
        "Storing running status in database",
        current=current_stage,
        total=total_stages,
    )

    try:
        query = "INSERT INTO storage_benchmarks (job, test_format, result) VALUES (%s, %s, %s);"
        args = (
            job_name,
            TEST_FORMAT,
            "Running",
        )
        db_cur.execute(query, args)
        db_conn.commit()
    except Exception as e:
        cleanup(
            job_name,
            db_conn=db_conn,
            db_cur=db_cur,
            zkhandler=zkhandler,
        )
        fail(celery, f"Failed to store running status: {e}", exception=BenchmarkError)

    # Phase 1 - volume preparation
    current_stage += 1
    update(
        celery,
        "Creating benchmark volume",
        current=current_stage,
        total=total_stages,
    )

    prepare_benchmark_volume(
        pool,
        job_name=job_name,
        db_conn=db_conn,
        db_cur=db_cur,
        zkhandler=zkhandler,
    )

    # Phase 2 - benchmark run
    results = dict()
    for test in test_matrix:
        current_stage += 1
        update(
            celery,
            f"Running benchmark job '{test}'",
            current=current_stage,
            total=total_stages,
        )

        resource_data, fio_data = run_benchmark_job(
            config,
            test,
            pool,
            job_name=job_name,
            db_conn=db_conn,
            db_cur=db_cur,
            zkhandler=zkhandler,
        )
        if resource_data is None or fio_data is None:
            cleanup_benchmark_volume(
                pool,
                job_name=job_name,
                db_conn=db_conn,
                db_cur=db_cur,
                zkhandler=zkhandler,
            )
            cleanup(
                job_name,
                db_conn=db_conn,
                db_cur=db_cur,
                zkhandler=zkhandler,
            )
            fail(
                None,
                f"Failed to run fio test '{test}'",
            )
        results[test] = {**resource_data, **fio_data}

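    # At this point each results[test] entry holds the averaged resource
    # statistics merged with fio's full JSON output; this is the structure that
    # gets serialized into the "result" column below.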
    # Phase 3 - cleanup
    current_stage += 1
    update(
        celery,
        "Cleaning up benchmark volume",
        current=current_stage,
        total=total_stages,
    )

    cleanup_benchmark_volume(
        pool,
        job_name=job_name,
        db_conn=db_conn,
        db_cur=db_cur,
        zkhandler=zkhandler,
    )

    current_stage += 1
    update(
        celery,
        "Storing results in database",
        current=current_stage,
        total=total_stages,
    )

    try:
        query = "UPDATE storage_benchmarks SET result = %s WHERE job = %s;"
        args = (dumps(results), job_name)
        db_cur.execute(query, args)
        db_conn.commit()
    except Exception as e:
        cleanup(
            job_name,
            db_conn=db_conn,
            db_cur=db_cur,
            zkhandler=zkhandler,
        )
        fail(celery, f"Failed to store test results: {e}", exception=BenchmarkError)

    cleanup(
        job_name,
        db_conn=db_conn,
        db_cur=db_cur,
        zkhandler=zkhandler,
        final=True,
    )

    current_stage += 1
    return finish(
        celery,
        f"Storage benchmark {job_name} completed successfully",
        current=current_stage,
        total=total_stages,
    )