Add storage benchmarking to API
This commit is contained in:
parent
e4891831ce
commit
887e14a4e2
|
@ -0,0 +1,33 @@
|
|||
"""PVC version 0.7
|
||||
|
||||
Revision ID: 3bc6117ea44d
|
||||
Revises: 88c8514684f7
|
||||
Create Date: 2020-08-24 14:34:36.919308
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '3bc6117ea44d'
|
||||
down_revision = '88c8514684f7'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.create_table('storage_benchmarks',
|
||||
sa.Column('id', sa.Integer(), nullable=False),
|
||||
sa.Column('job', sa.Text(), nullable=False),
|
||||
sa.Column('result', sa.Text(), nullable=False),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
# ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade():
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
op.drop_table('storage_benchmarks')
|
||||
# ### end Alembic commands ###
|
|
@ -0,0 +1,478 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# benchmark.py - PVC API Benchmark functions
|
||||
# Part of the Parallel Virtual Cluster (PVC) system
|
||||
#
|
||||
# Copyright (C) 2018-2020 Joshua M. Boniface <joshua@boniface.me>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
import flask
|
||||
import json
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import shlex
|
||||
import subprocess
|
||||
|
||||
from distutils.util import strtobool as dustrtobool
|
||||
|
||||
import daemon_lib.common as pvc_common
|
||||
import daemon_lib.node as pvc_node
|
||||
import daemon_lib.ceph as pvc_ceph
|
||||
|
||||
import pvcapid.libvirt_schema as libvirt_schema
|
||||
|
||||
from pvcapid.ova import list_ova
|
||||
|
||||
def strtobool(stringv):
|
||||
if stringv is None:
|
||||
return False
|
||||
if isinstance(stringv, bool):
|
||||
return bool(stringv)
|
||||
try:
|
||||
return bool(dustrtobool(stringv))
|
||||
except:
|
||||
return False
|
||||
|
||||
#
|
||||
# Exceptions (used by Celery tasks)
|
||||
#
|
||||
class BenchmarkError(Exception):
|
||||
"""
|
||||
An exception that results from the Benchmark job.
|
||||
"""
|
||||
def __init__(self, message, cur_time=None, db_conn=None, db_cur=None, zk_conn=None):
|
||||
self.message = message
|
||||
if cur_time is not None:
|
||||
# Clean up our dangling result
|
||||
query = "DELETE FROM storage_benchmarks WHERE job = %s;"
|
||||
args = (cur_time,)
|
||||
db_cur.execute(query, args)
|
||||
db_conn.commit()
|
||||
# Close the database connections cleanly
|
||||
close_database(db_conn, db_cur)
|
||||
pvc_common.stopZKConnection(zk_conn)
|
||||
|
||||
def __str__(self):
|
||||
return str(self.message)
|
||||
|
||||
#
|
||||
# Common functions
|
||||
#
|
||||
|
||||
# Database connections
|
||||
def open_database(config):
|
||||
conn = psycopg2.connect(
|
||||
host=config['database_host'],
|
||||
port=config['database_port'],
|
||||
dbname=config['database_name'],
|
||||
user=config['database_user'],
|
||||
password=config['database_password']
|
||||
)
|
||||
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
|
||||
return conn, cur
|
||||
|
||||
def close_database(conn, cur, failed=False):
|
||||
if not failed:
|
||||
conn.commit()
|
||||
cur.close()
|
||||
conn.close()
|
||||
|
||||
def list_benchmarks(job=None):
|
||||
if limit is not None:
|
||||
query = "SELECT * FROM {} WHERE job = %s;".format('storage_benchmarks')
|
||||
args = (job, )
|
||||
else:
|
||||
query = "SELECT * FROM {} ORDER BY id DESC;".format('storage_benchmarks')
|
||||
args = ()
|
||||
|
||||
conn, cur = open_database(config)
|
||||
cur.execute(query, args)
|
||||
orig_data = cur.fetchall()
|
||||
data = list()
|
||||
for benchmark in orig_data:
|
||||
benchmark_data = dict()
|
||||
benchmark_data['id'] = benchmark['id']
|
||||
benchmark_data['job'] = benchmark['job']
|
||||
benchmark_data['benchmark_result'] = benchmark['result']
|
||||
# Append the new data to our actual output structure
|
||||
data.append(benchmark_data)
|
||||
close_database(conn, cur)
|
||||
if data:
|
||||
return data, 200
|
||||
else:
|
||||
return { 'message': 'No benchmark found.' }, 404
|
||||
|
||||
def run_benchmark(self, pool):
|
||||
# Runtime imports
|
||||
import time
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
cur_time = datetime.now().isoformat(timespec='seconds')
|
||||
|
||||
print("Starting storage benchmark '{}' on pool '{}'".format(cur_time, pool))
|
||||
|
||||
# Phase 0 - connect to databases
|
||||
try:
|
||||
db_conn, db_cur = open_database(config)
|
||||
except:
|
||||
print('FATAL - failed to connect to Postgres')
|
||||
raise Exception
|
||||
|
||||
try:
|
||||
zk_conn = pvc_common.startZKConnection(config['coordinators'])
|
||||
except:
|
||||
print('FATAL - failed to connect to Zookeeper')
|
||||
raise Exception
|
||||
|
||||
|
||||
print("Storing running status for job '{}' in database".format(cur_time))
|
||||
try:
|
||||
query = "INSERT INTO storage_benchmarks (job, result) VALUES (%s, %s);"
|
||||
args = (cur_time, "Running",)
|
||||
db_cur.execute(query, args)
|
||||
db_conn.commit()
|
||||
except Exception as e:
|
||||
raise BenchmarkError("Failed to store running status: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
|
||||
|
||||
# Phase 1 - volume preparation
|
||||
self.update_state(state='RUNNING', meta={'current': 1, 'total': 3, 'status': 'Creating benchmark volume'})
|
||||
time.sleep(1)
|
||||
|
||||
volume = 'pvcbenchmark'
|
||||
|
||||
# Create the RBD volume
|
||||
retcode, retmsg = pvc_ceph.add_volume(zk_conn, pool, volume, "4G")
|
||||
if not retcode:
|
||||
raise BenchmarkError('Failed to create volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
|
||||
else:
|
||||
print(retmsg)
|
||||
|
||||
# Phase 2 - benchmark run
|
||||
self.update_state(state='RUNNING', meta={'current': 2, 'total': 3, 'status': 'Running fio benchmarks on volume'})
|
||||
time.sleep(1)
|
||||
|
||||
# We run a total of 8 tests, to give a generalized idea of performance on the cluster:
|
||||
# 1. A sequential read test of 4GB with a 4M block size
|
||||
# 2. A sequential write test of 4GB with a 4M block size
|
||||
# 3. A random read test of 4GB with a 4M block size
|
||||
# 4. A random write test of 4GB with a 4M block size
|
||||
# 5. A random read test of 4GB with a 256k block size
|
||||
# 6. A random write test of 4GB with a 256k block size
|
||||
# 7. A random read test of 4GB with a 4k block size
|
||||
# 8. A random write test of 4GB with a 4k block size
|
||||
# Taken together, these 8 results should give a very good indication of the overall storage performance
|
||||
# for a variety of workloads.
|
||||
test_matrix = {
|
||||
'seq_read': {
|
||||
'direction': 'read',
|
||||
'bs': '4M',
|
||||
'rw': 'read'
|
||||
},
|
||||
'seq_write': {
|
||||
'direction': 'write',
|
||||
'bs': '4M',
|
||||
'rw': 'write'
|
||||
},
|
||||
'rand_read_4M': {
|
||||
'direction': 'read',
|
||||
'bs': '4M',
|
||||
'rw': 'randread'
|
||||
},
|
||||
'rand_write_4M': {
|
||||
'direction': 'write',
|
||||
'bs': '4M',
|
||||
'rw': 'randwrite'
|
||||
},
|
||||
'rand_read_256K': {
|
||||
'direction': 'read',
|
||||
'bs': '256K',
|
||||
'rw': 'randread'
|
||||
},
|
||||
'rand_write_256K': {
|
||||
'direction': 'write',
|
||||
'bs': '256K',
|
||||
'rw': 'randwrite'
|
||||
},
|
||||
'rand_read_4K': {
|
||||
'direction': 'read',
|
||||
'bs': '4K',
|
||||
'rw': 'randread'
|
||||
},
|
||||
'rand_write_4K': {
|
||||
'direction': 'write',
|
||||
'bs': '4K',
|
||||
'rw': 'randwrite'
|
||||
}
|
||||
}
|
||||
parsed_results = dict()
|
||||
for test in test_matrix:
|
||||
print("Running test '{}'".format(test))
|
||||
fio_cmd = """
|
||||
fio \
|
||||
--output-format=terse \
|
||||
--terse-version=5 \
|
||||
--ioengine=rbd \
|
||||
--pool={pool} \
|
||||
--rbdname={volume} \
|
||||
--direct=1 \
|
||||
--randrepeat=1 \
|
||||
--gtod_reduce=1 \
|
||||
--iodepth=64 \
|
||||
--size=128M \
|
||||
--name={test} \
|
||||
--bs={bs} \
|
||||
--readwrite={rw}
|
||||
""".format(
|
||||
pool=pool,
|
||||
volume=volume,
|
||||
test=test,
|
||||
bs=test_matrix[test]['bs'],
|
||||
rw=test_matrix[test]['rw']
|
||||
)
|
||||
|
||||
retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
|
||||
if retcode:
|
||||
raise BenchmarkError("Failed to run fio test: {}".format(stderr), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
|
||||
|
||||
# Parse the terse results to avoid storing tons of junk
|
||||
# Reference: https://fio.readthedocs.io/en/latest/fio_doc.html#terse-output
|
||||
# This is written out broken up because the man page didn't bother to do this, and I'm putting it here for posterity.
|
||||
# Example Read test (line breaks to match man ref):
|
||||
# I 5;fio-3.12;test;0;0; (5) [0, 1, 2, 3, 4]
|
||||
# R 8388608;2966268;724;2828; (4) [5, 6, 7, 8]
|
||||
# 0;0;0.000000;0.000000; (4) [9, 10, 11, 12]
|
||||
# 0;0;0.000000;0.000000; (4) [13, 14, 15, 16]
|
||||
# 0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,33, 34, 35, 36]
|
||||
# 0;0;0.000000;0.000000; (4) [37, 38, 39, 40]
|
||||
# 2842624;3153920;100.000000%;2967142.400000;127226.797479;5; (6) [41, 42, 43, 44, 45, 46]
|
||||
# 694;770;724.400000;31.061230;5; (5) [47, 48, 49, 50, 51]
|
||||
# W 0;0;0;0; (4) [52, 53, 54, 55]
|
||||
# 0;0;0.000000;0.000000; (4) [56, 57, 58, 59]
|
||||
# 0;0;0.000000;0.000000; (4) [60, 61, 62, 63]
|
||||
# 0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [64, 65, 66, 67, 68. 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83]
|
||||
# 0;0;0.000000;0.000000; (4) [84, 85, 86, 87]
|
||||
# 0;0;0.000000%;0.000000;0.000000;0; (6) [88, 89, 90, 91, 92, 93]
|
||||
# 0;0;0.000000;0.000000;0; (5) [94, 95, 96, 97, 98]
|
||||
# T 0;0;0;0; (4) [99, 100, 101, 102]
|
||||
# 0;0;0.000000;0.000000; (4) [103, 104, 105, 106]
|
||||
# 0;0;0.000000;0.000000; (4) [107, 108, 109, 110]
|
||||
# 0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20) [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130]
|
||||
# 0;0;0.000000;0.000000; (4) [131, 132, 133, 134]
|
||||
# 0;0;0.000000%;0.000000;0.000000;0; (6) [135, 136, 137, 138, 139, 140]
|
||||
# 0;0;0.000000;0.000000;0; (5) [141, 142, 143, 144, 145]
|
||||
# C 0.495225%;0.000000%;2083;0;13; (5) [146, 147, 148, 149, 150]
|
||||
# D 0.1%;0.1%;0.2%;0.4%;0.8%;1.6%;96.9%; (7) [151, 152, 153, 154, 155, 156, 157]
|
||||
# U 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (10) [158, 159, 160, 161, 162, 163, 164, 165, 166, 167]
|
||||
# M 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (12) [168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178. 179]
|
||||
# B dm-0;0;110;0;0;0;4;4;0.15%; (9) [180, 181, 182, 183, 184, 185, 186, 187, 188]
|
||||
# slaves;0;118;0;28;0;23;0;0.00%; (9) [189, 190, 191, 192, 193, 194, 195, 196, 197]
|
||||
# sde;0;118;0;28;0;23;0;0.00% (9) [198, 199, 200, 201, 202, 203, 204, 205, 206]
|
||||
# Example Write test:
|
||||
# I 5;fio-3.12;test;0;0; (5)
|
||||
# R 0;0;0;0; (4)
|
||||
# 0;0;0.000000;0.000000; (4)
|
||||
# 0;0;0.000000;0.000000; (4)
|
||||
# 0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
|
||||
# 0;0;0.000000;0.000000; (4)
|
||||
# 0;0;0.000000%;0.000000;0.000000;0; (6)
|
||||
# 0;0;0.000000;0.000000;0; (5)
|
||||
# W 8388608;1137438;277;7375; (4)
|
||||
# 0;0;0.000000;0.000000; (4)
|
||||
# 0;0;0.000000;0.000000; (4)
|
||||
# 0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
|
||||
# 0;0;0.000000;0.000000; (4)
|
||||
# 704512;1400832;99.029573%;1126400.000000;175720.860374;14; (6)
|
||||
# 172;342;275.000000;42.900601;14; (5)
|
||||
# T 0;0;0;0; (4)
|
||||
# 0;0;0.000000;0.000000; (4)
|
||||
# 0;0;0.000000;0.000000; (4)
|
||||
# 0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0;0%=0; (20)
|
||||
# 0;0;0.000000;0.000000; (4)
|
||||
# 0;0;0.000000%;0.000000;0.000000;0; (6)
|
||||
# 0;0;0.000000;0.000000;0; (5)
|
||||
# C 12.950909%;1.912124%;746;0;95883; (5)
|
||||
# D 0.1%;0.1%;0.2%;0.4%;0.8%;1.6%;96.9%; (7)
|
||||
# U 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (10)
|
||||
# M 0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%; (12)
|
||||
# B dm-0;0;196;0;0;0;12;12;0.16%; (9)
|
||||
# slaves;0;207;0;95;0;39;16;0.21%; (9)
|
||||
# sde;0;207;0;95;0;39;16;0.21% (9)
|
||||
results = stdout.split(';')
|
||||
if test_matrix[test]['direction'] == 'read':
|
||||
# Stats
|
||||
# 5: Total IO (KiB)
|
||||
# 6: bandwidth (KiB/sec)
|
||||
# 7: IOPS
|
||||
# 8: runtime (msec)
|
||||
# Total latency
|
||||
# 37: min
|
||||
# 38: max
|
||||
# 39: mean
|
||||
# 40: stdev
|
||||
# Bandwidth
|
||||
# 41: min
|
||||
# 42: max
|
||||
# 43: agg % of total
|
||||
# 44: mean
|
||||
# 45: stdev
|
||||
# 46: # samples
|
||||
# IOPS
|
||||
# 47: min
|
||||
# 48: max
|
||||
# 49: mean
|
||||
# 50: stdev
|
||||
# 51: # samples
|
||||
# CPU
|
||||
# 146: user
|
||||
# 147: system
|
||||
# 148: ctx switches
|
||||
# 149: maj faults
|
||||
# 150: min faults
|
||||
parsed_results[test] = {
|
||||
"overall": {
|
||||
"iosize": results[5],
|
||||
"bandwidth": results[6],
|
||||
"iops": results[7],
|
||||
"runtime": results[8]
|
||||
},
|
||||
"latency": {
|
||||
"min": results[37],
|
||||
"max": results[38],
|
||||
"mean": results[39],
|
||||
"stdev": results[40]
|
||||
},
|
||||
"bandwidth": {
|
||||
"min": results[41],
|
||||
"max": results[42],
|
||||
"aggrpct": results[43],
|
||||
"mean": results[44],
|
||||
"stdev": results[45],
|
||||
"numsamples": results[46],
|
||||
},
|
||||
"iops": {
|
||||
"min": results[47],
|
||||
"max": results[48],
|
||||
"mean": results[49],
|
||||
"stdev": results[50],
|
||||
"numsamples": results[51]
|
||||
},
|
||||
"cpu": {
|
||||
"user": results[146],
|
||||
"system": results[147],
|
||||
"ctxsw": results[148],
|
||||
"majfault": results[149],
|
||||
"minfault": results[150]
|
||||
}
|
||||
}
|
||||
|
||||
if test_matrix[test]['direction'] == 'write':
|
||||
# Stats
|
||||
# 52: Total IO (KiB)
|
||||
# 53: bandwidth (KiB/sec)
|
||||
# 54: IOPS
|
||||
# 55: runtime (msec)
|
||||
# Total latency
|
||||
# 84: min
|
||||
# 85: max
|
||||
# 86: mean
|
||||
# 87: stdev
|
||||
# Bandwidth
|
||||
# 88: min
|
||||
# 89: max
|
||||
# 90: agg % of total
|
||||
# 91: mean
|
||||
# 92: stdev
|
||||
# 93: # samples
|
||||
# IOPS
|
||||
# 94: min
|
||||
# 95: max
|
||||
# 96: mean
|
||||
# 97: stdev
|
||||
# 98: # samples
|
||||
# CPU
|
||||
# 146: user
|
||||
# 147: system
|
||||
# 148: ctx switches
|
||||
# 149: maj faults
|
||||
# 150: min faults
|
||||
parsed_results[test] = {
|
||||
"overall": {
|
||||
"iosize": results[52],
|
||||
"bandwidth": results[53],
|
||||
"iops": results[54],
|
||||
"runtime": results[55]
|
||||
},
|
||||
"latency": {
|
||||
"min": results[84],
|
||||
"max": results[85],
|
||||
"mean": results[86],
|
||||
"stdev": results[87]
|
||||
},
|
||||
"bandwidth": {
|
||||
"min": results[88],
|
||||
"max": results[89],
|
||||
"aggrpct": results[90],
|
||||
"mean": results[91],
|
||||
"stdev": results[92],
|
||||
"numsamples": results[93],
|
||||
},
|
||||
"iops": {
|
||||
"min": results[94],
|
||||
"max": results[95],
|
||||
"mean": results[96],
|
||||
"stdev": results[97],
|
||||
"numsamples": results[98]
|
||||
},
|
||||
"cpu": {
|
||||
"user": results[146],
|
||||
"system": results[147],
|
||||
"ctxsw": results[148],
|
||||
"majfault": results[149],
|
||||
"minfault": results[150]
|
||||
}
|
||||
}
|
||||
print(parsed_results)
|
||||
|
||||
# Phase 3 - cleanup
|
||||
self.update_state(state='RUNNING', meta={'current': 3, 'total': 3, 'status': 'Cleaning up and storing results'})
|
||||
time.sleep(1)
|
||||
|
||||
# Remove the RBD volume
|
||||
retcode, retmsg = pvc_ceph.remove_volume(zk_conn, pool, volume)
|
||||
if not retcode:
|
||||
raise BenchmarkError('Failed to remove volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
|
||||
else:
|
||||
print(retmsg)
|
||||
|
||||
print("Storing result of tests for job '{}' in database".format(cur_time))
|
||||
try:
|
||||
query = "UPDATE storage_benchmarks SET result = %s WHERE job = %s;"
|
||||
args = (json.dumps(parsed_results), cur_time)
|
||||
db_cur.execute(query, args)
|
||||
db_conn.commit()
|
||||
except Exception as e:
|
||||
raise BenchmarkError("Failed to store test results: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
|
||||
|
||||
close_database(db_conn, db_cur)
|
||||
pvc_common.stopZKConnection(zk_conn)
|
||||
return { 'status': "Storage benchmark '{}' completed successfully.", 'current': 3, 'total': 3 }
|
|
@ -39,6 +39,7 @@ from celery.task.control import inspect
|
|||
|
||||
import pvcapid.helper as api_helper
|
||||
import pvcapid.provisioner as api_provisioner
|
||||
import pvcapid.benchmark as api_benchmark
|
||||
import pvcapid.ova as api_ova
|
||||
|
||||
from flask_sqlalchemy import SQLAlchemy
|
||||
|
@ -107,6 +108,8 @@ try:
|
|||
api_helper.config = config
|
||||
# Set the config object in the api_provisioner namespace
|
||||
api_provisioner.config = config
|
||||
# Set the config object in the api_benchmark namespace
|
||||
api_benchmark.config = config
|
||||
# Set the config object in the api_ova namespace
|
||||
api_ova.config = config
|
||||
except Exception as e:
|
||||
|
@ -198,6 +201,10 @@ def Authenticator(function):
|
|||
def create_vm(self, vm_name, profile_name, define_vm=True, start_vm=True, script_run_args=[]):
|
||||
return api_provisioner.create_vm(self, vm_name, profile_name, define_vm=define_vm, start_vm=start_vm, script_run_args=script_run_args)
|
||||
|
||||
@celery.task(bind=True)
|
||||
def run_benchmark(self, pool):
|
||||
return api_benchmark.run_benchmark(self, pool)
|
||||
|
||||
|
||||
##########################################################
|
||||
# API Root/Authentication
|
||||
|
@ -2600,6 +2607,64 @@ class API_Storage_Ceph_Utilization(Resource):
|
|||
return api_helper.ceph_util()
|
||||
api.add_resource(API_Storage_Ceph_Utilization, '/storage/ceph/utilization')
|
||||
|
||||
# /storage/ceph/benchmark
|
||||
class API_Storage_Ceph_Benchmark(Resource):
|
||||
@RequestParser([
|
||||
{ 'name': 'job' }
|
||||
])
|
||||
@Authenticator
|
||||
def get(self, reqargs):
|
||||
"""
|
||||
List results from benchmark jobs
|
||||
---
|
||||
tags:
|
||||
- storage / ceph
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
tbd:
|
||||
type: object
|
||||
description: TBD
|
||||
"""
|
||||
return api_benchmark.list_benchmarks(reqargs.get('job', None))
|
||||
|
||||
@RequestParser([
|
||||
{ 'name': 'pool', 'required': True, 'helpmsg': "A valid pool must be specified." },
|
||||
])
|
||||
@Authenticator
|
||||
def post(self, reqargs):
|
||||
"""
|
||||
Execute a storage benchmark against a storage pool
|
||||
---
|
||||
tags:
|
||||
- storage / ceph
|
||||
parameters:
|
||||
- in: query
|
||||
name: pool
|
||||
type: string
|
||||
required: true
|
||||
description: The PVC storage pool to benchmark
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
schema:
|
||||
type: string
|
||||
description: The job ID of the benchmark
|
||||
"""
|
||||
# Verify that the pool is valid
|
||||
_list, code = api_helper.ceph_pool_list(reqargs.get('pool', None), is_fuzzy=False)
|
||||
if code != 200:
|
||||
return { 'message': 'Pool "{}" is not valid.'.format(reqargs.get('pool')) }, 400
|
||||
|
||||
task = run_benchmark.delay(
|
||||
reqargs.get('pool', None)
|
||||
)
|
||||
return { "task_id": task.id }, 202, { 'Location': Api.url_for(api, API_Storage_Ceph_Benchmark, task_id=task.id) }
|
||||
api.add_resource(API_Storage_Ceph_Benchmark, '/storage/ceph/benchmark')
|
||||
|
||||
# /storage/ceph/option
|
||||
class API_Storage_Ceph_Option(Resource):
|
||||
@RequestParser([
|
||||
|
|
|
@ -213,3 +213,17 @@ class DBProfile(db.Model):
|
|||
|
||||
def __repr__(self):
|
||||
return '<id {}>'.format(self.id)
|
||||
|
||||
class DBStorageBenchmarks(db.Model):
|
||||
__tablename__ = 'storage_benchmarks'
|
||||
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
job = db.Column(db.Text, nullable=False)
|
||||
result = db.Column(db.Text, nullable=False)
|
||||
|
||||
def __init__(self, job, result):
|
||||
self.job = job
|
||||
self.result = result
|
||||
|
||||
def __repr__(self):
|
||||
return '<id {}>'.format(self.id)
|
||||
|
|
|
@ -32,9 +32,9 @@ for HOST in ${HOSTS[@]}; do
|
|||
ssh $HOST $SUDO dpkg -i /tmp/pvc/{pvc-client-cli,pvc-daemon-common,pvc-daemon-api,pvc-daemon-node}*.deb
|
||||
ssh $HOST rm -rf /tmp/pvc
|
||||
echo "Restarting PVC node daemon..."
|
||||
ssh $HOST $SUDO systemctl restart pvcnoded
|
||||
echo "****"
|
||||
echo "Waiting 15s for host ${HOST} to stabilize"
|
||||
echo "****"
|
||||
sleep 15
|
||||
ssh $HOST $SUDO systemctl restart pvcapid
|
||||
# echo "****"
|
||||
# echo "Waiting 15s for host ${HOST} to stabilize"
|
||||
# echo "****"
|
||||
# sleep 15
|
||||
done
|
||||
|
|
|
@ -1249,3 +1249,198 @@ def format_list_snapshot(snapshot_list):
|
|||
)
|
||||
|
||||
return '\n'.join(sorted(snapshot_list_output))
|
||||
|
||||
#
|
||||
# Benchmark functions
|
||||
#
|
||||
def ceph_benchmark_run(config, pool):
|
||||
"""
|
||||
Run a storage benchmark against {pool}
|
||||
|
||||
API endpoint: POST /api/v1/storage/ceph/benchmark
|
||||
API arguments: pool={pool}
|
||||
API schema: {message}
|
||||
"""
|
||||
params = {
|
||||
'pool': pool
|
||||
}
|
||||
response = call_api(config, 'post', '/storage/ceph/benchmark', params=params)
|
||||
|
||||
if response.status_code == 202:
|
||||
retvalue = True
|
||||
retdata = 'Task ID: {}'.format(response.json()['task_id'])
|
||||
else:
|
||||
retvalue = False
|
||||
retdata = response.json().get('message', '')
|
||||
|
||||
return retvalue, retdata
|
||||
|
||||
def ceph_benchmark_list(config, job):
|
||||
"""
|
||||
View results of one or more previous benchmark runs
|
||||
|
||||
API endpoint: GET /api/v1/storage/ceph/benchmark
|
||||
API arguments: job={job}
|
||||
API schema: {results}
|
||||
"""
|
||||
if job is not None:
|
||||
params = {
|
||||
'job': job
|
||||
}
|
||||
else:
|
||||
params = {}
|
||||
|
||||
response = call_api(config, 'get', '/storage/ceph/benchmark', params=params)
|
||||
|
||||
if response.status_code == 200:
|
||||
retvalue = True
|
||||
retdata = response.json()
|
||||
else:
|
||||
retvalue = False
|
||||
retdata = response.json()
|
||||
|
||||
return retvalue, retdata
|
||||
|
||||
def format_list_benchmark(benchmark_information, detail=False):
|
||||
if detail:
|
||||
return format_list_benchmark_detailed(benchmark_information)
|
||||
|
||||
benchmark_list_output = []
|
||||
|
||||
benchmark_id_length = 3
|
||||
benchmark_job_length = 20
|
||||
benchmark_bandwidth_length = dict()
|
||||
benchmark_iops_length = dict()
|
||||
|
||||
for test in [ "seq_read", "seq_write", "rand_read_4K", "rand_write_4K" ]:
|
||||
benchmark_bandwidth_length[test] = 7
|
||||
benchmark_iops_length[test] = 6
|
||||
|
||||
# For this output, we're only showing the Sequential (seq_read and seq_write) and 4k Random (rand_read_4K and rand_write_4K) results since we're showing them for each test result.
|
||||
|
||||
# print(benchmark_information)
|
||||
|
||||
for benchmark in benchmark_information:
|
||||
benchmark_id = benchmark['id']
|
||||
_benchmark_id_length = len(str(benchmark_id))
|
||||
if _benchmark_id_length > benchmark_id_length:
|
||||
benchmark_id_length = _benchmark_id_length
|
||||
|
||||
benchmark_job = benchmark['job']
|
||||
_benchmark_job_length = len(benchmark_job)
|
||||
if _benchmark_job_length > benchmark_job_length:
|
||||
benchmark_job_length = _benchmark_job_length
|
||||
|
||||
if benchmark['benchmark_result'] == 'Running':
|
||||
continue
|
||||
benchmark_data = json.loads(benchmark['benchmark_result'])
|
||||
|
||||
benchmark_bandwidth = dict()
|
||||
benchmark_iops = dict()
|
||||
for test in [ "seq_read", "seq_write", "rand_read_4K", "rand_write_4K" ]:
|
||||
benchmark_bandwidth[test] = format_bytes_tohuman(int(benchmark_data[test]['overall']['bandwidth']) * 1024)
|
||||
benchmark_iops[test] = format_ops_tohuman(int(benchmark_data[test]['overall']['iops']))
|
||||
|
||||
_benchmark_bandwidth_length = len(benchmark_bandwidth[test]) + 1
|
||||
if _benchmark_bandwidth_length > benchmark_bandwidth_length[test]:
|
||||
benchmark_bandwidth_length[test] = _benchmark_bandwidth_length
|
||||
|
||||
_benchmark_iops_length = len(benchmark_iops[test]) + 1
|
||||
if _benchmark_iops_length > benchmark_bandwidth_length[test]:
|
||||
benchmark_iops_length[test] = _benchmark_iops_length
|
||||
|
||||
# Format the output header line 1
|
||||
benchmark_list_output.append('{bold}\
|
||||
{benchmark_id: <{benchmark_id_length}} \
|
||||
{benchmark_job: <{benchmark_job_length}} \
|
||||
{seq_header: <{seq_header_length}} \
|
||||
{rand_header: <{rand_header_length}} \
|
||||
{end_bold}'.format(
|
||||
bold=ansiprint.bold(),
|
||||
end_bold=ansiprint.end(),
|
||||
benchmark_id_length=benchmark_id_length,
|
||||
benchmark_job_length=benchmark_job_length,
|
||||
seq_header_length=benchmark_bandwidth_length['seq_read'] + benchmark_bandwidth_length['seq_write'] + benchmark_iops_length['seq_read'] + benchmark_iops_length['seq_write'] + 2,
|
||||
rand_header_length=benchmark_bandwidth_length['rand_read_4K'] + benchmark_bandwidth_length['rand_write_4K'] + benchmark_iops_length['rand_read_4K'] + benchmark_iops_length['rand_write_4K'] + 2,
|
||||
benchmark_id='ID',
|
||||
benchmark_job='Benchmark Job',
|
||||
seq_header='Sequential (4M blocks):',
|
||||
rand_header='Random (4K blocks):'
|
||||
)
|
||||
)
|
||||
|
||||
benchmark_list_output.append('{bold}\
|
||||
{benchmark_id: <{benchmark_id_length}} \
|
||||
{benchmark_job: <{benchmark_job_length}} \
|
||||
{seq_benchmark_bandwidth: <{seq_benchmark_bandwidth_length}} \
|
||||
{seq_benchmark_iops: <{seq_benchmark_iops_length}} \
|
||||
{rand_benchmark_bandwidth: <{rand_benchmark_bandwidth_length}} \
|
||||
{rand_benchmark_iops: <{rand_benchmark_iops_length}} \
|
||||
{end_bold}'.format(
|
||||
bold=ansiprint.bold(),
|
||||
end_bold=ansiprint.end(),
|
||||
benchmark_id_length=benchmark_id_length,
|
||||
benchmark_job_length=benchmark_job_length,
|
||||
seq_benchmark_bandwidth_length=benchmark_bandwidth_length['seq_read'] + benchmark_bandwidth_length['seq_write'] + 1,
|
||||
seq_benchmark_iops_length=benchmark_iops_length['seq_read'] + benchmark_iops_length['seq_write'],
|
||||
rand_benchmark_bandwidth_length=benchmark_bandwidth_length['rand_read_4K'] + benchmark_bandwidth_length['rand_write_4K'] + 1,
|
||||
rand_benchmark_iops_length=benchmark_iops_length['rand_read_4K'] + benchmark_iops_length['rand_write_4K'],
|
||||
benchmark_id='',
|
||||
benchmark_job='',
|
||||
seq_benchmark_bandwidth='Bandwith (R/W)',
|
||||
seq_benchmark_iops='IOPS (R/W)',
|
||||
rand_benchmark_bandwidth='Bandwith (R/W)',
|
||||
rand_benchmark_iops='IOPS (R/W)'
|
||||
)
|
||||
)
|
||||
|
||||
for benchmark in benchmark_information:
|
||||
benchmark_id = benchmark['id']
|
||||
benchmark_job = benchmark['job']
|
||||
|
||||
if benchmark['benchmark_result'] == 'Running':
|
||||
seq_benchmark_bandwidth = 'Running'
|
||||
seq_benchmark_iops = 'Running'
|
||||
rand_benchmark_bandwidth = 'Running'
|
||||
rand_benchmark_iops = 'Running'
|
||||
else:
|
||||
benchmark_bandwidth = dict()
|
||||
benchmark_iops = dict()
|
||||
for test in [ "seq_read", "seq_write", "rand_read_4K", "rand_write_4K" ]:
|
||||
benchmark_data = json.loads(benchmark['benchmark_result'])
|
||||
benchmark_bandwidth[test] = format_bytes_tohuman(int(benchmark_data[test]['overall']['bandwidth']) * 1024)
|
||||
benchmark_iops[test] = format_ops_tohuman(int(benchmark_data[test]['overall']['iops']))
|
||||
|
||||
seq_benchmark_bandwidth = "{}/{}".format(benchmark_bandwidth['seq_read'], benchmark_bandwidth['seq_write'])
|
||||
seq_benchmark_iops = "{}/{}".format(benchmark_iops['seq_read'], benchmark_iops['seq_write'])
|
||||
rand_benchmark_bandwidth = "{}/{}".format(benchmark_bandwidth['rand_read_4K'], benchmark_bandwidth['rand_write_4K'])
|
||||
rand_benchmark_iops = "{}/{}".format(benchmark_iops['rand_read_4K'], benchmark_iops['rand_write_4K'])
|
||||
|
||||
|
||||
benchmark_list_output.append('{bold}\
|
||||
{benchmark_id: <{benchmark_id_length}} \
|
||||
{benchmark_job: <{benchmark_job_length}} \
|
||||
{seq_benchmark_bandwidth: <{seq_benchmark_bandwidth_length}} \
|
||||
{seq_benchmark_iops: <{seq_benchmark_iops_length}} \
|
||||
{rand_benchmark_bandwidth: <{rand_benchmark_bandwidth_length}} \
|
||||
{rand_benchmark_iops: <{rand_benchmark_iops_length}} \
|
||||
{end_bold}'.format(
|
||||
bold='',
|
||||
end_bold='',
|
||||
benchmark_id_length=benchmark_id_length,
|
||||
benchmark_job_length=benchmark_job_length,
|
||||
seq_benchmark_bandwidth_length=benchmark_bandwidth_length['seq_read'] + benchmark_bandwidth_length['seq_write'] + 1,
|
||||
seq_benchmark_iops_length=benchmark_iops_length['seq_read'] + benchmark_iops_length['seq_write'],
|
||||
rand_benchmark_bandwidth_length=benchmark_bandwidth_length['rand_read_4K'] + benchmark_bandwidth_length['rand_write_4K'] + 1,
|
||||
rand_benchmark_iops_length=benchmark_iops_length['rand_read_4K'] + benchmark_iops_length['rand_write_4K'],
|
||||
benchmark_id=benchmark_id,
|
||||
benchmark_job=benchmark_job,
|
||||
seq_benchmark_bandwidth=seq_benchmark_bandwidth,
|
||||
seq_benchmark_iops=seq_benchmark_iops,
|
||||
rand_benchmark_bandwidth=rand_benchmark_bandwidth,
|
||||
rand_benchmark_iops=rand_benchmark_iops
|
||||
)
|
||||
)
|
||||
|
||||
return '\n'.join(benchmark_list_output)
|
||||
|
||||
|
|
|
@ -1576,6 +1576,49 @@ def ceph_util():
|
|||
retdata = pvc_ceph.format_raw_output(retdata)
|
||||
cleanup(retcode, retdata)
|
||||
|
||||
###############################################################################
|
||||
# pvc storage benchmark
|
||||
###############################################################################
|
||||
@click.group(name='benchmark', short_help='Run or view cluster storage benchmarks.')
|
||||
@cluster_req
|
||||
def ceph_benchmark():
|
||||
"""
|
||||
Run or view benchmarks of the storage cluster.
|
||||
"""
|
||||
|
||||
###############################################################################
|
||||
# pvc storage benchmark run
|
||||
###############################################################################
|
||||
@click.command(name='run', short_help='Run a storage benchmark.')
|
||||
@click.argument(
|
||||
'pool'
|
||||
)
|
||||
@cluster_req
|
||||
def ceph_benchmark_run(pool):
|
||||
"""
|
||||
Run a storage benchmark on POOL in the background.
|
||||
"""
|
||||
retcode, retmsg = pvc_ceph.ceph_benchmark_run(config, pool)
|
||||
cleanup(retcode, retmsg)
|
||||
|
||||
###############################################################################
|
||||
# pvc storage benchmark list
|
||||
###############################################################################
|
||||
@click.command(name='list', short_help='List storage benchmark results.')
|
||||
@click.argument(
|
||||
'job', default=None, required=False
|
||||
)
|
||||
@cluster_req
|
||||
def ceph_benchmark_list(job):
|
||||
"""
|
||||
List all Ceph storage benchmarks; optionally only match JOB.
|
||||
"""
|
||||
|
||||
retcode, retdata = pvc_ceph.ceph_benchmark_list(config, job)
|
||||
if retcode:
|
||||
retdata = pvc_ceph.format_list_benchmark(retdata)
|
||||
cleanup(retcode, retdata)
|
||||
|
||||
###############################################################################
|
||||
# pvc storage osd
|
||||
###############################################################################
|
||||
|
@ -3619,6 +3662,9 @@ net_acl.add_command(net_acl_add)
|
|||
net_acl.add_command(net_acl_remove)
|
||||
net_acl.add_command(net_acl_list)
|
||||
|
||||
ceph_benchmark.add_command(ceph_benchmark_run)
|
||||
ceph_benchmark.add_command(ceph_benchmark_list)
|
||||
|
||||
ceph_osd.add_command(ceph_osd_add)
|
||||
ceph_osd.add_command(ceph_osd_remove)
|
||||
ceph_osd.add_command(ceph_osd_in)
|
||||
|
@ -3647,6 +3693,7 @@ ceph_volume_snapshot.add_command(ceph_volume_snapshot_list)
|
|||
|
||||
cli_storage.add_command(ceph_status)
|
||||
cli_storage.add_command(ceph_util)
|
||||
cli_storage.add_command(ceph_benchmark)
|
||||
cli_storage.add_command(ceph_osd)
|
||||
cli_storage.add_command(ceph_pool)
|
||||
cli_storage.add_command(ceph_volume)
|
||||
|
|
|
@ -17,7 +17,7 @@ Description: Parallel Virtual Cluster node daemon (Python 3)
|
|||
|
||||
Package: pvc-daemon-api
|
||||
Architecture: all
|
||||
Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-gevent, python3-celery, python-celery-common, python3-distutils, redis, python3-redis, python3-lxml, python3-flask-migrate, python3-flask-script
|
||||
Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-gevent, python3-celery, python-celery-common, python3-distutils, redis, python3-redis, python3-lxml, python3-flask-migrate, python3-flask-script, fio
|
||||
Description: Parallel Virtual Cluster API daemon (Python 3)
|
||||
A KVM/Zookeeper/Ceph-based VM and private cloud manager
|
||||
.
|
||||
|
|
Loading…
Reference in New Issue