Add live migrate max downtime selector meta field

Adds a new field to the VM metadata that sets the maximum downtime
allowed during a live migration. This allows the value to be raised for
very busy VMs that would otherwise hang during live migration.
This commit is contained in:
Joshua Boniface 2024-01-10 16:13:31 -05:00
parent 38eeb78423
commit 09269f182c
17 changed files with 283 additions and 30 deletions

View File

@ -0,0 +1,28 @@
"""PVC version 0.9.89
Revision ID: 977e7b4d3497
Revises: 88fa0d88a9f8
Create Date: 2024-01-10 16:09:44.659027
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` is this migration's ID; `down_revision` is the ID of the
# migration that this one revises (see the module docstring).
revision = '977e7b4d3497'
down_revision = '88fa0d88a9f8'
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None
def upgrade():
    """Add the ``migration_max_downtime`` column to ``system_template``.

    The column is a nullable integer. ``server_default`` makes the database
    supply 300 whenever no explicit value is given for the column.
    """
    # Describe the new column first, then hand it to Alembic in one call.
    max_downtime_column = sa.Column(
        'migration_max_downtime',
        sa.Integer(),
        default="300",
        server_default="300",
        nullable=True,
    )
    op.add_column('system_template', max_downtime_column)
def downgrade():
    """Remove the ``migration_max_downtime`` column from ``system_template``.

    This is the inverse of ``upgrade()``; any values stored in the column
    are discarded along with it.
    """
    table_name = 'system_template'
    column_name = 'migration_max_downtime'
    op.drop_column(table_name, column_name)

View File

@ -19,7 +19,8 @@ case "$( cat /etc/debian_version )" in
;; ;;
*) *)
# Debian 12+ # Debian 12+
flask --app ./pvcapid-manage_flask.py db upgrade export FLASK_APP=./pvcapid-manage_flask.py
flask db upgrade
;; ;;
esac esac

View File

@ -46,7 +46,7 @@ from flask_sqlalchemy import SQLAlchemy
app = flask.Flask(__name__) app = flask.Flask(__name__)
# Set up SQLAlchemy backend # Set up SQLAlchemy backend
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = True
app.config["SQLALCHEMY_DATABASE_URI"] = "postgresql://{}:{}@{}:{}/{}".format( app.config["SQLALCHEMY_DATABASE_URI"] = "postgresql://{}:{}@{}:{}/{}".format(
config["api_postgresql_user"], config["api_postgresql_user"],
config["api_postgresql_password"], config["api_postgresql_password"],
@ -1591,6 +1591,9 @@ class API_VM_Root(Resource):
migration_method: migration_method:
type: string type: string
description: The preferred migration method (live, shutdown, none) description: The preferred migration method (live, shutdown, none)
migration_max_downtime:
type: integer
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
tags: tags:
type: array type: array
description: The tag(s) of the VM description: The tag(s) of the VM
@ -1843,6 +1846,10 @@ class API_VM_Root(Resource):
"choices": ("live", "shutdown", "none"), "choices": ("live", "shutdown", "none"),
"helptext": "A valid migration_method must be specified", "helptext": "A valid migration_method must be specified",
}, },
{
"name": "migration_max_downtime",
"helptext": "A valid migration_max_downtime must be specified",
},
{"name": "user_tags", "action": "append"}, {"name": "user_tags", "action": "append"},
{"name": "protected_tags", "action": "append"}, {"name": "protected_tags", "action": "append"},
{ {
@ -1903,6 +1910,12 @@ class API_VM_Root(Resource):
- live - live
- shutdown - shutdown
- none - none
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
default: 300
- in: query - in: query
name: user_tags name: user_tags
type: array type: array
@ -1943,6 +1956,7 @@ class API_VM_Root(Resource):
reqargs.get("selector", "none"), reqargs.get("selector", "none"),
bool(strtobool(reqargs.get("autostart", "false"))), bool(strtobool(reqargs.get("autostart", "false"))),
reqargs.get("migration_method", "none"), reqargs.get("migration_method", "none"),
reqargs.get("migration_max_downtime", 300),
user_tags, user_tags,
protected_tags, protected_tags,
) )
@ -1990,6 +2004,10 @@ class API_VM_Element(Resource):
"choices": ("live", "shutdown", "none"), "choices": ("live", "shutdown", "none"),
"helptext": "A valid migration_method must be specified", "helptext": "A valid migration_method must be specified",
}, },
{
"name": "migration_max_downtime",
"helptext": "A valid migration_max_downtime must be specified",
},
{"name": "user_tags", "action": "append"}, {"name": "user_tags", "action": "append"},
{"name": "protected_tags", "action": "append"}, {"name": "protected_tags", "action": "append"},
{ {
@ -2052,6 +2070,12 @@ class API_VM_Element(Resource):
- live - live
- shutdown - shutdown
- none - none
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
default: 300
- in: query - in: query
name: user_tags name: user_tags
type: array type: array
@ -2092,6 +2116,7 @@ class API_VM_Element(Resource):
reqargs.get("selector", "none"), reqargs.get("selector", "none"),
bool(strtobool(reqargs.get("autostart", "false"))), bool(strtobool(reqargs.get("autostart", "false"))),
reqargs.get("migration_method", "none"), reqargs.get("migration_method", "none"),
reqargs.get("migration_max_downtime", 300),
user_tags, user_tags,
protected_tags, protected_tags,
) )
@ -2218,6 +2243,9 @@ class API_VM_Metadata(Resource):
migration_method: migration_method:
type: string type: string
description: The preferred migration method (live, shutdown, none) description: The preferred migration method (live, shutdown, none)
migration_max_downtime:
type: integer
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
404: 404:
description: VM not found description: VM not found
schema: schema:
@ -2241,6 +2269,10 @@ class API_VM_Metadata(Resource):
"choices": ("live", "shutdown", "none"), "choices": ("live", "shutdown", "none"),
"helptext": "A valid migration_method must be specified", "helptext": "A valid migration_method must be specified",
}, },
{
"name": "migration_max_downtime",
"helptext": "A valid migration_max_downtime must be specified",
},
] ]
) )
@Authenticator @Authenticator
@ -2288,6 +2320,12 @@ class API_VM_Metadata(Resource):
- live - live
- shutdown - shutdown
- none - none
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
default: none
responses: responses:
200: 200:
description: OK description: OK
@ -2312,6 +2350,7 @@ class API_VM_Metadata(Resource):
reqargs.get("autostart", None), reqargs.get("autostart", None),
reqargs.get("profile", None), reqargs.get("profile", None),
reqargs.get("migration_method", None), reqargs.get("migration_method", None),
reqargs.get("migration_max_downtime", None),
) )
@ -6387,6 +6426,9 @@ class API_Provisioner_Template_System_Root(Resource):
migration_method: migration_method:
type: string type: string
description: The preferred migration method (live, shutdown, none) description: The preferred migration method (live, shutdown, none)
migration_max_downtime:
type: integer
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
parameters: parameters:
- in: query - in: query
name: limit name: limit
@ -6431,6 +6473,7 @@ class API_Provisioner_Template_System_Root(Resource):
{"name": "node_selector"}, {"name": "node_selector"},
{"name": "node_autostart"}, {"name": "node_autostart"},
{"name": "migration_method"}, {"name": "migration_method"},
{"name": "migration_max_downtime"},
] ]
) )
@Authenticator @Authenticator
@ -6491,6 +6534,11 @@ class API_Provisioner_Template_System_Root(Resource):
type: string type: string
required: false required: false
description: The preferred migration method (live, shutdown, none) description: The preferred migration method (live, shutdown, none)
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
responses: responses:
200: 200:
description: OK description: OK
@ -6541,6 +6589,7 @@ class API_Provisioner_Template_System_Root(Resource):
reqargs.get("node_selector", None), reqargs.get("node_selector", None),
node_autostart, node_autostart,
reqargs.get("migration_method", None), reqargs.get("migration_method", None),
reqargs.get("migration_max_downtime", None),
) )
@ -6596,6 +6645,7 @@ class API_Provisioner_Template_System_Element(Resource):
{"name": "node_selector"}, {"name": "node_selector"},
{"name": "node_autostart"}, {"name": "node_autostart"},
{"name": "migration_method"}, {"name": "migration_method"},
{"name": "migration_max_downtime"},
] ]
) )
@Authenticator @Authenticator
@ -6651,6 +6701,11 @@ class API_Provisioner_Template_System_Element(Resource):
type: string type: string
required: false required: false
description: The preferred migration method (live, shutdown, none) description: The preferred migration method (live, shutdown, none)
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
responses: responses:
200: 200:
description: OK description: OK
@ -6701,6 +6756,7 @@ class API_Provisioner_Template_System_Element(Resource):
reqargs.get("node_selector", None), reqargs.get("node_selector", None),
node_autostart, node_autostart,
reqargs.get("migration_method", None), reqargs.get("migration_method", None),
reqargs.get("migration_max_downtime", None),
) )
@RequestParser( @RequestParser(
@ -6714,6 +6770,7 @@ class API_Provisioner_Template_System_Element(Resource):
{"name": "node_selector"}, {"name": "node_selector"},
{"name": "node_autostart"}, {"name": "node_autostart"},
{"name": "migration_method"}, {"name": "migration_method"},
{"name": "migration_max_downtime"},
] ]
) )
@Authenticator @Authenticator
@ -6760,6 +6817,10 @@ class API_Provisioner_Template_System_Element(Resource):
name: migration_method name: migration_method
type: string type: string
description: The preferred migration method (live, shutdown, none) description: The preferred migration method (live, shutdown, none)
- in: query
name: migration_max_downtime
type: integer
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
responses: responses:
200: 200:
description: OK description: OK
@ -6783,6 +6844,7 @@ class API_Provisioner_Template_System_Element(Resource):
reqargs.get("node_selector", None), reqargs.get("node_selector", None),
reqargs.get("node_autostart", None), reqargs.get("node_autostart", None),
reqargs.get("migration_method", None), reqargs.get("migration_method", None),
reqargs.get("migration_max_downtime", None),
) )
@Authenticator @Authenticator

View File

@ -641,6 +641,7 @@ def vm_define(
selector, selector,
autostart, autostart,
migration_method, migration_method,
migration_max_downtime=300,
user_tags=[], user_tags=[],
protected_tags=[], protected_tags=[],
): ):
@ -668,6 +669,7 @@ def vm_define(
selector, selector,
autostart, autostart,
migration_method, migration_method,
migration_max_downtime,
profile=None, profile=None,
tags=tags, tags=tags,
) )
@ -826,6 +828,7 @@ def get_vm_meta(zkhandler, vm):
domain_node_selector, domain_node_selector,
domain_node_autostart, domain_node_autostart,
domain_migrate_method, domain_migrate_method,
domain_migrate_max_downtime,
) = pvc_common.getDomainMetadata(zkhandler, dom_uuid) ) = pvc_common.getDomainMetadata(zkhandler, dom_uuid)
retcode = 200 retcode = 200
@ -835,6 +838,7 @@ def get_vm_meta(zkhandler, vm):
"node_selector": domain_node_selector.lower(), "node_selector": domain_node_selector.lower(),
"node_autostart": domain_node_autostart, "node_autostart": domain_node_autostart,
"migration_method": domain_migrate_method.lower(), "migration_method": domain_migrate_method.lower(),
"migration_max_downtime": int(domain_migrate_max_downtime),
} }
return retdata, retcode return retdata, retcode
@ -842,7 +846,14 @@ def get_vm_meta(zkhandler, vm):
@ZKConnection(config) @ZKConnection(config)
def update_vm_meta( def update_vm_meta(
zkhandler, vm, limit, selector, autostart, provisioner_profile, migration_method zkhandler,
vm,
limit,
selector,
autostart,
provisioner_profile,
migration_method,
migration_max_downtime,
): ):
""" """
Update metadata of a VM. Update metadata of a VM.
@ -858,7 +869,14 @@ def update_vm_meta(
autostart = False autostart = False
retflag, retdata = pvc_vm.modify_vm_metadata( retflag, retdata = pvc_vm.modify_vm_metadata(
zkhandler, vm, limit, selector, autostart, provisioner_profile, migration_method zkhandler,
vm,
limit,
selector,
autostart,
provisioner_profile,
migration_method,
migration_max_downtime,
) )
if retflag: if retflag:

View File

@ -36,6 +36,7 @@ class DBSystemTemplate(db.Model):
node_selector = db.Column(db.Text) node_selector = db.Column(db.Text)
node_autostart = db.Column(db.Boolean, nullable=False) node_autostart = db.Column(db.Boolean, nullable=False)
migration_method = db.Column(db.Text) migration_method = db.Column(db.Text)
migration_max_downtime = db.Column(db.Integer, default=300, server_default="300")
ova = db.Column(db.Integer, db.ForeignKey("ova.id"), nullable=True) ova = db.Column(db.Integer, db.ForeignKey("ova.id"), nullable=True)
def __init__( def __init__(
@ -50,6 +51,7 @@ class DBSystemTemplate(db.Model):
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
ova=None, ova=None,
): ):
self.name = name self.name = name
@ -62,6 +64,7 @@ class DBSystemTemplate(db.Model):
self.node_selector = node_selector self.node_selector = node_selector
self.node_autostart = node_autostart self.node_autostart = node_autostart
self.migration_method = migration_method self.migration_method = migration_method
self.migration_max_downtime = migration_max_downtime
self.ova = ova self.ova = ova
def __repr__(self): def __repr__(self):

View File

@ -221,6 +221,7 @@ def create_template_system(
node_selector=None, node_selector=None,
node_autostart=False, node_autostart=False,
migration_method=None, migration_method=None,
migration_max_downtime=None,
ova=None, ova=None,
): ):
if list_template_system(name, is_fuzzy=False)[-1] != 404: if list_template_system(name, is_fuzzy=False)[-1] != 404:
@ -231,7 +232,7 @@ def create_template_system(
if node_selector == "none": if node_selector == "none":
node_selector = None node_selector = None
query = "INSERT INTO system_template (name, vcpu_count, vram_mb, serial, vnc, vnc_bind, node_limit, node_selector, node_autostart, migration_method, ova) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" query = "INSERT INTO system_template (name, vcpu_count, vram_mb, serial, vnc, vnc_bind, node_limit, node_selector, node_autostart, migration_method, migration_max_downtime, ova) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
args = ( args = (
name, name,
vcpu_count, vcpu_count,
@ -243,6 +244,7 @@ def create_template_system(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
ova, ova,
) )
@ -438,6 +440,7 @@ def modify_template_system(
node_selector=None, node_selector=None,
node_autostart=None, node_autostart=None,
migration_method=None, migration_method=None,
migration_max_downtime=None,
): ):
if list_template_system(name, is_fuzzy=False)[-1] != 200: if list_template_system(name, is_fuzzy=False)[-1] != 200:
retmsg = {"message": 'The system template "{}" does not exist.'.format(name)} retmsg = {"message": 'The system template "{}" does not exist.'.format(name)}
@ -505,6 +508,11 @@ def modify_template_system(
if migration_method is not None: if migration_method is not None:
fields.append({"field": "migration_method", "data": migration_method}) fields.append({"field": "migration_method", "data": migration_method})
if migration_max_downtime is not None:
fields.append(
{"field": "migration_max_downtime", "data": int(migration_max_downtime)}
)
conn, cur = open_database(config) conn, cur = open_database(config)
try: try:
for field in fields: for field in fields:

View File

@ -1098,6 +1098,14 @@ def cli_vm():
type=click.Choice(["none", "live", "shutdown"]), type=click.Choice(["none", "live", "shutdown"]),
help="The preferred migration method of the VM between nodes; saved with VM.", help="The preferred migration method of the VM between nodes; saved with VM.",
) )
@click.option(
"-d",
"--max-downtime",
"migration_max_downtime",
default=300,
show_default=True,
help="The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger downtime.",
)
@click.option( @click.option(
"-g", "-g",
"--tag", "--tag",
@ -1122,6 +1130,7 @@ def cli_vm_define(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
user_tags, user_tags,
protected_tags, protected_tags,
): ):
@ -1135,10 +1144,12 @@ def cli_vm_define(
* "load": choose the node with the lowest current load average * "load": choose the node with the lowest current load average
* "vms": choose the node with the least number of provisioned VMs * "vms": choose the node with the least number of provisioned VMs
For most clusters, "mem" should be sufficient, but others may be used based on the cluster workload and available resources. The following caveats should be considered: For most clusters, the migration method selector ("--method"/"-m") "mem" should be sufficient, but others may be used based on the cluster workload and available resources. The following caveats should be considered:
* "mem" looks at the free memory of the node in general, ignoring the amount provisioned to VMs; if any VM's internal memory usage changes, this value would be affected. * "mem" looks at the free memory of the node in general, ignoring the amount provisioned to VMs; if any VM's internal memory usage changes, this value would be affected.
* "memprov" looks at the provisioned memory, not the allocated memory; thus, stopped or disabled VMs are counted towards a node's memory for this selector, even though their memory is not actively in use. * "memprov" looks at the provisioned memory, not the allocated memory; thus, stopped or disabled VMs are counted towards a node's memory for this selector, even though their memory is not actively in use.
* "load" looks at the system load of the node in general, ignoring load in any particular VMs; if any VM's CPU usage changes, this value would be affected. This might be preferable on clusters with some very CPU intensive VMs. * "load" looks at the system load of the node in general, ignoring load in any particular VMs; if any VM's CPU usage changes, this value would be affected. This might be preferable on clusters with some very CPU intensive VMs.
For most VMs, the 300ms default maximum downtime ("--max-downtime"/"-d") should be sufficient. However very busy VMs with a lot of memory pressure or CPU load may require a larger downtime to properly migrate. Generally, keep this at the default unless you know the VM will be extremely busy, or you find you have problems migrating it later. Reasonable values range from 100ms to 2000ms (2 seconds).
""" """
# Open the XML file # Open the XML file
@ -1160,6 +1171,7 @@ def cli_vm_define(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
user_tags, user_tags,
protected_tags, protected_tags,
) )
@ -1205,6 +1217,13 @@ def cli_vm_define(
type=click.Choice(["none", "live", "shutdown"]), type=click.Choice(["none", "live", "shutdown"]),
help="The preferred migration method of the VM between nodes.", help="The preferred migration method of the VM between nodes.",
) )
@click.option(
"-d",
"--max-downtime",
"migration_max_downtime",
default=None,
help="The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger downtime.",
)
@click.option( @click.option(
"-p", "-p",
"--profile", "--profile",
@ -1220,12 +1239,13 @@ def cli_vm_meta(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
provisioner_profile, provisioner_profile,
): ):
""" """
Modify the PVC metadata of existing virtual machine DOMAIN. At least one option to update must be specified. DOMAIN may be a UUID or name. Modify the PVC metadata of existing virtual machine DOMAIN. At least one option to update must be specified. DOMAIN may be a UUID or name.
For details on the "--node-selector"/"-s" values, please see help for the command "pvc vm define". For details on the available option values, please see help for the command "pvc vm define".
""" """
if ( if (
@ -1233,6 +1253,7 @@ def cli_vm_meta(
and node_selector is None and node_selector is None
and node_autostart is None and node_autostart is None
and migration_method is None and migration_method is None
and migration_max_downtime is None
and provisioner_profile is None and provisioner_profile is None
): ):
finish(False, "At least one metadata option must be specified to update.") finish(False, "At least one metadata option must be specified to update.")
@ -1244,6 +1265,7 @@ def cli_vm_meta(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
provisioner_profile, provisioner_profile,
) )
finish(retcode, retmsg) finish(retcode, retmsg)
@ -4456,6 +4478,13 @@ def cli_provisioner_template_system():
default=None, # Use cluster default default=None, # Use cluster default
help="The preferred migration method of the VM between nodes", help="The preferred migration method of the VM between nodes",
) )
@click.option(
"--max-downtime",
"migration_max_downtime",
default=300,
show_default=True,
help="The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger downtime.",
)
def cli_provisioner_template_system_add( def cli_provisioner_template_system_add(
name, name,
vcpus, vcpus,
@ -4467,11 +4496,12 @@ def cli_provisioner_template_system_add(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
): ):
""" """
Add a new system template NAME to the PVC cluster provisioner. Add a new system template NAME to the PVC cluster provisioner.
For details on the possible "--node-selector" values, please see help for the command "pvc vm define". For details on the possible option values, please see help for the command "pvc vm define".
""" """
params = dict() params = dict()
params["name"] = name params["name"] = name
@ -4489,6 +4519,8 @@ def cli_provisioner_template_system_add(
params["node_autostart"] = node_autostart params["node_autostart"] = node_autostart
if migration_method: if migration_method:
params["migration_method"] = migration_method params["migration_method"] = migration_method
if migration_max_downtime:
params["migration_max_downtime"] = migration_max_downtime
retcode, retdata = pvc.lib.provisioner.template_add( retcode, retdata = pvc.lib.provisioner.template_add(
CLI_CONFIG, params, template_type="system" CLI_CONFIG, params, template_type="system"
@ -4551,6 +4583,12 @@ def cli_provisioner_template_system_add(
default=None, # Use cluster default default=None, # Use cluster default
help="The preferred migration method of the VM between nodes", help="The preferred migration method of the VM between nodes",
) )
@click.option(
"--max-downtime",
"migration_max_downtime",
default=None,
help="The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger downtime.",
)
def cli_provisioner_template_system_modify( def cli_provisioner_template_system_modify(
name, name,
vcpus, vcpus,
@ -4562,11 +4600,12 @@ def cli_provisioner_template_system_modify(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
): ):
""" """
Add a new system template NAME to the PVC cluster provisioner. Add a new system template NAME to the PVC cluster provisioner.
For details on the possible "--node-selector" values, please see help for the command "pvc vm define". For details on the possible option values, please see help for the command "pvc vm define".
""" """
params = dict() params = dict()
params["vcpus"] = vcpus params["vcpus"] = vcpus
@ -4578,6 +4617,7 @@ def cli_provisioner_template_system_modify(
params["node_selector"] = node_selector params["node_selector"] = node_selector
params["node_autostart"] = node_autostart params["node_autostart"] = node_autostart
params["migration_method"] = migration_method params["migration_method"] = migration_method
params["migration_max_downtime"] = migration_max_downtime
retcode, retdata = pvc.lib.provisioner.template_modify( retcode, retdata = pvc.lib.provisioner.template_modify(
CLI_CONFIG, params, name, template_type="system" CLI_CONFIG, params, name, template_type="system"

View File

@ -779,7 +779,8 @@ def format_list_template_system(template_data):
template_node_limit_length = 6 template_node_limit_length = 6
template_node_selector_length = 9 template_node_selector_length = 9
template_node_autostart_length = 10 template_node_autostart_length = 10
template_migration_method_length = 10 template_migration_method_length = 12
template_migration_max_downtime_length = 13
for template in template_data: for template in template_data:
# template_name column # template_name column
@ -826,6 +827,17 @@ def format_list_template_system(template_data):
_template_migration_method_length = len(str(template["migration_method"])) + 1 _template_migration_method_length = len(str(template["migration_method"])) + 1
if _template_migration_method_length > template_migration_method_length: if _template_migration_method_length > template_migration_method_length:
template_migration_method_length = _template_migration_method_length template_migration_method_length = _template_migration_method_length
# template_migration_max_downtime column
_template_migration_max_downtime_length = (
len(str(template["migration_max_downtime"])) + 1
)
if (
_template_migration_max_downtime_length
> template_migration_max_downtime_length
):
template_migration_max_downtime_length = (
_template_migration_max_downtime_length
)
# Format the string (header) # Format the string (header)
template_list_output.append( template_list_output.append(
@ -842,7 +854,8 @@ def format_list_template_system(template_data):
+ template_node_selector_length + template_node_selector_length
+ template_node_autostart_length + template_node_autostart_length
+ template_migration_method_length + template_migration_method_length
+ 3, + template_migration_max_downtime_length
+ 4,
template_header="System Templates " template_header="System Templates "
+ "".join( + "".join(
["-" for _ in range(17, template_name_length + template_id_length)] ["-" for _ in range(17, template_name_length + template_id_length)]
@ -874,7 +887,8 @@ def format_list_template_system(template_data):
+ template_node_selector_length + template_node_selector_length
+ template_node_autostart_length + template_node_autostart_length
+ template_migration_method_length + template_migration_method_length
+ 2, + template_migration_max_downtime_length
+ 3,
) )
] ]
), ),
@ -891,7 +905,8 @@ def format_list_template_system(template_data):
{template_node_limit: <{template_node_limit_length}} \ {template_node_limit: <{template_node_limit_length}} \
{template_node_selector: <{template_node_selector_length}} \ {template_node_selector: <{template_node_selector_length}} \
{template_node_autostart: <{template_node_autostart_length}} \ {template_node_autostart: <{template_node_autostart_length}} \
{template_migration_method: <{template_migration_method_length}}{end_bold}".format( {template_migration_method: <{template_migration_method_length}} \
{template_migration_max_downtime: <{template_migration_max_downtime_length}}{end_bold}".format(
bold=ansiprint.bold(), bold=ansiprint.bold(),
end_bold=ansiprint.end(), end_bold=ansiprint.end(),
template_name_length=template_name_length, template_name_length=template_name_length,
@ -905,6 +920,7 @@ def format_list_template_system(template_data):
template_node_selector_length=template_node_selector_length, template_node_selector_length=template_node_selector_length,
template_node_autostart_length=template_node_autostart_length, template_node_autostart_length=template_node_autostart_length,
template_migration_method_length=template_migration_method_length, template_migration_method_length=template_migration_method_length,
template_migration_max_downtime_length=template_migration_max_downtime_length,
template_name="Name", template_name="Name",
template_id="ID", template_id="ID",
template_vcpu="vCPUs", template_vcpu="vCPUs",
@ -915,7 +931,8 @@ def format_list_template_system(template_data):
template_node_limit="Limit", template_node_limit="Limit",
template_node_selector="Selector", template_node_selector="Selector",
template_node_autostart="Autostart", template_node_autostart="Autostart",
template_migration_method="Migration", template_migration_method="Mig. Method",
template_migration_max_downtime="Max Downtime",
) )
) )
@ -931,7 +948,8 @@ def format_list_template_system(template_data):
{template_node_limit: <{template_node_limit_length}} \ {template_node_limit: <{template_node_limit_length}} \
{template_node_selector: <{template_node_selector_length}} \ {template_node_selector: <{template_node_selector_length}} \
{template_node_autostart: <{template_node_autostart_length}} \ {template_node_autostart: <{template_node_autostart_length}} \
{template_migration_method: <{template_migration_method_length}}{end_bold}".format( {template_migration_method: <{template_migration_method_length}} \
{template_migration_max_downtime: <{template_migration_max_downtime_length}}{end_bold}".format(
template_name_length=template_name_length, template_name_length=template_name_length,
template_id_length=template_id_length, template_id_length=template_id_length,
template_vcpu_length=template_vcpu_length, template_vcpu_length=template_vcpu_length,
@ -943,6 +961,7 @@ def format_list_template_system(template_data):
template_node_selector_length=template_node_selector_length, template_node_selector_length=template_node_selector_length,
template_node_autostart_length=template_node_autostart_length, template_node_autostart_length=template_node_autostart_length,
template_migration_method_length=template_migration_method_length, template_migration_method_length=template_migration_method_length,
template_migration_max_downtime_length=template_migration_max_downtime_length,
bold="", bold="",
end_bold="", end_bold="",
template_name=str(template["name"]), template_name=str(template["name"]),
@ -956,6 +975,7 @@ def format_list_template_system(template_data):
template_node_selector=str(template["node_selector"]), template_node_selector=str(template["node_selector"]),
template_node_autostart=str(template["node_autostart"]), template_node_autostart=str(template["node_autostart"]),
template_migration_method=str(template["migration_method"]), template_migration_method=str(template["migration_method"]),
template_migration_max_downtime=f"{str(template['migration_max_downtime'])} ms",
) )
) )

View File

@ -205,6 +205,7 @@ def vm_metadata(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
provisioner_profile, provisioner_profile,
): ):
""" """
@ -229,6 +230,9 @@ def vm_metadata(
if migration_method is not None: if migration_method is not None:
params["migration_method"] = migration_method params["migration_method"] = migration_method
if migration_max_downtime is not None:
params["migration_max_downtime"] = migration_max_downtime
if provisioner_profile is not None: if provisioner_profile is not None:
params["profile"] = provisioner_profile params["profile"] = provisioner_profile
@ -1637,14 +1641,14 @@ def format_info(config, domain_information, long_output):
) )
) )
ainformation.append( ainformation.append(
"{}Current Node:{} {}".format( "{}Current node:{} {}".format(
ansiprint.purple(), ansiprint.end(), domain_information["node"] ansiprint.purple(), ansiprint.end(), domain_information["node"]
) )
) )
if not domain_information["last_node"]: if not domain_information["last_node"]:
domain_information["last_node"] = "N/A" domain_information["last_node"] = "N/A"
ainformation.append( ainformation.append(
"{}Previous Node:{} {}".format( "{}Previous node:{} {}".format(
ansiprint.purple(), ansiprint.end(), domain_information["last_node"] ansiprint.purple(), ansiprint.end(), domain_information["last_node"]
) )
) )
@ -1676,15 +1680,12 @@ def format_info(config, domain_information, long_output):
formatted_node_autostart = "True" formatted_node_autostart = "True"
if not domain_information.get("migration_method"): if not domain_information.get("migration_method"):
formatted_migration_method = "Any" formatted_migration_method = "Live, Shutdown"
else: else:
formatted_migration_method = str(domain_information["migration_method"]).title() formatted_migration_method = (
f"{str(domain_information['migration_method']).title()} only"
ainformation.append(
"{}Migration selector:{} {}".format(
ansiprint.purple(), ansiprint.end(), formatted_node_selector
) )
)
ainformation.append( ainformation.append(
"{}Node limit:{} {}".format( "{}Node limit:{} {}".format(
ansiprint.purple(), ansiprint.end(), formatted_node_limit ansiprint.purple(), ansiprint.end(), formatted_node_limit
@ -1700,10 +1701,22 @@ def format_info(config, domain_information, long_output):
) )
) )
ainformation.append( ainformation.append(
"{}Migration Method:{} {}".format( "{}Migration method:{} {}".format(
ansiprint.purple(), ansiprint.end(), formatted_migration_method ansiprint.purple(), ansiprint.end(), formatted_migration_method
) )
) )
ainformation.append(
"{}Migration selector:{} {}".format(
ansiprint.purple(), ansiprint.end(), formatted_node_selector
)
)
ainformation.append(
"{}Max live downtime:{} {}".format(
ansiprint.purple(),
ansiprint.end(),
f"{domain_information['migration_max_downtime']} ms",
)
)
# Tag list # Tag list
tags_name_length = 5 tags_name_length = 5

View File

@ -441,12 +441,14 @@ def getDomainMetadata(zkhandler, dom_uuid):
domain_node_selector, domain_node_selector,
domain_node_autostart, domain_node_autostart,
domain_migration_method, domain_migration_method,
domain_migration_max_downtime,
) = zkhandler.read_many( ) = zkhandler.read_many(
[ [
("domain.meta.node_limit", dom_uuid), ("domain.meta.node_limit", dom_uuid),
("domain.meta.node_selector", dom_uuid), ("domain.meta.node_selector", dom_uuid),
("domain.meta.autostart", dom_uuid), ("domain.meta.autostart", dom_uuid),
("domain.meta.migrate_method", dom_uuid), ("domain.meta.migrate_method", dom_uuid),
("domain.meta.migrate_max_downtime", dom_uuid),
] ]
) )
@ -464,11 +466,15 @@ def getDomainMetadata(zkhandler, dom_uuid):
if not domain_migration_method or domain_migration_method == "none": if not domain_migration_method or domain_migration_method == "none":
domain_migration_method = None domain_migration_method = None
if not domain_migration_max_downtime or domain_migration_max_downtime == "none":
domain_migration_max_downtime = 300
return ( return (
domain_node_limit, domain_node_limit,
domain_node_selector, domain_node_selector,
domain_node_autostart, domain_node_autostart,
domain_migration_method, domain_migration_method,
domain_migration_max_downtime,
) )
@ -505,6 +511,7 @@ def getInformationFromXML(zkhandler, uuid):
domain_node_selector, domain_node_selector,
domain_node_autostart, domain_node_autostart,
domain_migration_method, domain_migration_method,
domain_migration_max_downtime,
) = getDomainMetadata(zkhandler, uuid) ) = getDomainMetadata(zkhandler, uuid)
domain_tags = getDomainTags(zkhandler, uuid) domain_tags = getDomainTags(zkhandler, uuid)
@ -565,6 +572,7 @@ def getInformationFromXML(zkhandler, uuid):
"node_selector": domain_node_selector, "node_selector": domain_node_selector,
"node_autostart": bool(strtobool(domain_node_autostart)), "node_autostart": bool(strtobool(domain_node_autostart)),
"migration_method": domain_migration_method, "migration_method": domain_migration_method,
"migration_max_downtime": int(domain_migration_max_downtime),
"tags": domain_tags, "tags": domain_tags,
"description": domain_description, "description": domain_description,
"profile": domain_profile, "profile": domain_profile,

View File

@ -0,0 +1 @@
{"version": "13", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "logs": "/logs", "faults": "/faults", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "faults": {"id": "", "last_time": "/last_time", "first_time": "/first_time", "ack_time": "/ack_time", "status": "/status", "delta": "/delta", "message": "/message"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health", "network.stats": "/network_stats"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", 
"data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.migrate_max_downtime": "/migration_max_downtime", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": 
"/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}

View File

@ -147,6 +147,7 @@ def define_vm(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method=None, migration_method=None,
migration_max_downtime=300,
profile=None, profile=None,
tags=[], tags=[],
initial_state="stop", initial_state="stop",
@ -272,6 +273,10 @@ def define_vm(
(("domain.console.vnc", dom_uuid), ""), (("domain.console.vnc", dom_uuid), ""),
(("domain.meta.autostart", dom_uuid), node_autostart), (("domain.meta.autostart", dom_uuid), node_autostart),
(("domain.meta.migrate_method", dom_uuid), str(migration_method).lower()), (("domain.meta.migrate_method", dom_uuid), str(migration_method).lower()),
(
("domain.meta.migrate_max_downtime", dom_uuid),
int(migration_max_downtime),
),
(("domain.meta.node_limit", dom_uuid), formatted_node_limit), (("domain.meta.node_limit", dom_uuid), formatted_node_limit),
(("domain.meta.node_selector", dom_uuid), str(node_selector).lower()), (("domain.meta.node_selector", dom_uuid), str(node_selector).lower()),
(("domain.meta.tags", dom_uuid), ""), (("domain.meta.tags", dom_uuid), ""),
@ -305,6 +310,7 @@ def modify_vm_metadata(
node_autostart, node_autostart,
provisioner_profile, provisioner_profile,
migration_method, migration_method,
migration_max_downtime,
): ):
dom_uuid = getDomainUUID(zkhandler, domain) dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid: if not dom_uuid:
@ -331,6 +337,14 @@ def modify_vm_metadata(
(("domain.meta.migrate_method", dom_uuid), str(migration_method).lower()) (("domain.meta.migrate_method", dom_uuid), str(migration_method).lower())
) )
if migration_max_downtime is not None:
update_list.append(
(
("domain.meta.migrate_max_downtime", dom_uuid),
int(migration_max_downtime),
)
)
if len(update_list) < 1: if len(update_list) < 1:
return False, "ERROR: No updates to apply." return False, "ERROR: No updates to apply."
@ -563,6 +577,7 @@ def rename_vm(zkhandler, domain, new_domain):
dom_info["node_selector"], dom_info["node_selector"],
dom_info["node_autostart"], dom_info["node_autostart"],
migration_method=dom_info["migration_method"], migration_method=dom_info["migration_method"],
migration_max_downtime=dom_info["migration_max_downtime"],
profile=dom_info["profile"], profile=dom_info["profile"],
tags=dom_info["tags"], tags=dom_info["tags"],
initial_state="stop", initial_state="stop",
@ -1624,6 +1639,7 @@ def restore_vm(zkhandler, domain, backup_path, datestring, retain_snapshot=False
backup_source_details["vm_detail"]["node_selector"], backup_source_details["vm_detail"]["node_selector"],
backup_source_details["vm_detail"]["node_autostart"], backup_source_details["vm_detail"]["node_autostart"],
backup_source_details["vm_detail"]["migration_method"], backup_source_details["vm_detail"]["migration_method"],
backup_source_details["vm_detail"]["migration_max_downtime"],
backup_source_details["vm_detail"]["profile"], backup_source_details["vm_detail"]["profile"],
backup_source_details["vm_detail"]["tags"], backup_source_details["vm_detail"]["tags"],
"restore", "restore",

View File

@ -744,6 +744,7 @@ def worker_create_vm(
node_selector = vm_data["system_details"]["node_selector"] node_selector = vm_data["system_details"]["node_selector"]
node_autostart = vm_data["system_details"]["node_autostart"] node_autostart = vm_data["system_details"]["node_autostart"]
migration_method = vm_data["system_details"]["migration_method"] migration_method = vm_data["system_details"]["migration_method"]
migration_max_downtime = vm_data["system_details"]["migration_max_downtime"]
with open_zk(config) as zkhandler: with open_zk(config) as zkhandler:
retcode, retmsg = pvc_vm.define_vm( retcode, retmsg = pvc_vm.define_vm(
zkhandler, zkhandler,
@ -753,6 +754,7 @@ def worker_create_vm(
node_selector, node_selector,
node_autostart, node_autostart,
migration_method, migration_method,
migration_max_downtime,
vm_profile, vm_profile,
initial_state="provision", initial_state="provision",
) )

View File

@ -572,7 +572,7 @@ class ZKHandler(object):
# #
class ZKSchema(object): class ZKSchema(object):
# Current version # Current version
_version = 12 _version = 13
# Root for doing nested keys # Root for doing nested keys
_schema_root = "" _schema_root = ""
@ -707,6 +707,7 @@ class ZKSchema(object):
"console.vnc": "/vnc", "console.vnc": "/vnc",
"meta.autostart": "/node_autostart", "meta.autostart": "/node_autostart",
"meta.migrate_method": "/migration_method", "meta.migrate_method": "/migration_method",
"meta.migrate_max_downtime": "/migration_max_downtime",
"meta.node_selector": "/node_selector", "meta.node_selector": "/node_selector",
"meta.node_limit": "/node_limit", "meta.node_limit": "/node_limit",
"meta.tags": "/tags", "meta.tags": "/tags",
@ -1026,6 +1027,8 @@ class ZKSchema(object):
default_data = "False" default_data = "False"
elif elem == "pool" and ikey == "tier": elif elem == "pool" and ikey == "tier":
default_data = "default" default_data = "default"
elif elem == "domain" and ikey == "meta.migrate_max_downtime":
default_data = "300"
else: else:
default_data = "" default_data = ""
zkhandler.zk_conn.create( zkhandler.zk_conn.create(

View File

@ -2,12 +2,19 @@
# Generate the database migration files # Generate the database migration files
set -o xtrace
VERSION="$( head -1 debian/changelog | awk -F'[()-]' '{ print $2 }' )" VERSION="$( head -1 debian/changelog | awk -F'[()-]' '{ print $2 }' )"
sudo ip addr add 10.0.1.250/32 dev lo
pushd $( git rev-parse --show-toplevel ) &>/dev/null pushd $( git rev-parse --show-toplevel ) &>/dev/null
pushd api-daemon &>/dev/null pushd api-daemon &>/dev/null
export PVC_CONFIG_FILE="../pvc.sample.conf" export PVC_CONFIG_FILE="../pvc.sample.conf"
./pvcapid-manage_flask.py db migrate -m "PVC version ${VERSION}" export FLASK_APP=./pvcapid-manage_flask.py
./pvcapid-manage_flask.py db upgrade flask db migrate -m "PVC version ${VERSION}"
flask db upgrade
popd &>/dev/null popd &>/dev/null
popd &>/dev/null popd &>/dev/null
sudo ip addr del 10.0.1.250/32 dev lo

View File

@ -687,6 +687,29 @@ class VMInstance(object):
abort_migrate("Target node changed during preparation") abort_migrate("Target node changed during preparation")
return return
if not force_shutdown: if not force_shutdown:
# Set the maxdowntime value from Zookeeper
try:
max_downtime = self.zkhandler.read(
("domain.meta.migrate_max_downtime", self.domuuid)
)
except Exception as e:
self.logger.out(
f"Error fetching migrate max downtime; using default of 300s: {e}",
state="w",
)
self.max_downtime = 300
self.logger.out(
f"Running migrate-setmaxdowntime with downtime value {max_downtime}",
state="i",
prefix="Domain {}".format(self.domuuid),
)
retcode, stdout, stderr = common.run_os_command(
f"virsh migrate-setmaxdowntime --downtime {max_downtime} {self.domuuid}"
)
if retcode:
abort_migrate("Failed to set maxdowntime value on running VM")
return
# A live migrate is attempted 3 times in succession # A live migrate is attempted 3 times in succession
ticks = 0 ticks = 0
while True: while True:

View File

@ -168,7 +168,7 @@ database:
port: 6379 port: 6379
# Hostname; use `cluster` network floating IP address # Hostname; use `cluster` network floating IP address
hostname: 10.0.1.250 hostname: 127.0.0.1
# Path, usually "/0" # Path, usually "/0"
path: "/0" path: "/0"
@ -180,7 +180,7 @@ database:
port: 5432 port: 5432
# Hostname; use `cluster` network floating IP address # Hostname; use `cluster` network floating IP address
hostname: 10.0.1.250 hostname: 127.0.0.1
# Credentials # Credentials
credentials: credentials: