Replace broken "serial" restarts with a new method

This commit is contained in:
Joshua Boniface 2023-09-01 15:42:22 -04:00
parent 4a17a9918a
commit 146e660a21
1 changed file with 18 additions and 17 deletions

View File

@@ -19,24 +19,25 @@
name: patroni name: patroni
state: restarted state: restarted
# Restart services one-at-a-time
# Ideally, this would be accomplished with "serial=1", but Ansible doesn't support that and
# likely never will. Instead, we just run the service restart manually, sleeping X seconds
# before each restart, where X is 15 * (host_id - 1), i.e. 15 seconds multiplied by one less
# than the host's "host_id". Thus, the following host configuration leads to the specified
# sleep times:
# * pvchv1: 0s
# * pvchv2: 15s
# * pvchv3: 30s
# * etc.
# In practice, this yields a "serial=1"-style sequence of restarts, allowing the service
# to float its primary around after a configuration change.
- name: restart ceph-mon - name: restart ceph-mon
service: shell: sleep {{ 15 * (hostvars[ansible_hostname].ansible_local.host_id|int - 1) }} && systemctl restart ceph-mon@{{ ansible_hostname }}.service
name: ceph-mon@{{ ansible_hostname }} ignore_errors: true
state: restarted
- name: restart ceph-mgr - name: restart ceph-mgr
service: shell: sleep {{ 15 * (hostvars[ansible_hostname].ansible_local.host_id|int - 1) }} && systemctl restart ceph-mgr@{{ ansible_hostname }}.service
name: ceph-mgr@{{ ansible_hostname }} ignore_errors: true
state: restarted
- name: restart pvcd
# Restart pvcd, but sequentially one-host-at-a-time with a 15s wait in between; this provides shell: sleep {{ 15 * (hostvars[ansible_hostname].ansible_local.host_id|int - 1) }} && systemctl restart pvcd.service
# plenty of time for the primary state to switch around without putting the cluster in a
# no-primary state
- name: restart pvcd
shell: systemctl restart pvcd && sleep 15
ignore_errors: true ignore_errors: true
run_once: true
delegate_to: "{{ play_host }}"
with_items: "{{ play_hosts }}"
loop_control:
loop_var: play_host