---
# Rolling upgrade play: processes one host at a time (serial: 1).
#
# Per host:
#   1. turn PVC maintenance mode on
#   2. apt full upgrade + cleanup
#   3. run the two check_mk plugins; an exit code of 1 from either one marks
#      the corresponding task as "changed"
#   4. if either check is "changed": move the node to secondary, flush it,
#      stop the Ceph/PVC/Zookeeper services, reboot, wait for Ceph to
#      recover, then bring the node back
#   5. turn PVC maintenance mode off
- hosts: all
  remote_user: deploy
  become: true
  become_user: root
  gather_facts: true
  serial: 1

  tasks:
    - name: set PVC maintenance mode
      command: pvc maintenance on

    - name: aptitude full upgrade and cleanup
      apt:
        update_cache: true
        autoremove: true
        autoclean: true
        upgrade: full

    - name: clean apt archives
      file:
        path: /var/cache/apt/archives
        state: absent

    # The plugin's exit code is only used as a signal: rc == 1 is reported as
    # "changed" and the task is never allowed to fail.
    # NOTE(review): presumably rc == 1 means "restart needed" - confirm
    # against the plugin scripts.
    - name: check library freshness
      command: /usr/lib/check_mk_agent/plugins/freshness
      register: freshness
      changed_when: freshness.rc == 1
      failed_when: false

    - name: check kernel version
      command: /usr/lib/check_mk_agent/plugins/kernelversion
      register: kernelversion
      changed_when: kernelversion.rc == 1
      failed_when: false

    # NOTE(review): the condition is applied to the whole block (drain, stop,
    # reboot, recover), not only to its last task - confirm this matches the
    # intended behavior.
    - name: restart system cleanly
      when: freshness.changed or kernelversion.changed
      block:
        # Best effort: errors from this command are ignored.
        - name: secondary node
          command: 'pvc node secondary {{ ansible_hostname }}'
          ignore_errors: true

        - name: wait 15 seconds for system to stabilize
          pause:
            seconds: 15
          become: false
          connection: local

        - name: flush node
          command: 'pvc node flush {{ ansible_hostname }} --wait'

        # Polls up to 60 times, 10 seconds apart, until no line of
        # `virsh list` contains "running".
        - name: ensure VMs are migrated away
          shell: "virsh list | grep running | wc -l"
          register: virshcount
          changed_when: false
          failed_when: virshcount.stdout != "0"
          until: virshcount.stdout == "0"
          retries: 60
          delay: 10

        - name: wait 15 seconds for system to stabilize
          pause:
            seconds: 15
          become: false
          connection: local

        - name: set OSD noout
          command: pvc storage osd set noout

        # grep exits non-zero when no OSD unit is listed; that is tolerated
        # and simply yields an empty list for the next task.
        - name: get running OSD services
          shell: "systemctl | awk '{ print $1 }' | grep 'ceph-osd@[0-9]*.service'"
          register: osd_services
          changed_when: false
          ignore_errors: true

        - name: stop Ceph OSD daemons cleanly
          service:
            name: "{{ item }}"
            state: stopped
          ignore_errors: true
          loop: "{{ osd_services.stdout_lines }}"

        - name: stop Ceph Monitor daemon cleanly
          service:
            name: "ceph-mon@{{ ansible_hostname }}"
            state: stopped
          ignore_errors: true

        - name: stop Ceph Manager daemon cleanly
          service:
            name: "ceph-mgr@{{ ansible_hostname }}"
            state: stopped
          ignore_errors: true

        - name: wait 15 seconds for system to stabilize
          pause:
            seconds: 15
          become: false
          connection: local

        - name: stop PVC flush daemon cleanly
          service:
            name: pvc-flush
            state: stopped

        - name: stop PVC daemon cleanly
          service:
            name: pvcnoded
            state: stopped

        - name: stop Zookeeper daemon cleanly
          service:
            name: zookeeper
            state: stopped

        - name: restart system
          reboot:
            post_reboot_delay: 15
            reboot_timeout: 1800

        # Prints OK when the 1st and 3rd fields of the "osds:" line of
        # `ceph osd stat` are equal, NOK otherwise. Retried up to 60 times,
        # 10 seconds apart.
        - name: make sure all OSDs are active
          shell: >-
            ceph osd stat | grep 'osds:' |
            awk '{ if ( $1 == $3 ) { print "OK" } else { print "NOK" } }'
          register: osdstat
          changed_when: false
          failed_when: osdstat.stdout == "NOK"
          until: osdstat.stdout == "OK"
          retries: 60
          delay: 10

        # grep -o prints the phrase only while it is present in
        # `ceph health`; empty output ends the retry loop.
        - name: make sure all PGs have recovered
          shell: "ceph health | grep -wo 'Degraded data redundancy'"
          register: cephhealth
          changed_when: false
          failed_when: cephhealth.stdout == "Degraded data redundancy"
          until: cephhealth.stdout == ""
          retries: 60
          delay: 10

        - name: unset OSD noout
          command: pvc storage osd unset noout

        - name: unflush node
          command: "pvc node ready {{ ansible_hostname }} --wait"

        - name: wait 30 seconds for system to stabilize
          pause:
            seconds: 30
          become: false
          connection: local

        - name: reset any systemd failures
          command: systemctl reset-failed

    - name: unset PVC maintenance mode
      command: pvc maintenance off

    - name: wait 5 seconds for system to stabilize
      pause:
        seconds: 5
      become: false
      connection: local