Adjust documentation and behaviour of cpuset
1. Detail the caveats and specific situations and ref the documentation which will provide more details. 2. Always install the configs, but use /etc/default/ceph-osd-cpuset to control if the script does anything or not (so, the "osd" cset set is always active, just not set in a special way).
This commit is contained in:
		| @@ -147,40 +147,30 @@ pvc_sriov_enable: False | ||||
|  | ||||
| # CPU pinning configuration via cset | ||||
| # > ADVANCED TUNING: For most users, this is unnecessary and PVC will run fine with the default scheduling. | ||||
| # > These options can be set to maximize the CPU performance of the Ceph subsystem. Because Ceph OSD | ||||
| #   performance is heavily limited more by CPU than anything else, for users with a lot of relatively slow CPU | ||||
| #   cores, or for those looking to get maximum storage performance, tuning the pinning options here might | ||||
| #   provide noticeable benefits. | ||||
| # > This configuration makes use of the cset command and will dedicate a specific number of CPU cores to the | ||||
| #   Ceph OSD processes on each node. This is accomplished by using cset's shield mechanism to create a cgroup | ||||
| #   which will contain only Ceph OSD processes, while putting everything else onto the remaining CPUs. | ||||
| # > Avoid using this tuning if you have less than 8 total CPU cores (excluding SMT threads). Otherwise, you | ||||
| #   might not have enough CPU cores to properly run VMs, unless you are very careful with vCPU allocation. | ||||
| # > Like the 'pvc_nodes' dictionary, these options are set per-host, even if all hosts are identical. This | ||||
| #   is required to handle situations where hosts might have different CPU topologies. Each host can have a | ||||
| #   specific set of CPUs that are included in the shield. | ||||
| # > Ensure that you know which CPU cores are "real" and which are SMT "threads". This can be obtained using | ||||
| #   the 'virsh capabilities' command and noting the 'siblings' entries for each CPU. | ||||
| # > Ensure you consider NUMA nodes when setting up this tuning. Generally speaking it is better to keep the | ||||
| #   OSD processes onto one NUMA node for simplicity; more advanced tuning is outside of the scope of this | ||||
| #   playbook. | ||||
| # > You should set a number of cores in the shield (along with their respective SMT threads) equal to the | ||||
| #   number of OSDs in the system. This can be adjusted later as needed. For instance, if you have 2 OSDs per | ||||
| #   node, and each node has a 10-core SMT-capable CPU, you would want to assign cores 0 and 1 (the first two | ||||
| #   real cores) and 10 and 11 (the SMT siblings of those cores in 'virsh capabilities') in the cset. | ||||
| #   Uncomment these options only for testing or if you are certain you meet the following conditions. | ||||
| # > These options will tune cpuset (installed by default) to limit Ceph OSDs to certain CPU cores, while | ||||
| #   simultaneously limiting other system tasks and VMs to the remaining CPU cores. In effect it dedicates the | ||||
| #   specified CPU cores to Ceph OSDs only to ensure those processes can have dedicated CPU time. | ||||
| # > Generally speaking, except in cases where extremely high random read throughput is required and in which | ||||
| #   the node(s) have a very large number of physical cores, this setting will not improve performance, and | ||||
| #   may in fact hurt performance. For more details please see the documentation. | ||||
| # > For optimal performance when using this setting, you should dedicate exactly 2 cores, and their | ||||
| #   respective SMT threads if applicable, to each OSD. For instance, with 2 OSDs, 4 real cores (and their | ||||
| #   corresponding SMT threads if applicable) should be specified. More cores have been seen, in some cases, | ||||
| #   to drop performance further. For more details please see the documentation. | ||||
| # > Use the 'virsh capabilities' command to confirm the exact CPU IDs (and SMT "siblings") for these lists. | ||||
| # | ||||
| # The shield mode is disabled by default and a commented out example configuration is shown. | ||||
| pvc_shield_osds_enable: False | ||||
| #pvc_shield_osds_cset: | ||||
| #  # This example host has 2x 6-core SMT-enabled CPUs; we want to use cores 0 (+SMT 12) and 2 (+SMT 14), which are | ||||
| #  # both on physical CPU 0, for 2x OSDs. | ||||
| #  # both on physical CPU 0, for 1x OSD. | ||||
| #  - hostname: pvchv1 | ||||
| #    osd_cset: | ||||
| #      - 0 | ||||
| #      - 2 | ||||
| #      - 12 | ||||
| #      - 14 | ||||
| #  # These example hosts have 1x 8-core SMT-enabled CPUs; we want to use cores 0 (+SMT 8) and 1 (+SMT 9) for 2x OSDs. | ||||
| #  # These example hosts have 1x 8-core SMT-enabled CPUs; we want to use cores 0 (+SMT 8) and 1 (+SMT 9) for 1x OSD. | ||||
| #  - hostname: pvchv2 | ||||
| #    osd_cset: | ||||
| #      - 0 | ||||
|   | ||||
| @@ -117,14 +117,6 @@ | ||||
|     - ceph-mon@{{ ansible_hostname }} | ||||
|     - ceph-mgr@{{ ansible_hostname }} | ||||
|  | ||||
| # This is separate from the block to allow *disabling* of the config without removing it | ||||
| - name: install ceph-osd-cpuset controller config | ||||
|   template: | ||||
|     src: ceph/ceph-osd-cpuset-enable.j2 | ||||
|     dest: /etc/default/ceph-osd-cpuset | ||||
|   when: | ||||
|     - pvc_shield_osds_enable is defined | ||||
|  | ||||
| # System OSD CPU shielding activation | ||||
| - block: | ||||
|     - name: install packages | ||||
| @@ -134,6 +126,11 @@ | ||||
|           - numactl | ||||
|         state: latest | ||||
|  | ||||
|     - name: install ceph-osd-cpuset controller config | ||||
|       template: | ||||
|         src: ceph/ceph-osd-cpuset-enable.j2 | ||||
|         dest: /etc/default/ceph-osd-cpuset | ||||
|  | ||||
|     - name: install ceph-osd-cpuset script | ||||
|       template: | ||||
|         src: ceph/ceph-osd-cpuset.j2 | ||||
| @@ -167,12 +164,9 @@ | ||||
|         enabled: yes | ||||
|  | ||||
|     - debug: | ||||
|         msg: "NOTICE: cpuset configs have NOT been applied to the running system. This node must be rebooted to apply these changes." | ||||
|       when: systemd_file_cpuset.changed or systemd_file_osd.changed | ||||
|         msg: "NOTICE: Any cpuset configs have NOT been applied to the running system. This node must be rebooted to apply these changes." | ||||
|   tags: pvc-ceph-cpuset | ||||
|   when: | ||||
|     - pvc_shield_osds_enable is defined | ||||
|     - pvc_shield_osds_cset is defined | ||||
|     - pvc_shield_osds_cset | selectattr('hostname', 'equalto', inventory_hostname) | list | count > 0 | ||||
|  | ||||
| - meta: flush_handlers | ||||
|   | ||||
| @@ -4,10 +4,16 @@ | ||||
|  | ||||
| # This script is designed to prepare the cpusets for use by Ceph OSDs, VMs, and other system resources. | ||||
| # Libvirt does not make this easy with any way to globally set its CPUs, so we must do this trickery. | ||||
| {% if pvc_shield_osds_cset is defined %} | ||||
| {% set cset_host = pvc_shield_osds_cset | selectattr('hostname', 'equalto', inventory_hostname) %} | ||||
|  | ||||
| A_OSD_CPUS=( {{ cset_host[0]['osd_cset'] | join(' ') }} ) | ||||
| A_SYS_CPUS=() | ||||
| {% else %} | ||||
|  | ||||
| A_OSD_CPUS=() | ||||
| A_SYS_CPUS=() | ||||
| {% endif %} | ||||
|  | ||||
| CPU_INFO="$( lscpu )" | ||||
|  | ||||
| @@ -41,7 +47,9 @@ for i in $( seq 0 $(( ${CPU_COUNT} - 1 )) ); do | ||||
|     fi | ||||
| done | ||||
|  | ||||
| if [[ $( cat /etc/default/ceph-osd-cpuset ) == "True" ]]; then | ||||
| {% raw %} | ||||
| if [[ $( cat /etc/default/ceph-osd-cpuset ) == "True" && ${#A_OSD_CPUS[@]} -gt 0 ]]; then | ||||
| {% endraw %} | ||||
|     # Convert arrays into CSV | ||||
|     OSD_MEMS="$( IFS=, ; echo "${A_OSD_MEMS[*]}" )" | ||||
|     OSD_CPUS="$( IFS=, ; echo "${A_OSD_CPUS[*]}" )" | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| # PVC Ceph OSD cpuset service unit | ||||
| # {{ ansible_managed }} | ||||
| {% set cset_host = pvc_shield_osds_cset | selectattr('hostname', 'equalto', inventory_hostname) %} | ||||
| [Unit] | ||||
| Description = Ceph OSD cpuset shield creation | ||||
| Before = ceph-osd@.service libvirtd.service | ||||
|   | ||||
		Reference in New Issue
	
	Block a user