Dear Openstack user community,
I have a compute node with 2 NUMA nodes and I would like to create 2 VMs, each one using a different NUMA node through NUMA affinity for CPU, memory and NVMe PCI devices.
pci passthrough whitelist
[root@zeus-53 ~]# tail /etc/kolla/nova-compute/nova.conf
[notifications]
[filter_scheduler]
enabled_filters = enabled_filters = RetryFilter, AvailabilityZoneFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter, PciPassthroughFilter
available_filters = nova.scheduler.filters.all_filters
[pci]
passthrough_whitelist = [ {"address":"0000:06:00.0"}, {"address":"0000:07:00.0"}, {"address":"0000:08:00.0"}, {"address":"0000:09:00.0"}, {"address":"0000:84:00.0"}, {"address":"0000:85:00.0"}, {"address":"0000:86:00.0"}, {"address":"0000:87:00.0"}
]
alias = { "vendor_id":"8086", "product_id":"0953", "device_type":"type-PCI", "name":"nvme"}
Openstack flavor
openstack flavor create --public xlarge.numa.perf.test --ram 200000 --disk 700 --vcpus 20 --property hw:cpu_policy=dedicated --property hw:emulator_threads_policy=isolate --property hw:numa_nodes='1' --property pci_passthrough:alias='nvme:4'
The first vm is successfully created
openstack server create --network hpc --flavor xlarge.numa.perf.test --image centos7.6-image --availability-zone nova:zeus-53.localdomain --key-name mykey kudu-1
However the second vm fails
openstack server create --network hpc --flavor xlarge.numa.perf --image centos7.6-kudu-image --availability-zone nova:zeus-53.localdomain --key-name mykey kudu-4
Errors in nova compute node
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [req-b5a25c73-8c7d-466c-8128-71f29e7ae8aa 91e83343e9834c8ba0172ff369c8acac b91520cff5bd45c59a8de07c38641582 - default default] [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] Instance failed to spawn: libvirtError: internal error: qemu unexpectedly closed the monitor: 2019-09-27T06:45:19.118089Z qemu-kvm: kvm_init_vcpu failed: Cannot allocate memory
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] Traceback (most recent call last):
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/nova/compute/manager.py", line 2369, in _build_resources
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] yield resources
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/nova/compute/manager.py", line 2133, in _build_and_run_instance
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] block_device_info=block_device_info)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/nova/virt/libvirt/driver.py", line 3142, in spawn
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] destroy_disks_on_failure=True)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/nova/virt/libvirt/driver.py", line 5705, in _create_domain_and_network
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] destroy_disks_on_failure)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/oslo_utils/excutils.py", line 220, in __exit__
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] self.force_reraise()
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/oslo_utils/excutils.py", line 196, in force_reraise
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] six.reraise(self.type_, self.value, self.tb)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/nova/virt/libvirt/driver.py", line 5674, in _create_domain_and_network
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] post_xml_callback=post_xml_callback)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/nova/virt/libvirt/driver.py", line 5608, in _create_domain
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] guest.launch(pause=pause)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/nova/virt/libvirt/guest.py", line 144, in launch
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] self._encoded_xml, errors='ignore')
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/oslo_utils/excutils.py", line 220, in __exit__
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] self.force_reraise()
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/oslo_utils/excutils.py", line 196, in force_reraise
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] six.reraise(self.type_, self.value, self.tb)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/nova/virt/libvirt/guest.py", line 139, in launch
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] return self._domain.createWithFlags(flags)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/eventlet/tpool.py", line 186, in doit
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] result = proxy_call(self._autowrap, f, *args, **kwargs)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/eventlet/tpool.py", line 144, in proxy_call
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] rv = execute(f, *args, **kwargs)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/eventlet/tpool.py", line 125, in execute
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] six.reraise(c, e, tb)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib/python2.7/site-packages/eventlet/tpool.py", line 83, in tworker
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] rv = meth(*args, **kwargs)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] File "/usr/lib64/python2.7/site-packages/libvirt.py", line 1110, in createWithFlags
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] if ret == -1: raise libvirtError ('virDomainCreateWithFlags() failed', dom=self)
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae] libvirtError: internal error: qemu unexpectedly closed the monitor: 2019-09-27T06:45:19.118089Z qemu-kvm: kvm_init_vcpu failed: Cannot allocate memory
2019-09-27 16:45:19.785 7 ERROR nova.compute.manager [instance: ebe4e78c-501e-4535-ae15-948301cbf1ae]
NUMA cell/node 1 (the one assigned to kudu-4) has enough CPU, memory, PCI devices and disk capacity to fit this VM. NOTE: below is the relevant information I could think of that shows the resources available after creating the second VM.
[root@zeus-53 ~]# numactl -H
available: 2 nodes (0-1)
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 28 29 30 31 32 33 34 35 36 37 38 39 40 41
node 0 size: 262029 MB
node 0 free: 52787 MB
node 1 cpus: 14 15 16 17 18 19 20 21 22 23 24 25 26 27 42 43 44 45 46 47 48 49 50 51 52 53 54 55
node 1 size: 262144 MB
node 1 free: 250624 MB
node distances:
node 0 1
0: 10 21
1: 21 10
NOTE: this is to show that NUMA node/cell 1 has enough resources available (the nova-compute logs also show that kudu-4 is assigned to cell 1)
[root@zeus-53 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/md127 3.7T 9.1G 3.7T 1% /
...
NOTE: VM disk files go to the root (/) partition
[root@zeus-53 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 59.6G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 16G 0 part [SWAP]
loop0 7:0 0 100G 0 loop
└─docker-9:127-6979358884-pool 253:0 0 100G 0 dm
  ├─docker-9:127-6979358884-4301cee8d0433729cd6332ca2b6111afc85f14c48d4ce2d888a1da0ef9b5ca01 253:1 0 10G 0 dm
  ├─docker-9:127-6979358884-d59208adcb7cee3418f810f24e6c3a55d39281f713c8e76141fc61a8deba8a2b 253:2 0 10G 0 dm
  ├─docker-9:127-6979358884-106bc0838e37442eca84eb9ab17aa7a45308b7e3a38be3fb21a4fa00366fe306 253:3 0 10G 0 dm
  ├─docker-9:127-6979358884-7e16b5d012ab8744739b671fcdc8e47db5cc64e6c3d5a5fe423bfd68cfb07b20 253:4 0 10G 0 dm
  ├─docker-9:127-6979358884-f1c2545b4edbfd7b42d2a492eda8224fcf7cefc3e3a41e65d307c585acffe6a8 253:5 0 10G 0 dm
  ├─docker-9:127-6979358884-e7fd6c7b3f624f387bdb3746a7944a30c92d8ee5395e75c76288b281bd009d90 253:6 0 10G 0 dm
  ├─docker-9:127-6979358884-95a818cc7afd9867385bb9a9ea750d4cc6e162916c6ae3a157097af74578e1e4 253:7 0 10G 0 dm
  ├─docker-9:127-6979358884-9a7f28d396c149119556f382bf5c19f5925eed5d18b94407649244c7adabb4b3 253:8 0 10G 0 dm
  ├─docker-9:127-6979358884-b25941b6f115300caea977911e2d7fd3541ef187c9aa5736fe10fad638ecd0d1 253:9 0 10G 0 dm
  ├─docker-9:127-6979358884-122b201c6ad24896a205f8db4a64759ba8fbd5bbe245d0f98984268a01e6a0c4 253:10 0 10G 0 dm
  └─docker-9:127-6979358884-bc04120ba59a1b393f338a1cef64b16d920cf4e73400198e4b999bb72a42ff90 253:11 0 10G 0 dm
loop1 7:1 0 2G 0 loop
└─docker-9:127-6979358884-pool 253:0 0 100G 0 dm
  ├─docker-9:127-6979358884-4301cee8d0433729cd6332ca2b6111afc85f14c48d4ce2d888a1da0ef9b5ca01 253:1 0 10G 0 dm
  ├─docker-9:127-6979358884-d59208adcb7cee3418f810f24e6c3a55d39281f713c8e76141fc61a8deba8a2b 253:2 0 10G 0 dm
  ├─docker-9:127-6979358884-106bc0838e37442eca84eb9ab17aa7a45308b7e3a38be3fb21a4fa00366fe306 253:3 0 10G 0 dm
  ├─docker-9:127-6979358884-7e16b5d012ab8744739b671fcdc8e47db5cc64e6c3d5a5fe423bfd68cfb07b20 253:4 0 10G 0 dm
  ├─docker-9:127-6979358884-f1c2545b4edbfd7b42d2a492eda8224fcf7cefc3e3a41e65d307c585acffe6a8 253:5 0 10G 0 dm
  ├─docker-9:127-6979358884-e7fd6c7b3f624f387bdb3746a7944a30c92d8ee5395e75c76288b281bd009d90 253:6 0 10G 0 dm
  ├─docker-9:127-6979358884-95a818cc7afd9867385bb9a9ea750d4cc6e162916c6ae3a157097af74578e1e4 253:7 0 10G 0 dm
  ├─docker-9:127-6979358884-9a7f28d396c149119556f382bf5c19f5925eed5d18b94407649244c7adabb4b3 253:8 0 10G 0 dm
  ├─docker-9:127-6979358884-b25941b6f115300caea977911e2d7fd3541ef187c9aa5736fe10fad638ecd0d1 253:9 0 10G 0 dm
  ├─docker-9:127-6979358884-122b201c6ad24896a205f8db4a64759ba8fbd5bbe245d0f98984268a01e6a0c4 253:10 0 10G 0 dm
  └─docker-9:127-6979358884-bc04120ba59a1b393f338a1cef64b16d920cf4e73400198e4b999bb72a42ff90 253:11 0 10G 0 dm
nvme0n1 259:8 0 1.8T 0 disk
└─nvme0n1p1 259:9 0 1.8T 0 part
  └─md127 9:127 0 3.7T 0 raid0 /
nvme1n1 259:6 0 1.8T 0 disk
└─nvme1n1p1 259:7 0 1.8T 0 part
  └─md127 9:127 0 3.7T 0 raid0 /
nvme2n1 259:2 0 1.8T 0 disk
nvme3n1 259:1 0 1.8T 0 disk
nvme4n1 259:0 0 1.8T 0 disk
nvme5n1 259:3 0 1.8T 0 disk
NOTE: this is to show that there are 4 nvme disks (nvme2n1, nvme3n1, nvme4n1, nvme5n1) available for the second vm
What does "qemu-kvm: kvm_init_vcpu failed: Cannot allocate memory" mean in this context?
Thank you very much