summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Schmaus <flow@gentoo.org>2022-11-09 09:54:09 +0100
committerFlorian Schmaus <flow@gentoo.org>2022-11-09 09:54:09 +0100
commit364cc6703e42a167e223662998592c26a315fd36 (patch)
tree642939ac0d9e401dd9d1cea0e34f32b1968ce9ab
parentXen 4.15.4-pre-patchset-1 (diff)
downloadxen-upstream-patches-364cc6703e42a167e223662998592c26a315fd36.tar.gz
xen-upstream-patches-364cc6703e42a167e223662998592c26a315fd36.tar.bz2
xen-upstream-patches-364cc6703e42a167e223662998592c26a315fd36.zip
Xen 4.15.4-pre-patchset-24.15.4-pre-patchset-2
Signed-off-by: Florian Schmaus <flow@gentoo.org>
-rw-r--r--0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch5
-rw-r--r--0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch4
-rw-r--r--0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch4
-rw-r--r--0004-tools-xenstored-Harden-corrupt.patch4
-rw-r--r--0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch6
-rw-r--r--0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch4
-rw-r--r--0007-libxc-fix-compilation-error-with-gcc13.patch4
-rw-r--r--0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch4
-rw-r--r--0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch4
-rw-r--r--0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch6
-rw-r--r--0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch5
-rw-r--r--0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch4
-rw-r--r--0013-update-Xen-version-to-4.15.4-pre.patch4
-rw-r--r--0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch5
-rw-r--r--0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch4
-rw-r--r--0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch5
-rw-r--r--0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch4
-rw-r--r--0018-x86-spec-ctrl-Support-IBPB-on-entry.patch4
-rw-r--r--0019-x86-cpuid-Enumeration-for-BTC_NO.patch4
-rw-r--r--0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch4
-rw-r--r--0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch4
-rw-r--r--0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch5
-rw-r--r--0023-xl-relax-freemem-s-retry-calculation.patch4
-rw-r--r--0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch4
-rw-r--r--0025-xl-move-freemem-s-credit-expired-loop-exit.patch4
-rw-r--r--0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch4
-rw-r--r--0027-x86-deal-with-gcc12-release-build-issues.patch4
-rw-r--r--0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch4
-rw-r--r--0029-x86-also-suppress-use-of-MMX-insns.patch4
-rw-r--r--0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch4
-rw-r--r--0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch4
-rw-r--r--0032-x86-msr-fix-X2APIC_LAST.patch4
-rw-r--r--0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch5
-rw-r--r--0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch4
-rw-r--r--0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch4
-rw-r--r--0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch4
-rw-r--r--0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch4
-rw-r--r--0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch4
-rw-r--r--0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch6
-rw-r--r--0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch4
-rw-r--r--0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch4
-rw-r--r--0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch4
-rw-r--r--0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch4
-rw-r--r--0044-x86-HAP-adjust-monitor-table-related-error-handling.patch4
-rw-r--r--0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch4
-rw-r--r--0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch4
-rw-r--r--0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch4
-rw-r--r--0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch4
-rw-r--r--0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch4
-rw-r--r--0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch4
-rw-r--r--0051-libxl-docs-Use-arch-specific-default-paging-memory.patch4
-rw-r--r--0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch4
-rw-r--r--0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch5
-rw-r--r--0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch5
-rw-r--r--0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch6
-rw-r--r--0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch4
-rw-r--r--0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch4
-rw-r--r--0058-xen-sched-introduce-cpupool_update_node_affinity.patch4
-rw-r--r--0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch6
-rw-r--r--0060-xen-sched-fix-cpu-hotplug.patch4
-rw-r--r--0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch4
-rw-r--r--0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch4
-rw-r--r--0063-xen-gnttab-fix-gnttab_acquire_resource.patch4
-rw-r--r--0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch6
-rw-r--r--0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch4
-rw-r--r--0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch4
-rw-r--r--0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch4
-rw-r--r--0068-arm-p2m-Rework-p2m_init.patch88
-rw-r--r--0069-xen-arm-p2m-Populate-pages-for-GICv2-mapping-in-p2m_.patch169
-rw-r--r--0070-VMX-correct-error-handling-in-vmx_create_vmcs.patch38
-rw-r--r--0071-argo-Remove-reachable-ASSERT_UNREACHABLE.patch41
-rw-r--r--0072-EFI-don-t-convert-memory-marked-for-runtime-use-to-o.patch64
-rw-r--r--0073-xen-sched-fix-race-in-RTDS-scheduler.patch42
-rw-r--r--0074-xen-sched-fix-restore_vcpu_affinity-by-removing-it.patch158
-rw-r--r--0075-x86-shadow-drop-replace-bogus-assertions.patch71
-rw-r--r--0076-vpci-don-t-assume-that-vpci-per-device-data-exists-u.patch61
-rw-r--r--0077-vpci-msix-remove-from-table-list-on-detach.patch47
-rw-r--r--0078-x86-also-zap-secondary-time-area-handles-during-soft.patch49
-rw-r--r--0079-common-map_vcpu_info-wants-to-unshare-the-underlying.patch41
-rw-r--r--0080-x86-pv-shim-correctly-ignore-empty-onlining-requests.patch43
-rw-r--r--0081-x86-pv-shim-correct-ballooning-up-for-compat-guests.patch55
-rw-r--r--0082-x86-pv-shim-correct-ballooning-down-for-compat-guest.patch73
-rw-r--r--0083-tools-xenstore-create_node-Don-t-defer-work-to-undo-.patch120
-rw-r--r--0084-tools-xenstore-Fail-a-transaction-if-it-is-not-possi.patch145
-rw-r--r--0085-tools-xenstore-split-up-send_reply.patch213
-rw-r--r--0086-tools-xenstore-add-helpers-to-free-struct-buffered_d.patch117
-rw-r--r--0087-tools-xenstore-reduce-number-of-watch-events.patch201
-rw-r--r--0088-tools-xenstore-let-unread-watch-events-time-out.patch309
-rw-r--r--0089-tools-xenstore-limit-outstanding-requests.patch453
-rw-r--r--0090-tools-xenstore-don-t-buffer-multiple-identical-watch.patch93
-rw-r--r--0091-tools-xenstore-fix-connection-id-usage.patch61
-rw-r--r--0092-tools-xenstore-simplify-and-fix-per-domain-node-acco.patch336
-rw-r--r--0093-tools-xenstore-limit-max-number-of-nodes-accessed-in.patch255
-rw-r--r--0094-tools-xenstore-move-the-call-of-setup_structure-to-d.patch96
-rw-r--r--0095-tools-xenstore-add-infrastructure-to-keep-track-of-p.patch289
-rw-r--r--0096-tools-xenstore-add-memory-accounting-for-responses.patch82
-rw-r--r--0097-tools-xenstore-add-memory-accounting-for-watches.patch96
-rw-r--r--0098-tools-xenstore-add-memory-accounting-for-nodes.patch342
-rw-r--r--0099-tools-xenstore-add-exports-for-quota-variables.patch62
-rw-r--r--0100-tools-xenstore-add-control-command-for-setting-and-s.patch248
-rw-r--r--0101-tools-ocaml-xenstored-Synchronise-defaults-with-oxen.patch63
-rw-r--r--0102-tools-ocaml-xenstored-Check-for-maxrequests-before-p.patch101
-rw-r--r--0103-tools-ocaml-GC-parameter-tuning.patch126
-rw-r--r--0104-tools-ocaml-libs-xb-hide-type-of-Xb.t.patch92
-rw-r--r--0105-tools-ocaml-Change-Xb.input-to-return-Packet.t-optio.patch225
-rw-r--r--0106-tools-ocaml-xb-Add-BoundedQueue.patch133
-rw-r--r--0107-tools-ocaml-Limit-maximum-in-flight-requests-outstan.patch888
-rw-r--r--0108-SUPPORT.md-clarify-support-of-untrusted-driver-domai.patch55
-rw-r--r--0109-tools-xenstore-don-t-use-conn-in-as-context-for-temp.patch716
-rw-r--r--0110-tools-xenstore-fix-checking-node-permissions.patch143
-rw-r--r--0111-tools-xenstore-remove-recursion-from-construct_node.patch126
-rw-r--r--0112-tools-xenstore-don-t-let-remove_child_entry-call-cor.patch110
-rw-r--r--0113-tools-xenstore-add-generic-treewalk-function.patch250
-rw-r--r--0114-tools-xenstore-simplify-check_store.patch114
-rw-r--r--0115-tools-xenstore-use-treewalk-for-check_store.patch172
-rw-r--r--0116-tools-xenstore-use-treewalk-for-deleting-nodes.patch180
-rw-r--r--0117-tools-xenstore-use-treewalk-for-creating-node-record.patch242
-rw-r--r--0118-tools-xenstore-remove-nodes-owned-by-destroyed-domai.patch299
-rw-r--r--0119-tools-xenstore-make-the-internal-memory-data-base-th.patch101
-rw-r--r--0120-docs-enhance-xenstore.txt-with-permissions-descripti.patch51
-rw-r--r--0121-tools-ocaml-xenstored-Fix-quota-bypass-on-domain-shu.patch93
-rw-r--r--0122-tools-ocaml-Ensure-packet-size-is-never-negative.patch75
-rw-r--r--0123-tools-xenstore-fix-deleting-node-in-transaction.patch46
-rw-r--r--0124-tools-xenstore-harden-transaction-finalization-again.patch410
-rw-r--r--0125-x86-spec-ctrl-Enumeration-for-IBPB_RET.patch82
-rw-r--r--0126-x86-spec-ctrl-Mitigate-IBPB-not-flushing-the-RSB-RAS.patch113
-rw-r--r--info.txt4
127 files changed, 9714 insertions, 142 deletions
diff --git a/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch b/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch
index 32ff417..4b643e1 100644
--- a/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch
+++ b/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch
@@ -1,7 +1,8 @@
From f6e26ce7d9317abc41130ead6dc2443a7e2dde00 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Tue, 12 Jul 2022 11:20:46 +0200
-Subject: [PATCH 01/67] build: fix exported variable name CFLAGS_stack_boundary
+Subject: [PATCH 001/126] build: fix exported variable name
+ CFLAGS_stack_boundary
Exporting a variable with a dash doesn't work reliably, they may be
striped from the environment when calling a sub-make or sub-shell.
@@ -63,5 +64,5 @@ index e857c0f2cc2c..a5b2041f9b96 100644
obj-y := stub.o
obj-$(XEN_BUILD_EFI) := $(filter-out %.init.o,$(EFIOBJ))
--
-2.37.3
+2.37.4
diff --git a/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch b/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch
index 9f2f8e4..edc6857 100644
--- a/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch
+++ b/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch
@@ -1,7 +1,7 @@
From b89b932cfe86556c5de4ad56702aed83142e22a3 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 12 Jul 2022 11:21:14 +0200
-Subject: [PATCH 02/67] IOMMU/x86: work around bogus gcc12 warning in
+Subject: [PATCH 002/126] IOMMU/x86: work around bogus gcc12 warning in
hvm_gsi_eoi()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
@@ -48,5 +48,5 @@ index 9544f3234e65..50865eec2c04 100644
/*
--
-2.37.3
+2.37.4
diff --git a/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch b/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch
index 777ef8a..fd460e0 100644
--- a/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch
+++ b/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch
@@ -2,7 +2,7 @@ From b53df5b4341fa97614ad064a7c8e781c88b6ed71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
<marmarek@invisiblethingslab.com>
Date: Tue, 12 Jul 2022 11:22:09 +0200
-Subject: [PATCH 03/67] ehci-dbgp: fix selecting n-th ehci controller
+Subject: [PATCH 003/126] ehci-dbgp: fix selecting n-th ehci controller
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -32,5 +32,5 @@ index c893d246defa..66b4811af24a 100644
dbgp->cap = find_dbgp(dbgp, num);
if ( !dbgp->cap )
--
-2.37.3
+2.37.4
diff --git a/0004-tools-xenstored-Harden-corrupt.patch b/0004-tools-xenstored-Harden-corrupt.patch
index 62b7ec9..c9e6852 100644
--- a/0004-tools-xenstored-Harden-corrupt.patch
+++ b/0004-tools-xenstored-Harden-corrupt.patch
@@ -1,7 +1,7 @@
From 7fe638c28fa693d8bb8f9419de1220d4359a1b2d Mon Sep 17 00:00:00 2001
From: Julien Grall <jgrall@amazon.com>
Date: Tue, 12 Jul 2022 11:23:01 +0200
-Subject: [PATCH 04/67] tools/xenstored: Harden corrupt()
+Subject: [PATCH 004/126] tools/xenstored: Harden corrupt()
At the moment, corrupt() is neither checking for allocation failure
nor freeing the allocated memory.
@@ -40,5 +40,5 @@ index 8033c1e0eb28..9172dd767140 100644
check_store();
}
--
-2.37.3
+2.37.4
diff --git a/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch b/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch
index 7d79c2e..dcfc447 100644
--- a/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch
+++ b/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch
@@ -1,8 +1,8 @@
From 799a8d49237a62ea0d33c3756a6a7f665b8389b2 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:23:32 +0200
-Subject: [PATCH 05/67] x86/spec-ctrl: Only adjust MSR_SPEC_CTRL for idle with
- legacy IBRS
+Subject: [PATCH 005/126] x86/spec-ctrl: Only adjust MSR_SPEC_CTRL for idle
+ with legacy IBRS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -89,5 +89,5 @@ index 68f6c46c470c..12283573cdd5 100644
* Disable shadowing before updating the MSR. There are no SMP issues
* here; only local processor ordering concerns.
--
-2.37.3
+2.37.4
diff --git a/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch b/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch
index 965c965..177d677 100644
--- a/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch
+++ b/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch
@@ -1,7 +1,7 @@
From cd5081e8c31651e623d86532306b4c56bbcb6e6d Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:24:11 +0200
-Subject: [PATCH 06/67] x86/spec-ctrl: Knobs for STIBP and PSFD, and follow
+Subject: [PATCH 006/126] x86/spec-ctrl: Knobs for STIBP and PSFD, and follow
hardware STIBP hint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
@@ -230,5 +230,5 @@ index eb7fb70e86f9..8212227ee02a 100644
/*
* PV guests can poison the RSB to any virtual address from which
--
-2.37.3
+2.37.4
diff --git a/0007-libxc-fix-compilation-error-with-gcc13.patch b/0007-libxc-fix-compilation-error-with-gcc13.patch
index 9a1ca92..388111e 100644
--- a/0007-libxc-fix-compilation-error-with-gcc13.patch
+++ b/0007-libxc-fix-compilation-error-with-gcc13.patch
@@ -1,7 +1,7 @@
From 77deab4233b5d9ec5cf214fdc1652424fd4fc9d6 Mon Sep 17 00:00:00 2001
From: Charles Arnold <carnold@suse.com>
Date: Tue, 12 Jul 2022 11:24:39 +0200
-Subject: [PATCH 07/67] libxc: fix compilation error with gcc13
+Subject: [PATCH 007/126] libxc: fix compilation error with gcc13
xc_psr.c:161:5: error: conflicting types for 'xc_psr_cmt_get_data'
due to enum/integer mismatch;
@@ -29,5 +29,5 @@ index 318920166c5e..2013200b9eff 100644
int xc_psr_cmt_enabled(xc_interface *xch);
--
-2.37.3
+2.37.4
diff --git a/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch b/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch
index 22a1ebe..18ec7de 100644
--- a/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch
+++ b/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch
@@ -1,7 +1,7 @@
From 5be1f46f435f8b05608b1eae029cb17d8bd3a560 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:25:05 +0200
-Subject: [PATCH 08/67] x86/spec-ctrl: Honour spec-ctrl=0 for unpriv-mmio
+Subject: [PATCH 008/126] x86/spec-ctrl: Honour spec-ctrl=0 for unpriv-mmio
sub-option
This was an oversight from when unpriv-mmio was introduced.
@@ -28,5 +28,5 @@ index 8212227ee02a..06790897e496 100644
else if ( val > 0 )
rc = -EINVAL;
--
-2.37.3
+2.37.4
diff --git a/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch b/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch
index 53a8b70..bfae8e2 100644
--- a/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch
+++ b/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch
@@ -1,7 +1,7 @@
From ae417706870333bb52ebcf33c527809cdd2d7265 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:25:40 +0200
-Subject: [PATCH 09/67] xen/cmdline: Extend parse_boolean() to signal a name
+Subject: [PATCH 009/126] xen/cmdline: Extend parse_boolean() to signal a name
match
This will help parsing a sub-option which has boolean and non-boolean options
@@ -83,5 +83,5 @@ index 1198c7c0b207..be7498135170 100644
int parse_boolean(const char *name, const char *s, const char *e);
--
-2.37.3
+2.37.4
diff --git a/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch b/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch
index 36577d6..621d372 100644
--- a/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch
+++ b/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch
@@ -1,8 +1,8 @@
From 08bfd4d01185e94fda1be9dd79a981d890a9085e Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:26:14 +0200
-Subject: [PATCH 10/67] x86/spec-ctrl: Add fine-grained cmdline suboptions for
- primitives
+Subject: [PATCH 010/126] x86/spec-ctrl: Add fine-grained cmdline suboptions
+ for primitives
Support controling the PV/HVM suboption of msr-sc/rsb/md-clear, which
previously wasn't possible.
@@ -133,5 +133,5 @@ index 06790897e496..225fe08259b3 100644
/* Xen's speculative sidechannel mitigation settings. */
--
-2.37.3
+2.37.4
diff --git a/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch b/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch
index dc468c8..34acad9 100644
--- a/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch
+++ b/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch
@@ -1,7 +1,8 @@
From f241cc48dabeef6cb0b381db62f2562b0a3970eb Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Tue, 12 Jul 2022 11:26:47 +0200
-Subject: [PATCH 11/67] tools/helpers: fix build of xen-init-dom0 with -Werror
+Subject: [PATCH 011/126] tools/helpers: fix build of xen-init-dom0 with
+ -Werror
Missing prototype of asprintf() without _GNU_SOURCE.
@@ -24,5 +25,5 @@ index c99224a4b607..b4861c9e8041 100644
#include <stdint.h>
#include <string.h>
--
-2.37.3
+2.37.4
diff --git a/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch b/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch
index 74fee03..1ca34af 100644
--- a/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch
+++ b/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch
@@ -1,7 +1,7 @@
From d470a54087e0fbd813dae4d773ad0b830eeec4a1 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Tue, 12 Jul 2022 11:26:58 +0200
-Subject: [PATCH 12/67] libxl: check return value of libxl__xs_directory in
+Subject: [PATCH 012/126] libxl: check return value of libxl__xs_directory in
name2bdf
libxl__xs_directory() can potentially return NULL without setting `n`.
@@ -34,5 +34,5 @@ index 92bf86b2bebd..a5f5cdf62b80 100644
for (i = 0; i < n; i++) {
--
-2.37.3
+2.37.4
diff --git a/0013-update-Xen-version-to-4.15.4-pre.patch b/0013-update-Xen-version-to-4.15.4-pre.patch
index 8626fdd..6e8c05b 100644
--- a/0013-update-Xen-version-to-4.15.4-pre.patch
+++ b/0013-update-Xen-version-to-4.15.4-pre.patch
@@ -1,7 +1,7 @@
From 505771bb1dffdf6f763fad18ee49a913b98abfea Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 12 Jul 2022 11:28:33 +0200
-Subject: [PATCH 13/67] update Xen version to 4.15.4-pre
+Subject: [PATCH 013/126] update Xen version to 4.15.4-pre
---
xen/Makefile | 2 +-
@@ -21,5 +21,5 @@ index e9a88325c467..cd66bb3b1c84 100644
-include xen-version
--
-2.37.3
+2.37.4
diff --git a/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch b/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch
index a21b4d8..1c237f2 100644
--- a/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch
+++ b/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch
@@ -1,7 +1,8 @@
From 156ab775769d39b2dfb048ccd34dee7e86ba83a2 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Fri, 1 Jul 2022 15:59:40 +0100
-Subject: [PATCH 14/67] x86/spec-ctrl: Rework spec_ctrl_flags context switching
+Subject: [PATCH 014/126] x86/spec-ctrl: Rework spec_ctrl_flags context
+ switching
We are shortly going to need to context switch new bits in both the vcpu and
S3 paths. Introduce SCF_IST_MASK and SCF_DOM_MASK, and rework d->arch.verw
@@ -163,5 +164,5 @@ index 5a590bac44aa..66b00d511fc6 100644
.macro SPEC_CTRL_ENTRY_FROM_INTR_IST
/*
--
-2.37.3
+2.37.4
diff --git a/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch b/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch
index 49351ae..a9cc63f 100644
--- a/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch
+++ b/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch
@@ -1,7 +1,7 @@
From 2cfbca32b9dc3a8d6520549ff468a7f550daf1b1 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 28 Jun 2022 14:36:56 +0100
-Subject: [PATCH 15/67] x86/spec-ctrl: Rename SCF_ist_wrmsr to SCF_ist_sc_msr
+Subject: [PATCH 015/126] x86/spec-ctrl: Rename SCF_ist_wrmsr to SCF_ist_sc_msr
We are about to introduce SCF_ist_ibpb, at which point SCF_ist_wrmsr becomes
ambiguous.
@@ -106,5 +106,5 @@ index 66b00d511fc6..0ff1b118f882 100644
DO_SPEC_CTRL_EXIT_TO_XEN
--
-2.37.3
+2.37.4
diff --git a/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch b/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch
index f114f6d..cfe270c 100644
--- a/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch
+++ b/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch
@@ -1,7 +1,8 @@
From c707015bf118df2c43e3a48b3774916322fca50a Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Mon, 4 Jul 2022 21:32:17 +0100
-Subject: [PATCH 16/67] x86/spec-ctrl: Rename opt_ibpb to opt_ibpb_ctxt_switch
+Subject: [PATCH 016/126] x86/spec-ctrl: Rename opt_ibpb to
+ opt_ibpb_ctxt_switch
We are about to introduce the use of IBPB at different points in Xen, making
opt_ibpb ambiguous. Rename it to opt_ibpb_ctxt_switch.
@@ -93,5 +94,5 @@ index 6f8b0e09348e..fd8162ca9ab9 100644
extern int8_t opt_eager_fpu;
extern int8_t opt_l1d_flush;
--
-2.37.3
+2.37.4
diff --git a/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch b/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch
index e162148..5a6bfa5 100644
--- a/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch
+++ b/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch
@@ -1,7 +1,7 @@
From d7f5fb1e2abd0d56cada9bfcf96ab530d214d9aa Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Fri, 1 Jul 2022 15:59:40 +0100
-Subject: [PATCH 17/67] x86/spec-ctrl: Rework SPEC_CTRL_ENTRY_FROM_INTR_IST
+Subject: [PATCH 017/126] x86/spec-ctrl: Rework SPEC_CTRL_ENTRY_FROM_INTR_IST
We are shortly going to add a conditional IBPB in this path.
@@ -102,5 +102,5 @@ index 0ff1b118f882..15e24cde00d1 100644
/* Opencoded UNLIKELY_START() with no condition. */
--
-2.37.3
+2.37.4
diff --git a/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch b/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch
index 1de9d4c..43b2d76 100644
--- a/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch
+++ b/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch
@@ -1,7 +1,7 @@
From f0d78e0c11d3984c74f34a7325f862dee93a5835 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 24 Feb 2022 13:44:33 +0000
-Subject: [PATCH 18/67] x86/spec-ctrl: Support IBPB-on-entry
+Subject: [PATCH 018/126] x86/spec-ctrl: Support IBPB-on-entry
We are going to need this to mitigate Branch Type Confusion on AMD/Hygon CPUs,
but as we've talked about using it in other cases too, arrange to support it
@@ -296,5 +296,5 @@ index 15e24cde00d1..9eb4ad9ab71d 100644
jz .L\@_skip_rsb
--
-2.37.3
+2.37.4
diff --git a/0019-x86-cpuid-Enumeration-for-BTC_NO.patch b/0019-x86-cpuid-Enumeration-for-BTC_NO.patch
index a4444f4..626bfd8 100644
--- a/0019-x86-cpuid-Enumeration-for-BTC_NO.patch
+++ b/0019-x86-cpuid-Enumeration-for-BTC_NO.patch
@@ -1,7 +1,7 @@
From 2b29ac476fa0c91655906fac3512202e514ecbed Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Mon, 16 May 2022 15:48:24 +0100
-Subject: [PATCH 19/67] x86/cpuid: Enumeration for BTC_NO
+Subject: [PATCH 019/126] x86/cpuid: Enumeration for BTC_NO
BTC_NO indicates that hardware is not succeptable to Branch Type Confusion.
@@ -102,5 +102,5 @@ index 9686c82ed75c..1bbc7da4b53c 100644
/* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */
--
-2.37.3
+2.37.4
diff --git a/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch b/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch
index 4d12421..933660d 100644
--- a/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch
+++ b/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch
@@ -1,7 +1,7 @@
From 409976bed91f61fb7b053d536d2fc87cf3ad7018 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 15 Mar 2022 18:30:25 +0000
-Subject: [PATCH 20/67] x86/spec-ctrl: Enable Zen2 chickenbit
+Subject: [PATCH 020/126] x86/spec-ctrl: Enable Zen2 chickenbit
... as instructed in the Branch Type Confusion whitepaper.
@@ -101,5 +101,5 @@ index 1e743461e91d..b4a360723b14 100644
#define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027
#define MSR_AMD64_DR1_ADDRESS_MASK 0xc0011019
--
-2.37.3
+2.37.4
diff --git a/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch b/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch
index b676ba3..01be575 100644
--- a/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch
+++ b/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch
@@ -1,7 +1,7 @@
From 35bf91d30f1a480dcf5bfd99b79384b2b283da7f Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Mon, 27 Jun 2022 19:29:40 +0100
-Subject: [PATCH 21/67] x86/spec-ctrl: Mitigate Branch Type Confusion when
+Subject: [PATCH 021/126] x86/spec-ctrl: Mitigate Branch Type Confusion when
possible
Branch Type Confusion affects AMD/Hygon CPUs on Zen2 and earlier. To
@@ -301,5 +301,5 @@ index 10cd0cd2518f..33e845991b0a 100644
extern int8_t opt_eager_fpu;
extern int8_t opt_l1d_flush;
--
-2.37.3
+2.37.4
diff --git a/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch b/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch
index 81f5b9a..5b038c4 100644
--- a/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch
+++ b/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch
@@ -1,7 +1,8 @@
From 3859f3ee7e37323ae5e0014c07ba8d3a4d7890b2 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 26 Jul 2022 15:03:14 +0200
-Subject: [PATCH 22/67] x86/mm: correct TLB flush condition in _get_page_type()
+Subject: [PATCH 022/126] x86/mm: correct TLB flush condition in
+ _get_page_type()
When this logic was moved, it was moved across the point where nx is
updated to hold the new type for the page. IOW originally it was
@@ -41,5 +42,5 @@ index 7d0747017db5..c88dc749d431 100644
perfc_incr(need_flush_tlb_flush);
/*
--
-2.37.3
+2.37.4
diff --git a/0023-xl-relax-freemem-s-retry-calculation.patch b/0023-xl-relax-freemem-s-retry-calculation.patch
index d7dda30..1879884 100644
--- a/0023-xl-relax-freemem-s-retry-calculation.patch
+++ b/0023-xl-relax-freemem-s-retry-calculation.patch
@@ -1,7 +1,7 @@
From 2173d9c8be28d5f33c0e299a363ac994867d111b Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Wed, 27 Jul 2022 09:28:46 +0200
-Subject: [PATCH 23/67] xl: relax freemem()'s retry calculation
+Subject: [PATCH 023/126] xl: relax freemem()'s retry calculation
While in principle possible also under other conditions as long as other
parallel operations potentially consuming memory aren't "locked out", in
@@ -76,5 +76,5 @@ index 435155a03396..5dee7730ca76 100644
return false;
}
--
-2.37.3
+2.37.4
diff --git a/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch b/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch
index fbb1448..ccde751 100644
--- a/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch
+++ b/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch
@@ -1,7 +1,7 @@
From a2684d9cbbfb02b268be7e551674f709db0617a4 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 27 Jul 2022 09:29:08 +0200
-Subject: [PATCH 24/67] tools/init-xenstore-domain: fix memory map for PVH
+Subject: [PATCH 024/126] tools/init-xenstore-domain: fix memory map for PVH
stubdom
In case of maxmem != memsize the E820 map of the PVH stubdom is wrong,
@@ -55,5 +55,5 @@ index 6836002f0bad..32689abd7479 100644
}
--
-2.37.3
+2.37.4
diff --git a/0025-xl-move-freemem-s-credit-expired-loop-exit.patch b/0025-xl-move-freemem-s-credit-expired-loop-exit.patch
index c3a1965..a3b2e2b 100644
--- a/0025-xl-move-freemem-s-credit-expired-loop-exit.patch
+++ b/0025-xl-move-freemem-s-credit-expired-loop-exit.patch
@@ -1,7 +1,7 @@
From c37099426ea678c1d5b6c99ae5ad6834f4edd2e6 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Wed, 27 Jul 2022 09:29:31 +0200
-Subject: [PATCH 25/67] xl: move freemem()'s "credit expired" loop exit
+Subject: [PATCH 025/126] xl: move freemem()'s "credit expired" loop exit
Move the "credit expired" loop exit to the middle of the loop,
immediately after "return true". This way having reached the goal on the
@@ -51,5 +51,5 @@ index 5dee7730ca76..d1c6f8aae67a 100644
static void reload_domain_config(uint32_t domid,
--
-2.37.3
+2.37.4
diff --git a/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch b/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch
index fbf3f41..fbbf450 100644
--- a/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch
+++ b/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch
@@ -1,7 +1,7 @@
From 5f1d0179e15d726622a49044a825894d5010df15 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Wed, 27 Jul 2022 09:29:54 +0200
-Subject: [PATCH 26/67] x86/spec-ctrl: correct per-guest-type reporting of
+Subject: [PATCH 026/126] x86/spec-ctrl: correct per-guest-type reporting of
MD_CLEAR
There are command line controls for this and the default also isn't "always
@@ -52,5 +52,5 @@ index 563519ce0e31..f7b0251c42bc 100644
printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
--
-2.37.3
+2.37.4
diff --git a/0027-x86-deal-with-gcc12-release-build-issues.patch b/0027-x86-deal-with-gcc12-release-build-issues.patch
index d26f6d3..b30c65b 100644
--- a/0027-x86-deal-with-gcc12-release-build-issues.patch
+++ b/0027-x86-deal-with-gcc12-release-build-issues.patch
@@ -1,7 +1,7 @@
From a095c6cde8a717325cc31bb393c547cad5e16e35 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Wed, 27 Jul 2022 09:30:24 +0200
-Subject: [PATCH 27/67] x86: deal with gcc12 release build issues
+Subject: [PATCH 027/126] x86: deal with gcc12 release build issues
While a number of issues we previously had with pre-release gcc12 were
fixed in the final release, we continue to have one issue (with multiple
@@ -61,5 +61,5 @@ index 5c19b71eca70..71dd28f126c3 100644
#define PRtype_info "016lx"/* should only be used for printk's */
--
-2.37.3
+2.37.4
diff --git a/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch b/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch
index 26b959e..1a63be4 100644
--- a/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch
+++ b/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch
@@ -1,7 +1,7 @@
From 4799a202a9017360708c18aa8cd699bd8d6be08b Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Wed, 27 Jul 2022 09:31:01 +0200
-Subject: [PATCH 28/67] x86emul: add memory operand low bits checks for
+Subject: [PATCH 028/126] x86emul: add memory operand low bits checks for
ENQCMD{,S}
Already ISE rev 044 added text to this effect; rev 045 further dropped
@@ -41,5 +41,5 @@ index 5e297f797187..247c14dc4e68 100644
if ( (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags,
state, ctxt)) != X86EMUL_OKAY )
--
-2.37.3
+2.37.4
diff --git a/0029-x86-also-suppress-use-of-MMX-insns.patch b/0029-x86-also-suppress-use-of-MMX-insns.patch
index 1298a47..d954cdd 100644
--- a/0029-x86-also-suppress-use-of-MMX-insns.patch
+++ b/0029-x86-also-suppress-use-of-MMX-insns.patch
@@ -1,7 +1,7 @@
From 30d3de4c61c297e12662df1fdb89af335947e59d Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Wed, 27 Jul 2022 09:31:31 +0200
-Subject: [PATCH 29/67] x86: also suppress use of MMX insns
+Subject: [PATCH 029/126] x86: also suppress use of MMX insns
Passing -mno-sse alone is not enough: The compiler may still find
(questionable) reasons to use MMX insns. In particular with gcc12 use
@@ -35,5 +35,5 @@ index 456e5d5c1ad7..c4337a1a118c 100644
# Compile with thunk-extern, indirect-branch-register if avaiable.
CFLAGS-$(CONFIG_INDIRECT_THUNK) += -mindirect-branch=thunk-extern
--
-2.37.3
+2.37.4
diff --git a/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch b/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch
index a9bf845..b4f6881 100644
--- a/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch
+++ b/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch
@@ -1,7 +1,7 @@
From b64f1c9e3e3a2a416c7bb5aab77ba5d2cba98638 Mon Sep 17 00:00:00 2001
From: Luca Fancellu <luca.fancellu@arm.com>
Date: Wed, 27 Jul 2022 09:31:49 +0200
-Subject: [PATCH 30/67] common/memory: Fix ifdefs for ptdom_max_order
+Subject: [PATCH 030/126] common/memory: Fix ifdefs for ptdom_max_order
In common/memory.c the ifdef code surrounding ptdom_max_order is
using HAS_PASSTHROUGH instead of CONFIG_HAS_PASSTHROUGH, fix the
@@ -48,5 +48,5 @@ index 297b98a562b2..95b2b934e4a2 100644
order = ptdom_max_order;
#endif
--
-2.37.3
+2.37.4
diff --git a/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch b/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch
index a52055a..65fe05b 100644
--- a/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch
+++ b/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch
@@ -1,7 +1,7 @@
From 1b9845dcf959421db3a071a6bc0aa9d8edbffb50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
Date: Wed, 3 Aug 2022 12:41:18 +0200
-Subject: [PATCH 31/67] tools/libxl: env variable to signal whether disk/nic
+Subject: [PATCH 031/126] tools/libxl: env variable to signal whether disk/nic
backend is trusted
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
@@ -103,5 +103,5 @@ index 0b9e70c9d13d..f87890d1d65f 100644
}
--
-2.37.3
+2.37.4
diff --git a/0032-x86-msr-fix-X2APIC_LAST.patch b/0032-x86-msr-fix-X2APIC_LAST.patch
index ac42842..4046822 100644
--- a/0032-x86-msr-fix-X2APIC_LAST.patch
+++ b/0032-x86-msr-fix-X2APIC_LAST.patch
@@ -1,7 +1,7 @@
From df3395f6b2d759aba39fb67a7bc0fe49147c8b39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
Date: Wed, 3 Aug 2022 12:41:49 +0200
-Subject: [PATCH 32/67] x86/msr: fix X2APIC_LAST
+Subject: [PATCH 032/126] x86/msr: fix X2APIC_LAST
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -62,5 +62,5 @@ index b4a360723b14..f1b2cf5460c1 100644
#define MSR_X2APIC_TPR 0x00000808
#define MSR_X2APIC_PPR 0x0000080a
--
-2.37.3
+2.37.4
diff --git a/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch b/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch
index 46780c4..f1400b8 100644
--- a/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch
+++ b/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch
@@ -1,7 +1,8 @@
From 8ae0b4d1331c14fb9e30a42987c0152c9b00f530 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Mon, 15 Aug 2022 15:40:05 +0200
-Subject: [PATCH 33/67] x86/spec-ctrl: Use IST RSB protection for !SVM systems
+Subject: [PATCH 033/126] x86/spec-ctrl: Use IST RSB protection for !SVM
+ systems
There is a corner case where a VT-x guest which manages to reliably trigger
non-fatal #MC's could evade the rogue RSB speculation protections that were
@@ -50,5 +51,5 @@ index f7b0251c42bc..ac73806eacd8 100644
/* Check whether Eager FPU should be enabled by default. */
--
-2.37.3
+2.37.4
diff --git a/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch b/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch
index 6a73c21..5433ddb 100644
--- a/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch
+++ b/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch
@@ -1,7 +1,7 @@
From 5efcae1eb30ff24e100954e00889a568c1745ea1 Mon Sep 17 00:00:00 2001
From: Jason Andryuk <jandryuk@gmail.com>
Date: Mon, 15 Aug 2022 15:40:47 +0200
-Subject: [PATCH 34/67] x86: Expose more MSR_ARCH_CAPS to hwdom
+Subject: [PATCH 034/126] x86: Expose more MSR_ARCH_CAPS to hwdom
commit e46474278a0e ("x86/intel: Expose MSR_ARCH_CAPS to dom0") started
exposing MSR_ARCH_CAPS to dom0. More bits in MSR_ARCH_CAPS have since
@@ -64,5 +64,5 @@ index f1b2cf5460c1..49ca1f1845e6 100644
#define MSR_FLUSH_CMD 0x0000010b
#define FLUSH_CMD_L1D (_AC(1, ULL) << 0)
--
-2.37.3
+2.37.4
diff --git a/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch b/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch
index 0dfb3b4..150de40 100644
--- a/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch
+++ b/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch
@@ -1,7 +1,7 @@
From 1e31848cdd8d2ff3cb76f364f04f9771f9b3a8b1 Mon Sep 17 00:00:00 2001
From: Dario Faggioli <dfaggioli@suse.com>
Date: Mon, 15 Aug 2022 15:41:25 +0200
-Subject: [PATCH 35/67] xen/sched: setup dom0 vCPUs affinity only once
+Subject: [PATCH 035/126] xen/sched: setup dom0 vCPUs affinity only once
Right now, affinity for dom0 vCPUs is setup in two steps. This is a
problem as, at least in Credit2, unit_insert() sees and uses the
@@ -119,5 +119,5 @@ index 8f4b1ca10d1c..f07bd2681fcb 100644
}
#endif
--
-2.37.3
+2.37.4
diff --git a/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch b/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch
index 1637236..bd1b1cb 100644
--- a/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch
+++ b/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch
@@ -1,7 +1,7 @@
From c373ad3d084614a93c55e25dc20e70ffc7574971 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Mon, 15 Aug 2022 15:42:09 +0200
-Subject: [PATCH 36/67] tools/libxl: Replace deprecated -sdl option on QEMU
+Subject: [PATCH 036/126] tools/libxl: Replace deprecated -sdl option on QEMU
command line
"-sdl" is deprecated upstream since 6695e4c0fd9e ("softmmu/vl:
@@ -34,5 +34,5 @@ index 24f6e73b0a77..ae5f35e0c3fd 100644
flexarray_append_pair(dm_envs, "DISPLAY", sdl->display);
if (sdl->xauthority)
--
-2.37.3
+2.37.4
diff --git a/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch b/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch
index d27766b..bfd812b 100644
--- a/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch
+++ b/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch
@@ -1,7 +1,7 @@
From fba0c22e79922085c46527eb1391123aadfb24d1 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Mon, 15 Aug 2022 15:42:31 +0200
-Subject: [PATCH 37/67] x86/spec-ctrl: Enumeration for PBRSB_NO
+Subject: [PATCH 037/126] x86/spec-ctrl: Enumeration for PBRSB_NO
The PBRSB_NO bit indicates that the CPU is not vulnerable to the Post-Barrier
RSB speculative vulnerability.
@@ -63,5 +63,5 @@ index 49ca1f1845e6..5a830f76a8d4 100644
#define MSR_FLUSH_CMD 0x0000010b
#define FLUSH_CMD_L1D (_AC(1, ULL) << 0)
--
-2.37.3
+2.37.4
diff --git a/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch b/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch
index e0e0f87..e3d159b 100644
--- a/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch
+++ b/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch
@@ -1,7 +1,7 @@
From 104a54a307b08945365faf6d285cd5a02f94a80f Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Mon, 15 Aug 2022 15:43:08 +0200
-Subject: [PATCH 38/67] x86/amd: only call setup_force_cpu_cap for boot CPU
+Subject: [PATCH 038/126] x86/amd: only call setup_force_cpu_cap for boot CPU
This should only be called for the boot CPU to avoid calling _init code
after it has been unloaded.
@@ -29,5 +29,5 @@ index 60dbe61a61ca..a8d2fb8a1590 100644
switch(c->x86)
--
-2.37.3
+2.37.4
diff --git a/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch b/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch
index 50d83b6..f6e62b7 100644
--- a/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch
+++ b/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch
@@ -1,8 +1,8 @@
From a075900cf768fe45f270b6f1d09c4e99281da142 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Mon, 15 Aug 2022 15:43:56 +0200
-Subject: [PATCH 39/67] build/x86: suppress GNU ld 2.39 warning about RWX load
- segments
+Subject: [PATCH 039/126] build/x86: suppress GNU ld 2.39 warning about RWX
+ load segments
Commit 68f5aac012b9 ("build: suppress future GNU ld warning about RWX
load segments") didn't quite cover all the cases: Apparently I missed
@@ -34,5 +34,5 @@ index e90680cd9f52..d2fae5cf9eee 100644
%.S: %.bin
(od -v -t x $< | tr -s ' ' | awk 'NR > 1 {print s} {s=$$0}' | \
--
-2.37.3
+2.37.4
diff --git a/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch b/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch
index c29e5ac..1de5d0d 100644
--- a/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch
+++ b/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch
@@ -1,7 +1,7 @@
From 9acedc3c58c31930737edbe212f2ccf437a0b757 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Mon, 15 Aug 2022 15:44:23 +0200
-Subject: [PATCH 40/67] PCI: simplify (and thus correct)
+Subject: [PATCH 040/126] PCI: simplify (and thus correct)
pci_get_pdev{,_by_domain}()
The last "wildcard" use of either function went away with f591755823a7
@@ -149,5 +149,5 @@ index 8e3d4d94543a..cd238ae852b0 100644
uint8_t pci_conf_read8(pci_sbdf_t sbdf, unsigned int reg);
--
-2.37.3
+2.37.4
diff --git a/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch b/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch
index 3fa0e43..e695f96 100644
--- a/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch
+++ b/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch
@@ -1,7 +1,7 @@
From 09fc590c15773c2471946a78740c6b02e8c34a45 Mon Sep 17 00:00:00 2001
From: Julien Grall <jgrall@amazon.com>
Date: Tue, 11 Oct 2022 15:05:53 +0200
-Subject: [PATCH 41/67] xen/arm: p2m: Prevent adding mapping when domain is
+Subject: [PATCH 041/126] xen/arm: p2m: Prevent adding mapping when domain is
dying
During the domain destroy process, the domain will still be accessible
@@ -58,5 +58,5 @@ index 2ddd06801a82..8398251c518b 100644
start = p2m->lowest_mapped_gfn;
--
-2.37.3
+2.37.4
diff --git a/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch b/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch
index 8217a06..96b8528 100644
--- a/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch
+++ b/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch
@@ -1,7 +1,7 @@
From 0d805f9fba4bc155d15047685024f7d842e925e4 Mon Sep 17 00:00:00 2001
From: Julien Grall <jgrall@amazon.com>
Date: Tue, 11 Oct 2022 15:06:36 +0200
-Subject: [PATCH 42/67] xen/arm: p2m: Handle preemption when freeing
+Subject: [PATCH 042/126] xen/arm: p2m: Handle preemption when freeing
intermediate page tables
At the moment the P2M page tables will be freed when the domain structure
@@ -163,5 +163,5 @@ index 6a2108398fd7..3a2d51b35d71 100644
/*
* Remove mapping refcount on each mapping page in the p2m
--
-2.37.3
+2.37.4
diff --git a/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch b/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch
index f3f7e3a..f8d61bb 100644
--- a/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch
+++ b/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch
@@ -1,7 +1,7 @@
From 0f3eab90f327210d91e8e31a769376f286e8819a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
Date: Tue, 11 Oct 2022 15:07:25 +0200
-Subject: [PATCH 43/67] x86/p2m: add option to skip root pagetable removal in
+Subject: [PATCH 043/126] x86/p2m: add option to skip root pagetable removal in
p2m_teardown()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
@@ -134,5 +134,5 @@ index 46e8b94a49df..46eb51d44cf5 100644
/* Add a page to a domain's p2m table */
--
-2.37.3
+2.37.4
diff --git a/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch b/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch
index 39db626..97a55a5 100644
--- a/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch
+++ b/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch
@@ -1,7 +1,7 @@
From d24a10a91d46a56e1d406239643ec651a31033d4 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 11 Oct 2022 15:07:42 +0200
-Subject: [PATCH 44/67] x86/HAP: adjust monitor table related error handling
+Subject: [PATCH 044/126] x86/HAP: adjust monitor table related error handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -73,5 +73,5 @@ index a8f5a19da917..d75dc2b9ed3d 100644
put_gfn(d, cr3_gfn);
}
--
-2.37.3
+2.37.4
diff --git a/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch b/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch
index 7cf356d..08ff309 100644
--- a/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch
+++ b/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch
@@ -1,7 +1,7 @@
From 95f6d555ec84383f7daaf3374f65bec5ff4351f5 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 11 Oct 2022 15:07:57 +0200
-Subject: [PATCH 45/67] x86/shadow: tolerate failure of
+Subject: [PATCH 045/126] x86/shadow: tolerate failure of
sh_set_toplevel_shadow()
Subsequently sh_set_toplevel_shadow() will be adjusted to install a
@@ -72,5 +72,5 @@ index 9b43cb116c47..7e0494cf7faa 100644
#error This should never happen
#endif
--
-2.37.3
+2.37.4
diff --git a/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch b/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch
index 62be72a..4773eef 100644
--- a/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch
+++ b/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch
@@ -1,7 +1,7 @@
From 1e26afa846fb9a00b9155280eeae3b8cb8375dd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
Date: Tue, 11 Oct 2022 15:08:14 +0200
-Subject: [PATCH 46/67] x86/shadow: tolerate failure in shadow_prealloc()
+Subject: [PATCH 046/126] x86/shadow: tolerate failure in shadow_prealloc()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -275,5 +275,5 @@ index 911db46e7399..3fe0388e7c4f 100644
u32 shadow_type,
unsigned long backpointer);
--
-2.37.3
+2.37.4
diff --git a/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch b/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch
index c81cfab..880b68d 100644
--- a/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch
+++ b/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch
@@ -1,7 +1,7 @@
From 4f9b535194f70582863f2a78f113547d8822b2b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
Date: Tue, 11 Oct 2022 15:08:28 +0200
-Subject: [PATCH 47/67] x86/p2m: refuse new allocations for dying domains
+Subject: [PATCH 047/126] x86/p2m: refuse new allocations for dying domains
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -96,5 +96,5 @@ index fc4f7f78ce43..9ad7e5a88650 100644
* paging lock) and the log-dirty code (which always does). */
paging_lock_recursive(d);
--
-2.37.3
+2.37.4
diff --git a/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch b/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch
index c3d5a2c..280b6d8 100644
--- a/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch
+++ b/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch
@@ -1,7 +1,7 @@
From 7f055b011a657f8f16b0df242301efb312058eea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
Date: Tue, 11 Oct 2022 15:08:42 +0200
-Subject: [PATCH 48/67] x86/p2m: truly free paging pool memory for dying
+Subject: [PATCH 048/126] x86/p2m: truly free paging pool memory for dying
domains
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
@@ -111,5 +111,5 @@ index 9ad7e5a88650..366956c146aa 100644
paging_unlock(d);
}
--
-2.37.3
+2.37.4
diff --git a/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch b/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch
index 83502a6..aef6a24 100644
--- a/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch
+++ b/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch
@@ -1,7 +1,7 @@
From 686c920fa9389fe2b6b619643024ed98b4b7d51f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
Date: Tue, 11 Oct 2022 15:08:58 +0200
-Subject: [PATCH 49/67] x86/p2m: free the paging memory pool preemptively
+Subject: [PATCH 049/126] x86/p2m: free the paging memory pool preemptively
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -177,5 +177,5 @@ index 366956c146aa..680766fd5170 100644
}
--
-2.37.3
+2.37.4
diff --git a/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch b/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch
index 23e10ba..8ab565d 100644
--- a/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch
+++ b/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch
@@ -1,7 +1,7 @@
From b03074bb47d10c9373688b3661c7c31da01c21a3 Mon Sep 17 00:00:00 2001
From: Julien Grall <jgrall@amazon.com>
Date: Tue, 11 Oct 2022 15:09:12 +0200
-Subject: [PATCH 50/67] xen/x86: p2m: Add preemption in p2m_teardown()
+Subject: [PATCH 050/126] xen/x86: p2m: Add preemption in p2m_teardown()
The list p2m->pages contain all the pages used by the P2M. On large
instance this can be quite large and the time spent to call
@@ -193,5 +193,5 @@ index 46eb51d44cf5..edbe4cee2717 100644
/* Add a page to a domain's p2m table */
--
-2.37.3
+2.37.4
diff --git a/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch b/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch
index f3bded4..4ec35bf 100644
--- a/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch
+++ b/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch
@@ -1,7 +1,7 @@
From 0c0680d6e7953ca4c91699e60060c732f9ead5c1 Mon Sep 17 00:00:00 2001
From: Henry Wang <Henry.Wang@arm.com>
Date: Tue, 11 Oct 2022 15:09:32 +0200
-Subject: [PATCH 51/67] libxl, docs: Use arch-specific default paging memory
+Subject: [PATCH 051/126] libxl, docs: Use arch-specific default paging memory
The default paging memory (descibed in `shadow_memory` entry in xl
config) in libxl is used to determine the memory pool size for xl
@@ -143,5 +143,5 @@ index 18c3c77ccde3..4d66478fe9dd 100644
* Local variables:
* mode: C
--
-2.37.3
+2.37.4
diff --git a/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch b/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch
index 77093a7..a17ad53 100644
--- a/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch
+++ b/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch
@@ -1,7 +1,7 @@
From 45336d8f88725aec65ee177b1b09abf6eef1dc8d Mon Sep 17 00:00:00 2001
From: Henry Wang <Henry.Wang@arm.com>
Date: Tue, 11 Oct 2022 15:09:58 +0200
-Subject: [PATCH 52/67] xen/arm: Construct the P2M pages pool for guests
+Subject: [PATCH 052/126] xen/arm: Construct the P2M pages pool for guests
This commit constructs the p2m pages pool for guests from the
data structure and helper perspective.
@@ -185,5 +185,5 @@ index 3a2d51b35d71..18675b234570 100644
{
write_lock(&p2m->lock);
--
-2.37.3
+2.37.4
diff --git a/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch b/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch
index 52ce67c..c4e543d 100644
--- a/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch
+++ b/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch
@@ -1,7 +1,8 @@
From c5215044578e88b401a1296ed6302df05c113c5f Mon Sep 17 00:00:00 2001
From: Henry Wang <Henry.Wang@arm.com>
Date: Tue, 11 Oct 2022 15:10:16 +0200
-Subject: [PATCH 53/67] xen/arm, libxl: Implement XEN_DOMCTL_shadow_op for Arm
+Subject: [PATCH 053/126] xen/arm, libxl: Implement XEN_DOMCTL_shadow_op for
+ Arm
This commit implements the `XEN_DOMCTL_shadow_op` support in Xen
for Arm. The p2m pages pool size for xl guests is supposed to be
@@ -104,5 +105,5 @@ index a8c48b0beaab..a049bc7f3e52 100644
{
gfn_t s = _gfn(domctl->u.cacheflush.start_pfn);
--
-2.37.3
+2.37.4
diff --git a/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch b/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch
index 3ef7019..78ce712 100644
--- a/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch
+++ b/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch
@@ -1,7 +1,8 @@
From 7ad38a39f08aadc1578bdb46ccabaad79ed0faee Mon Sep 17 00:00:00 2001
From: Henry Wang <Henry.Wang@arm.com>
Date: Tue, 11 Oct 2022 15:10:34 +0200
-Subject: [PATCH 54/67] xen/arm: Allocate and free P2M pages from the P2M pool
+Subject: [PATCH 054/126] xen/arm: Allocate and free P2M pages from the P2M
+ pool
This commit sets/tearsdown of p2m pages pool for non-privileged Arm
guests by calling `p2m_set_allocation` and `p2m_teardown_allocation`.
@@ -285,5 +286,5 @@ index 6883d8627702..c1055ff2a745 100644
if ( p2m->root )
free_domheap_pages(p2m->root, P2M_ROOT_ORDER);
--
-2.37.3
+2.37.4
diff --git a/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch b/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch
index be83ce5..5b8a7ea 100644
--- a/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch
+++ b/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch
@@ -1,8 +1,8 @@
From bb43a10fefe494ab747b020fef3e823b63fc566d Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 11 Oct 2022 15:11:01 +0200
-Subject: [PATCH 55/67] gnttab: correct locking on transitive grant copy error
- path
+Subject: [PATCH 055/126] gnttab: correct locking on transitive grant copy
+ error path
While the comment next to the lock dropping in preparation of
recursively calling acquire_grant_for_copy() mistakenly talks about the
@@ -62,5 +62,5 @@ index 77bba9806937..0523beb9b734 100644
*page = NULL;
return ERESTART;
--
-2.37.3
+2.37.4
diff --git a/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch b/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch
index c5d2c9c..80a1923 100644
--- a/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch
+++ b/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch
@@ -1,7 +1,7 @@
From d65ebacb78901b695bc5e8a075ad1ad865a78928 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Tue, 11 Oct 2022 15:13:15 +0200
-Subject: [PATCH 56/67] tools/libxl: Replace deprecated -soundhw on QEMU
+Subject: [PATCH 056/126] tools/libxl: Replace deprecated -soundhw on QEMU
command line
-soundhw is deprecated since 825ff02911c9 ("audio: add soundhw
@@ -108,5 +108,5 @@ index 3593e21dbb64..caa08d3229cd 100644
+ (7, "sb16"),
+ ])
--
-2.37.3
+2.37.4
diff --git a/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch b/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch
index 9b1cce8..2949fb0 100644
--- a/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch
+++ b/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch
@@ -1,7 +1,7 @@
From 7923ea47e578bca30a6e45951a9da09e827ff028 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 11 Oct 2022 15:14:05 +0200
-Subject: [PATCH 57/67] x86/CPUID: surface suitable value in EBX of XSTATE
+Subject: [PATCH 057/126] x86/CPUID: surface suitable value in EBX of XSTATE
subleaf 1
While the SDM isn't very clear about this, our present behavior make
@@ -40,5 +40,5 @@ index ee2c4ea03a89..11c95178f110 100644
/*
* TODO: Figure out what to do for XSS state. VT-x manages
--
-2.37.3
+2.37.4
diff --git a/0058-xen-sched-introduce-cpupool_update_node_affinity.patch b/0058-xen-sched-introduce-cpupool_update_node_affinity.patch
index c15edb8..c2cf0b8 100644
--- a/0058-xen-sched-introduce-cpupool_update_node_affinity.patch
+++ b/0058-xen-sched-introduce-cpupool_update_node_affinity.patch
@@ -1,7 +1,7 @@
From 735b10844489babf52d3193193285a7311cf2c39 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 11 Oct 2022 15:14:22 +0200
-Subject: [PATCH 58/67] xen/sched: introduce cpupool_update_node_affinity()
+Subject: [PATCH 058/126] xen/sched: introduce cpupool_update_node_affinity()
For updating the node affinities of all domains in a cpupool add a new
function cpupool_update_node_affinity().
@@ -253,5 +253,5 @@ index 701963f84cb8..4e25627d9685 100644
/*
* To be implemented by each architecture, sanity checking the configuration
--
-2.37.3
+2.37.4
diff --git a/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch b/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch
index 587eef7..7e81f53 100644
--- a/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch
+++ b/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch
@@ -1,8 +1,8 @@
From d638c2085f71f694344b34e70eb1b371c86b00f0 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 11 Oct 2022 15:15:14 +0200
-Subject: [PATCH 59/67] xen/sched: carve out memory allocation and freeing from
- schedule_cpu_rm()
+Subject: [PATCH 059/126] xen/sched: carve out memory allocation and freeing
+ from schedule_cpu_rm()
In order to prepare not allocating or freeing memory from
schedule_cpu_rm(), move this functionality to dedicated functions.
@@ -259,5 +259,5 @@ index 6e036f8c8077..ff3185425219 100644
int sched_move_domain(struct domain *d, struct cpupool *c);
struct cpupool *cpupool_get_by_id(unsigned int poolid);
--
-2.37.3
+2.37.4
diff --git a/0060-xen-sched-fix-cpu-hotplug.patch b/0060-xen-sched-fix-cpu-hotplug.patch
index 3e158f4..264c8ef 100644
--- a/0060-xen-sched-fix-cpu-hotplug.patch
+++ b/0060-xen-sched-fix-cpu-hotplug.patch
@@ -1,7 +1,7 @@
From d17680808b4c8015e31070c971e1ee548170ae34 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 11 Oct 2022 15:15:41 +0200
-Subject: [PATCH 60/67] xen/sched: fix cpu hotplug
+Subject: [PATCH 060/126] xen/sched: fix cpu hotplug
Cpu unplugging is calling schedule_cpu_rm() via stop_machine_run() with
interrupts disabled, thus any memory allocation or freeing must be
@@ -303,5 +303,5 @@ index ff3185425219..3bab78ccb240 100644
struct cpupool *cpupool_get_by_id(unsigned int poolid);
void cpupool_put(struct cpupool *pool);
--
-2.37.3
+2.37.4
diff --git a/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch b/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch
index 0f044b2..64144fe 100644
--- a/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch
+++ b/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch
@@ -1,7 +1,7 @@
From 19cf28b515f21da02df80e68f901ad7650daaa37 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 11 Oct 2022 15:15:55 +0200
-Subject: [PATCH 61/67] Config.mk: correct PIE-related option(s) in
+Subject: [PATCH 061/126] Config.mk: correct PIE-related option(s) in
EMBEDDED_EXTRA_CFLAGS
I haven't been able to find evidence of "-nopie" ever having been a
@@ -54,5 +54,5 @@ index 96d89b2f7dfc..9f87608f6602 100644
XEN_EXTFILES_URL ?= http://xenbits.xen.org/xen-extfiles
--
-2.37.3
+2.37.4
diff --git a/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch b/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch
index 65882a9..c2299bf 100644
--- a/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch
+++ b/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch
@@ -1,7 +1,7 @@
From 182f8bb503b9dd3db5dd9118dc763d241787c6fc Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 11 Oct 2022 15:16:09 +0200
-Subject: [PATCH 62/67] tools/xenstore: minor fix of the migration stream doc
+Subject: [PATCH 062/126] tools/xenstore: minor fix of the migration stream doc
Drop mentioning the non-existent read-only socket in the migration
stream description document.
@@ -37,5 +37,5 @@ index 5f1155273ec3..78530bbb0ef4 100644
\pagebreak
--
-2.37.3
+2.37.4
diff --git a/0063-xen-gnttab-fix-gnttab_acquire_resource.patch b/0063-xen-gnttab-fix-gnttab_acquire_resource.patch
index 0d58157..9087ddb 100644
--- a/0063-xen-gnttab-fix-gnttab_acquire_resource.patch
+++ b/0063-xen-gnttab-fix-gnttab_acquire_resource.patch
@@ -1,7 +1,7 @@
From 3ac64b3751837a117ee3dfb3e2cc27057a83d0f7 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 11 Oct 2022 15:16:53 +0200
-Subject: [PATCH 63/67] xen/gnttab: fix gnttab_acquire_resource()
+Subject: [PATCH 063/126] xen/gnttab: fix gnttab_acquire_resource()
Commit 9dc46386d89d ("gnttab: work around "may be used uninitialized"
warning") was wrong, as vaddrs can legitimately be NULL in case
@@ -65,5 +65,5 @@ index 0523beb9b734..01e426c67fb6 100644
ASSERT_UNREACHABLE();
rc = -ENODATA;
--
-2.37.3
+2.37.4
diff --git a/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch b/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch
index 4246b01..738df82 100644
--- a/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch
+++ b/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch
@@ -1,8 +1,8 @@
From 62e534d17cdd838828bfd75d3d845e31524dd336 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 11 Oct 2022 15:17:12 +0200
-Subject: [PATCH 64/67] x86: wire up VCPUOP_register_vcpu_time_memory_area for
- 32-bit guests
+Subject: [PATCH 064/126] x86: wire up VCPUOP_register_vcpu_time_memory_area
+ for 32-bit guests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -55,5 +55,5 @@ index c46dccc25a54..d51d99344796 100644
rc = arch_do_vcpu_op(cmd, v, arg);
break;
--
-2.37.3
+2.37.4
diff --git a/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch b/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch
index df4fb38..84edf5d 100644
--- a/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch
+++ b/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch
@@ -1,7 +1,7 @@
From 9690bb261d5fa09cb281e1fa124d93db7b84fda5 Mon Sep 17 00:00:00 2001
From: Tamas K Lengyel <tamas.lengyel@intel.com>
Date: Tue, 11 Oct 2022 15:17:42 +0200
-Subject: [PATCH 65/67] x86/vpmu: Fix race-condition in vpmu_load
+Subject: [PATCH 065/126] x86/vpmu: Fix race-condition in vpmu_load
The vPMU code-bases attempts to perform an optimization on saving/reloading the
PMU context by keeping track of what vCPU ran on each pCPU. When a pCPU is
@@ -93,5 +93,5 @@ index fb1b296a6cc1..800eff87dc03 100644
if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
(!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
--
-2.37.3
+2.37.4
diff --git a/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch b/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch
index 24b9576..8578e02 100644
--- a/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch
+++ b/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch
@@ -1,7 +1,7 @@
From 0d233924d4b0f676056856096e8761205add3ee8 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 12 Oct 2022 17:31:44 +0200
-Subject: [PATCH 66/67] tools/tests: fix wrong backport of upstream commit
+Subject: [PATCH 066/126] tools/tests: fix wrong backport of upstream commit
52daa6a8483e4
The backport of upstream commit 52daa6a8483e4 had a bug, correct it.
@@ -27,5 +27,5 @@ index bf485baff2b4..51a8f4a000f6 100644
if ( res )
{
--
-2.37.3
+2.37.4
diff --git a/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch b/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch
index 309d486..6e75a84 100644
--- a/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch
+++ b/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch
@@ -1,7 +1,7 @@
From 816580afdd1730d4f85f64477a242a439af1cdf8 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Wed, 12 Oct 2022 17:33:40 +0200
-Subject: [PATCH 67/67] libxl/Arm: correct xc_shadow_control() invocation to
+Subject: [PATCH 067/126] libxl/Arm: correct xc_shadow_control() invocation to
fix build
The backport didn't adapt to the earlier function prototype taking more
@@ -38,5 +38,5 @@ index d21f614ed788..ba548befdd25 100644
}
--
-2.37.3
+2.37.4
diff --git a/0068-arm-p2m-Rework-p2m_init.patch b/0068-arm-p2m-Rework-p2m_init.patch
new file mode 100644
index 0000000..cc80d52
--- /dev/null
+++ b/0068-arm-p2m-Rework-p2m_init.patch
@@ -0,0 +1,88 @@
+From 6f948fd1929c01b82a119f03670cab38ffebb47e Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 25 Oct 2022 09:21:11 +0000
+Subject: [PATCH 068/126] arm/p2m: Rework p2m_init()
+
+p2m_init() is mostly trivial initialisation, but has two fallible operations
+which are on either side of the backpointer trigger for teardown to take
+actions.
+
+p2m_free_vmid() is idempotent with a failed p2m_alloc_vmid(), so rearrange
+p2m_init() to perform all trivial setup, then set the backpointer, then
+perform all fallible setup.
+
+This will simplify a future bugfix which needs to add a third fallible
+operation.
+
+No practical change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>
+(cherry picked from commit: 3783e583319fa1ce75e414d851f0fde191a14753)
+---
+ xen/arch/arm/p2m.c | 24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index c1055ff2a745..25eb1d84cbc1 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -1733,7 +1733,7 @@ void p2m_final_teardown(struct domain *d)
+ int p2m_init(struct domain *d)
+ {
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+- int rc = 0;
++ int rc;
+ unsigned int cpu;
+
+ rwlock_init(&p2m->lock);
+@@ -1742,11 +1742,6 @@ int p2m_init(struct domain *d)
+ INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist);
+
+ p2m->vmid = INVALID_VMID;
+-
+- rc = p2m_alloc_vmid(d);
+- if ( rc != 0 )
+- return rc;
+-
+ p2m->max_mapped_gfn = _gfn(0);
+ p2m->lowest_mapped_gfn = _gfn(ULONG_MAX);
+
+@@ -1762,8 +1757,6 @@ int p2m_init(struct domain *d)
+ p2m->clean_pte = is_iommu_enabled(d) &&
+ !iommu_has_feature(d, IOMMU_FEAT_COHERENT_WALK);
+
+- rc = p2m_alloc_table(d);
+-
+ /*
+ * Make sure that the type chosen to is able to store the an vCPU ID
+ * between 0 and the maximum of virtual CPUS supported as long as
+@@ -1776,13 +1769,20 @@ int p2m_init(struct domain *d)
+ p2m->last_vcpu_ran[cpu] = INVALID_VCPU_ID;
+
+ /*
+- * Besides getting a domain when we only have the p2m in hand,
+- * the back pointer to domain is also used in p2m_teardown()
+- * as an end-of-initialization indicator.
++ * "Trivial" initialisation is now complete. Set the backpointer so
++ * p2m_teardown() and friends know to do something.
+ */
+ p2m->domain = d;
+
+- return rc;
++ rc = p2m_alloc_vmid(d);
++ if ( rc )
++ return rc;
++
++ rc = p2m_alloc_table(d);
++ if ( rc )
++ return rc;
++
++ return 0;
+ }
+
+ /*
+--
+2.37.4
+
diff --git a/0069-xen-arm-p2m-Populate-pages-for-GICv2-mapping-in-p2m_.patch b/0069-xen-arm-p2m-Populate-pages-for-GICv2-mapping-in-p2m_.patch
new file mode 100644
index 0000000..67cdb7a
--- /dev/null
+++ b/0069-xen-arm-p2m-Populate-pages-for-GICv2-mapping-in-p2m_.patch
@@ -0,0 +1,169 @@
+From f8915cd5dbe0f51e9bb31a54fe40600b839dd707 Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 25 Oct 2022 09:21:12 +0000
+Subject: [PATCH 069/126] xen/arm: p2m: Populate pages for GICv2 mapping in
+ p2m_init()
+
+Hardware using GICv2 needs to create a P2M mapping of 8KB GICv2 area
+when the domain is created. Considering the worst case of page tables
+which requires 6 P2M pages as the two pages will be consecutive but not
+necessarily in the same L3 page table and keep a buffer, populate 16
+pages as the default value to the P2M pages pool in p2m_init() at the
+domain creation stage to satisfy the GICv2 requirement. For GICv3, the
+above-mentioned P2M mapping is not necessary, but since the allocated
+16 pages here would not be lost, hence populate these pages
+unconditionally.
+
+With the default 16 P2M pages populated, there would be a case that
+failures would happen in the domain creation with P2M pages already in
+use. To properly free the P2M for this case, firstly support the
+optional preemption of p2m_teardown(), then call p2m_teardown() and
+p2m_set_allocation(d, 0, NULL) non-preemptively in p2m_final_teardown().
+As non-preemptive p2m_teardown() should only return 0, use a
+BUG_ON to confirm that.
+
+Since p2m_final_teardown() is called either after
+domain_relinquish_resources() where relinquish_p2m_mapping() has been
+called, or from failure path of domain_create()/arch_domain_create()
+where mappings that require p2m_put_l3_page() should never be created,
+relinquish_p2m_mapping() is not added in p2m_final_teardown(), add
+in-code comments to refer to this.
+
+Fixes: cbea5a1149ca ("xen/arm: Allocate and free P2M pages from the P2M pool")
+Suggested-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>
+(cherry picked from commit: c7cff1188802646eaa38e918e5738da0e84949be)
+---
+ xen/arch/arm/domain.c | 2 +-
+ xen/arch/arm/p2m.c | 34 ++++++++++++++++++++++++++++++++--
+ xen/include/asm-arm/p2m.h | 14 ++++++++++----
+ 3 files changed, 43 insertions(+), 7 deletions(-)
+
+diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
+index a5ffd952ecd0..b11359b8cca3 100644
+--- a/xen/arch/arm/domain.c
++++ b/xen/arch/arm/domain.c
+@@ -1041,7 +1041,7 @@ int domain_relinquish_resources(struct domain *d)
+ return ret;
+
+ PROGRESS(p2m):
+- ret = p2m_teardown(d);
++ ret = p2m_teardown(d, true);
+ if ( ret )
+ return ret;
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index 25eb1d84cbc1..f6012f2a538f 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -1664,7 +1664,7 @@ static void p2m_free_vmid(struct domain *d)
+ spin_unlock(&vmid_alloc_lock);
+ }
+
+-int p2m_teardown(struct domain *d)
++int p2m_teardown(struct domain *d, bool allow_preemption)
+ {
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ unsigned long count = 0;
+@@ -1672,6 +1672,9 @@ int p2m_teardown(struct domain *d)
+ unsigned int i;
+ int rc = 0;
+
++ if ( page_list_empty(&p2m->pages) )
++ return 0;
++
+ p2m_write_lock(p2m);
+
+ /*
+@@ -1695,7 +1698,7 @@ int p2m_teardown(struct domain *d)
+ p2m_free_page(p2m->domain, pg);
+ count++;
+ /* Arbitrarily preempt every 512 iterations */
+- if ( !(count % 512) && hypercall_preempt_check() )
++ if ( allow_preemption && !(count % 512) && hypercall_preempt_check() )
+ {
+ rc = -ERESTART;
+ break;
+@@ -1715,7 +1718,20 @@ void p2m_final_teardown(struct domain *d)
+ if ( !p2m->domain )
+ return;
+
++ /*
++ * No need to call relinquish_p2m_mapping() here because
++ * p2m_final_teardown() is called either after domain_relinquish_resources()
++ * where relinquish_p2m_mapping() has been called, or from failure path of
++ * domain_create()/arch_domain_create() where mappings that require
++ * p2m_put_l3_page() should never be created. For the latter case, also see
++ * comment on top of the p2m_set_entry() for more info.
++ */
++
++ BUG_ON(p2m_teardown(d, false));
+ ASSERT(page_list_empty(&p2m->pages));
++
++ while ( p2m_teardown_allocation(d) == -ERESTART )
++ continue; /* No preemption support here */
+ ASSERT(page_list_empty(&d->arch.paging.p2m_freelist));
+
+ if ( p2m->root )
+@@ -1782,6 +1798,20 @@ int p2m_init(struct domain *d)
+ if ( rc )
+ return rc;
+
++ /*
++ * Hardware using GICv2 needs to create a P2M mapping of 8KB GICv2 area
++ * when the domain is created. Considering the worst case for page
++ * tables and keep a buffer, populate 16 pages to the P2M pages pool here.
++ * For GICv3, the above-mentioned P2M mapping is not necessary, but since
++ * the allocated 16 pages here would not be lost, hence populate these
++ * pages unconditionally.
++ */
++ spin_lock(&d->arch.paging.lock);
++ rc = p2m_set_allocation(d, 16, NULL);
++ spin_unlock(&d->arch.paging.lock);
++ if ( rc )
++ return rc;
++
+ return 0;
+ }
+
+diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
+index 18675b234570..ea7ca41d82b2 100644
+--- a/xen/include/asm-arm/p2m.h
++++ b/xen/include/asm-arm/p2m.h
+@@ -194,14 +194,18 @@ int p2m_init(struct domain *d);
+
+ /*
+ * The P2M resources are freed in two parts:
+- * - p2m_teardown() will be called when relinquish the resources. It
+- * will free large resources (e.g. intermediate page-tables) that
+- * requires preemption.
++ * - p2m_teardown() will be called preemptively when relinquish the
++ * resources, in which case it will free large resources (e.g. intermediate
++ * page-tables) that requires preemption.
+ * - p2m_final_teardown() will be called when domain struct is been
+ * freed. This *cannot* be preempted and therefore one small
+ * resources should be freed here.
++ * Note that p2m_final_teardown() will also call p2m_teardown(), to properly
++ * free the P2M when failures happen in the domain creation with P2M pages
++ * already in use. In this case p2m_teardown() is called non-preemptively and
++ * p2m_teardown() will always return 0.
+ */
+-int p2m_teardown(struct domain *d);
++int p2m_teardown(struct domain *d, bool allow_preemption);
+ void p2m_final_teardown(struct domain *d);
+
+ /*
+@@ -266,6 +270,8 @@ mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn,
+ /*
+ * Direct set a p2m entry: only for use by the P2M code.
+ * The P2M write lock should be taken.
++ * TODO: Add a check in __p2m_set_entry() to avoid creating a mapping in
++ * arch_domain_create() that requires p2m_put_l3_page() to be called.
+ */
+ int p2m_set_entry(struct p2m_domain *p2m,
+ gfn_t sgfn,
+--
+2.37.4
+
diff --git a/0070-VMX-correct-error-handling-in-vmx_create_vmcs.patch b/0070-VMX-correct-error-handling-in-vmx_create_vmcs.patch
new file mode 100644
index 0000000..4823c64
--- /dev/null
+++ b/0070-VMX-correct-error-handling-in-vmx_create_vmcs.patch
@@ -0,0 +1,38 @@
+From 3885fa42349c3c6f31f0e0eec3b4605dca7fdda9 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 31 Oct 2022 13:31:26 +0100
+Subject: [PATCH 070/126] VMX: correct error handling in vmx_create_vmcs()
+
+With the addition of vmx_add_msr() calls to construct_vmcs() there are
+now cases where simply freeing the VMCS isn't enough: The MSR bitmap
+page as well as one of the MSR area ones (if it's the 2nd vmx_add_msr()
+which fails) may also need freeing. Switch to using vmx_destroy_vmcs()
+instead.
+
+Fixes: 3bd36952dab6 ("x86/spec-ctrl: Introduce an option to control L1D_FLUSH for HVM HAP guests")
+Fixes: 53a570b28569 ("x86/spec-ctrl: Support IBPB-on-entry")
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+master commit: 448d28309f1a966bdc850aff1a637e0b79a03e43
+master date: 2022-10-12 17:57:56 +0200
+---
+ xen/arch/x86/hvm/vmx/vmcs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
+index dd817cee4e69..237b13459d4f 100644
+--- a/xen/arch/x86/hvm/vmx/vmcs.c
++++ b/xen/arch/x86/hvm/vmx/vmcs.c
+@@ -1831,7 +1831,7 @@ int vmx_create_vmcs(struct vcpu *v)
+
+ if ( (rc = construct_vmcs(v)) != 0 )
+ {
+- vmx_free_vmcs(vmx->vmcs_pa);
++ vmx_destroy_vmcs(v);
+ return rc;
+ }
+
+--
+2.37.4
+
diff --git a/0071-argo-Remove-reachable-ASSERT_UNREACHABLE.patch b/0071-argo-Remove-reachable-ASSERT_UNREACHABLE.patch
new file mode 100644
index 0000000..d1563bd
--- /dev/null
+++ b/0071-argo-Remove-reachable-ASSERT_UNREACHABLE.patch
@@ -0,0 +1,41 @@
+From 916668baf9252ac30260e3394278a098712c5d34 Mon Sep 17 00:00:00 2001
+From: Jason Andryuk <jandryuk@gmail.com>
+Date: Mon, 31 Oct 2022 13:32:59 +0100
+Subject: [PATCH 071/126] argo: Remove reachable ASSERT_UNREACHABLE
+
+I observed this ASSERT_UNREACHABLE in partner_rings_remove consistently
+trip. It was in OpenXT with the viptables patch applied.
+
+dom10 shuts down.
+dom7 is REJECTED sending to dom10.
+dom7 shuts down and this ASSERT trips for dom10.
+
+The argo_send_info has a domid, but there is no refcount taken on
+the domain. Therefore it's not appropriate to ASSERT that the domain
+can be looked up via domid. Replace with a debug message.
+
+Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
+Reviewed-by: Christopher Clark <christopher.w.clark@gmail.com>
+master commit: 197f612b77c5afe04e60df2100a855370d720ad7
+master date: 2022-10-14 14:45:41 +0100
+---
+ xen/common/argo.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xen/common/argo.c b/xen/common/argo.c
+index 49be715f638e..2b0d980d4bba 100644
+--- a/xen/common/argo.c
++++ b/xen/common/argo.c
+@@ -1299,7 +1299,8 @@ partner_rings_remove(struct domain *src_d)
+ ASSERT_UNREACHABLE();
+ }
+ else
+- ASSERT_UNREACHABLE();
++ argo_dprintk("%pd has entry for stale partner d%u\n",
++ src_d, send_info->id.domain_id);
+
+ if ( dst_d )
+ rcu_unlock_domain(dst_d);
+--
+2.37.4
+
diff --git a/0072-EFI-don-t-convert-memory-marked-for-runtime-use-to-o.patch b/0072-EFI-don-t-convert-memory-marked-for-runtime-use-to-o.patch
new file mode 100644
index 0000000..7993482
--- /dev/null
+++ b/0072-EFI-don-t-convert-memory-marked-for-runtime-use-to-o.patch
@@ -0,0 +1,64 @@
+From b833014293f3fa5a7c48756ce0c8c9f3e4a666ff Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 31 Oct 2022 13:33:29 +0100
+Subject: [PATCH 072/126] EFI: don't convert memory marked for runtime use to
+ ordinary RAM
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+efi_init_memory() in both relevant places is treating EFI_MEMORY_RUNTIME
+higher priority than the type of the range. To avoid accessing memory at
+runtime which was re-used for other purposes, make
+efi_arch_process_memory_map() follow suit. While in theory the same would
+apply to EfiACPIReclaimMemory, we don't actually "reclaim" or clobber
+that memory (converted to E820_ACPI on x86) there (and it would be a bug
+if the Dom0 kernel tried to reclaim the range, bypassing Xen's memory
+management, plus it would be at least bogus if it clobbered that space),
+hence that type's handling can be left alone.
+
+Fixes: bf6501a62e80 ("x86-64: EFI boot code")
+Fixes: facac0af87ef ("x86-64: EFI runtime code")
+Fixes: 6d70ea10d49f ("Add ARM EFI boot support")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+master commit: f324300c8347b6aa6f9c0b18e0a90bbf44011a9a
+master date: 2022-10-21 12:30:24 +0200
+---
+ xen/arch/arm/efi/efi-boot.h | 3 ++-
+ xen/arch/x86/efi/efi-boot.h | 4 +++-
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/arm/efi/efi-boot.h b/xen/arch/arm/efi/efi-boot.h
+index cf9c37153fea..37d7ebd59ae2 100644
+--- a/xen/arch/arm/efi/efi-boot.h
++++ b/xen/arch/arm/efi/efi-boot.h
+@@ -149,7 +149,8 @@ static EFI_STATUS __init efi_process_memory_map_bootinfo(EFI_MEMORY_DESCRIPTOR *
+
+ for ( Index = 0; Index < (mmap_size / desc_size); Index++ )
+ {
+- if ( desc_ptr->Attribute & EFI_MEMORY_WB &&
++ if ( !(desc_ptr->Attribute & EFI_MEMORY_RUNTIME) &&
++ (desc_ptr->Attribute & EFI_MEMORY_WB) &&
+ (desc_ptr->Type == EfiConventionalMemory ||
+ desc_ptr->Type == EfiLoaderCode ||
+ desc_ptr->Type == EfiLoaderData ||
+diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
+index 84fd77931456..3c3b3ab936f4 100644
+--- a/xen/arch/x86/efi/efi-boot.h
++++ b/xen/arch/x86/efi/efi-boot.h
+@@ -183,7 +183,9 @@ static void __init efi_arch_process_memory_map(EFI_SYSTEM_TABLE *SystemTable,
+ /* fall through */
+ case EfiLoaderCode:
+ case EfiLoaderData:
+- if ( desc->Attribute & EFI_MEMORY_WB )
++ if ( desc->Attribute & EFI_MEMORY_RUNTIME )
++ type = E820_RESERVED;
++ else if ( desc->Attribute & EFI_MEMORY_WB )
+ type = E820_RAM;
+ else
+ case EfiUnusableMemory:
+--
+2.37.4
+
diff --git a/0073-xen-sched-fix-race-in-RTDS-scheduler.patch b/0073-xen-sched-fix-race-in-RTDS-scheduler.patch
new file mode 100644
index 0000000..bb456ca
--- /dev/null
+++ b/0073-xen-sched-fix-race-in-RTDS-scheduler.patch
@@ -0,0 +1,42 @@
+From 1f679f084fef76810762ee69a584fc1b524be0b6 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Mon, 31 Oct 2022 13:33:59 +0100
+Subject: [PATCH 073/126] xen/sched: fix race in RTDS scheduler
+
+When a domain gets paused the unit runnable state can change to "not
+runnable" without the scheduling lock being involved. This means that
+a specific scheduler isn't involved in this change of runnable state.
+
+In the RTDS scheduler this can result in an inconsistency in case a
+unit is losing its "runnable" capability while the RTDS scheduler's
+scheduling function is active. RTDS will remove the unit from the run
+queue, but doesn't do so for the replenish queue, leading to hitting
+an ASSERT() in replq_insert() later when the domain is unpaused again.
+
+Fix that by removing the unit from the replenish queue as well in this
+case.
+
+Fixes: 7c7b407e7772 ("xen/sched: introduce unit_runnable_state()")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Dario Faggioli <dfaggioli@suse.com>
+master commit: 73c62927f64ecb48f27d06176befdf76b879f340
+master date: 2022-10-21 12:32:23 +0200
+---
+ xen/common/sched/rt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xen/common/sched/rt.c b/xen/common/sched/rt.c
+index c24cd2ac3200..ec2ca1bebc26 100644
+--- a/xen/common/sched/rt.c
++++ b/xen/common/sched/rt.c
+@@ -1087,6 +1087,7 @@ rt_schedule(const struct scheduler *ops, struct sched_unit *currunit,
+ else if ( !unit_runnable_state(snext->unit) )
+ {
+ q_remove(snext);
++ replq_remove(ops, snext);
+ snext = rt_unit(sched_idle_unit(sched_cpu));
+ }
+
+--
+2.37.4
+
diff --git a/0074-xen-sched-fix-restore_vcpu_affinity-by-removing-it.patch b/0074-xen-sched-fix-restore_vcpu_affinity-by-removing-it.patch
new file mode 100644
index 0000000..9085f67
--- /dev/null
+++ b/0074-xen-sched-fix-restore_vcpu_affinity-by-removing-it.patch
@@ -0,0 +1,158 @@
+From 9c5114696c6f7773b7f3691f27aaa7a0636c916d Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Mon, 31 Oct 2022 13:34:28 +0100
+Subject: [PATCH 074/126] xen/sched: fix restore_vcpu_affinity() by removing it
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When the system is coming up after having been suspended,
+restore_vcpu_affinity() is called for each domain in order to adjust
+the vcpu's affinity settings in case a cpu didn't come to life again.
+
+The way restore_vcpu_affinity() is doing that is wrong, because the
+specific scheduler isn't being informed about a possible migration of
+the vcpu to another cpu. Additionally the migration is often even
+happening if all cpus are running again, as it is done without check
+whether it is really needed.
+
+As cpupool management is already calling cpu_disable_scheduler() for
+cpus not having come up again, and cpu_disable_scheduler() is taking
+care of eventually needed vcpu migration in the proper way, there is
+simply no need for restore_vcpu_affinity().
+
+So just remove restore_vcpu_affinity() completely, together with the
+no longer used sched_reset_affinity_broken().
+
+Fixes: 8a04eaa8ea83 ("xen/sched: move some per-vcpu items to struct sched_unit")
+Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Dario Faggioli <dfaggioli@suse.com>
+Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+master commit: fce1f381f7388daaa3e96dbb0d67d7a3e4bb2d2d
+master date: 2022-10-24 11:16:27 +0100
+---
+ xen/arch/x86/acpi/power.c | 3 --
+ xen/common/sched/core.c | 78 ---------------------------------------
+ xen/include/xen/sched.h | 1 -
+ 3 files changed, 82 deletions(-)
+
+diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
+index dd397f713067..1a7baeebe6d0 100644
+--- a/xen/arch/x86/acpi/power.c
++++ b/xen/arch/x86/acpi/power.c
+@@ -159,10 +159,7 @@ static void thaw_domains(void)
+
+ rcu_read_lock(&domlist_read_lock);
+ for_each_domain ( d )
+- {
+- restore_vcpu_affinity(d);
+ domain_unpause(d);
+- }
+ rcu_read_unlock(&domlist_read_lock);
+ }
+
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 900aab8f66a7..9173cf690c72 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -1188,84 +1188,6 @@ static bool sched_check_affinity_broken(const struct sched_unit *unit)
+ return false;
+ }
+
+-static void sched_reset_affinity_broken(const struct sched_unit *unit)
+-{
+- struct vcpu *v;
+-
+- for_each_sched_unit_vcpu ( unit, v )
+- v->affinity_broken = false;
+-}
+-
+-void restore_vcpu_affinity(struct domain *d)
+-{
+- unsigned int cpu = smp_processor_id();
+- struct sched_unit *unit;
+-
+- ASSERT(system_state == SYS_STATE_resume);
+-
+- rcu_read_lock(&sched_res_rculock);
+-
+- for_each_sched_unit ( d, unit )
+- {
+- spinlock_t *lock;
+- unsigned int old_cpu = sched_unit_master(unit);
+- struct sched_resource *res;
+-
+- ASSERT(!unit_runnable(unit));
+-
+- /*
+- * Re-assign the initial processor as after resume we have no
+- * guarantee the old processor has come back to life again.
+- *
+- * Therefore, here, before actually unpausing the domains, we should
+- * set v->processor of each of their vCPUs to something that will
+- * make sense for the scheduler of the cpupool in which they are in.
+- */
+- lock = unit_schedule_lock_irq(unit);
+-
+- cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
+- cpupool_domain_master_cpumask(d));
+- if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
+- {
+- if ( sched_check_affinity_broken(unit) )
+- {
+- sched_set_affinity(unit, unit->cpu_hard_affinity_saved, NULL);
+- sched_reset_affinity_broken(unit);
+- cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
+- cpupool_domain_master_cpumask(d));
+- }
+-
+- if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
+- {
+- /* Affinity settings of one vcpu are for the complete unit. */
+- printk(XENLOG_DEBUG "Breaking affinity for %pv\n",
+- unit->vcpu_list);
+- sched_set_affinity(unit, &cpumask_all, NULL);
+- cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
+- cpupool_domain_master_cpumask(d));
+- }
+- }
+-
+- res = get_sched_res(cpumask_any(cpumask_scratch_cpu(cpu)));
+- sched_set_res(unit, res);
+-
+- spin_unlock_irq(lock);
+-
+- /* v->processor might have changed, so reacquire the lock. */
+- lock = unit_schedule_lock_irq(unit);
+- res = sched_pick_resource(unit_scheduler(unit), unit);
+- sched_set_res(unit, res);
+- spin_unlock_irq(lock);
+-
+- if ( old_cpu != sched_unit_master(unit) )
+- sched_move_irqs(unit);
+- }
+-
+- rcu_read_unlock(&sched_res_rculock);
+-
+- domain_update_node_affinity(d);
+-}
+-
+ /*
+ * This function is used by cpu_hotplug code via cpu notifier chain
+ * and from cpupools to switch schedulers on a cpu.
+diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
+index 4e25627d9685..bb05d167ae0f 100644
+--- a/xen/include/xen/sched.h
++++ b/xen/include/xen/sched.h
+@@ -993,7 +993,6 @@ void vcpu_set_periodic_timer(struct vcpu *v, s_time_t value);
+ void sched_setup_dom0_vcpus(struct domain *d);
+ int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason);
+ int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
+-void restore_vcpu_affinity(struct domain *d);
+ int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
+ struct xen_domctl_vcpuaffinity *vcpuaff);
+
+--
+2.37.4
+
diff --git a/0075-x86-shadow-drop-replace-bogus-assertions.patch b/0075-x86-shadow-drop-replace-bogus-assertions.patch
new file mode 100644
index 0000000..183dc68
--- /dev/null
+++ b/0075-x86-shadow-drop-replace-bogus-assertions.patch
@@ -0,0 +1,71 @@
+From 08bc78b4eecaef33250038f7e484bdf01ea1017c Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 31 Oct 2022 13:35:06 +0100
+Subject: [PATCH 075/126] x86/shadow: drop (replace) bogus assertions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The addition of a call to shadow_blow_tables() from shadow_teardown()
+has resulted in the "no vcpus" related assertion becoming triggerable:
+If domain_create() fails with at least one page successfully allocated
+in the course of shadow_enable(), or if domain_create() succeeds and
+the domain is then killed without ever invoking XEN_DOMCTL_max_vcpus.
+Note that in-tree tests (test-resource and test-tsx) do exactly the
+latter of these two.
+
+The assertion's comment was bogus anyway: Shadow mode has been getting
+enabled before allocation of vCPU-s for quite some time. Convert the
+assertion to a conditional: As long as there are no vCPU-s, there's
+nothing to blow away.
+
+Fixes: e7aa55c0aab3 ("x86/p2m: free the paging memory pool preemptively")
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+A similar assertion/comment pair exists in _shadow_prealloc(); the
+comment is similarly bogus, and the assertion could in principle trigger
+e.g. when shadow_alloc_p2m_page() is called early enough. Replace those
+at the same time by a similar early return, here indicating failure to
+the caller (which will generally lead to the domain being crashed in
+shadow_prealloc()).
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: a92dc2bb30ba65ae25d2f417677eb7ef9a6a0fef
+master date: 2022-10-24 15:46:11 +0200
+---
+ xen/arch/x86/mm/shadow/common.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index 8f7fddcee1e5..e36d49d1fcba 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -942,8 +942,9 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages)
+ /* No reclaim when the domain is dying, teardown will take care of it. */
+ return false;
+
+- /* Shouldn't have enabled shadows if we've no vcpus. */
+- ASSERT(d->vcpu && d->vcpu[0]);
++ /* Nothing to reclaim when there are no vcpus yet. */
++ if ( !d->vcpu[0] )
++ return false;
+
+ /* Stage one: walk the list of pinned pages, unpinning them */
+ perfc_incr(shadow_prealloc_1);
+@@ -1033,8 +1034,9 @@ void shadow_blow_tables(struct domain *d)
+ mfn_t smfn;
+ int i;
+
+- /* Shouldn't have enabled shadows if we've no vcpus. */
+- ASSERT(d->vcpu && d->vcpu[0]);
++ /* Nothing to do when there are no vcpus yet. */
++ if ( !d->vcpu[0] )
++ return;
+
+ /* Pass one: unpin all pinned pages */
+ foreach_pinned_shadow(d, sp, t)
+--
+2.37.4
+
diff --git a/0076-vpci-don-t-assume-that-vpci-per-device-data-exists-u.patch b/0076-vpci-don-t-assume-that-vpci-per-device-data-exists-u.patch
new file mode 100644
index 0000000..0350771
--- /dev/null
+++ b/0076-vpci-don-t-assume-that-vpci-per-device-data-exists-u.patch
@@ -0,0 +1,61 @@
+From 6b035f4f5829eb213cb9fcbe83b5dfae05c857a6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Mon, 31 Oct 2022 13:35:33 +0100
+Subject: [PATCH 076/126] vpci: don't assume that vpci per-device data exists
+ unconditionally
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+It's possible for a device to be assigned to a domain but have no
+vpci structure if vpci_process_pending() failed and called
+vpci_remove_device() as a result. The unconditional accesses done by
+vpci_{read,write}() and vpci_remove_device() to pdev->vpci would
+then trigger a NULL pointer dereference.
+
+Add checks for pdev->vpci presence in the affected functions.
+
+Fixes: 9c244fdef7 ('vpci: add header handlers')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 6ccb5e308ceeb895fbccd87a528a8bd24325aa39
+master date: 2022-10-26 14:55:30 +0200
+---
+ xen/drivers/vpci/vpci.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
+index a27c9e600df1..6b90e4fa32dc 100644
+--- a/xen/drivers/vpci/vpci.c
++++ b/xen/drivers/vpci/vpci.c
+@@ -37,6 +37,9 @@ extern vpci_register_init_t *const __end_vpci_array[];
+
+ void vpci_remove_device(struct pci_dev *pdev)
+ {
++ if ( !pdev->vpci )
++ return;
++
+ spin_lock(&pdev->vpci->lock);
+ while ( !list_empty(&pdev->vpci->handlers) )
+ {
+@@ -320,7 +323,7 @@ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
+
+ /* Find the PCI dev matching the address. */
+ pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
+- if ( !pdev )
++ if ( !pdev || !pdev->vpci )
+ return vpci_read_hw(sbdf, reg, size);
+
+ spin_lock(&pdev->vpci->lock);
+@@ -430,7 +433,7 @@ void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size,
+ * Passthrough everything that's not trapped.
+ */
+ pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
+- if ( !pdev )
++ if ( !pdev || !pdev->vpci )
+ {
+ vpci_write_hw(sbdf, reg, size, data);
+ return;
+--
+2.37.4
+
diff --git a/0077-vpci-msix-remove-from-table-list-on-detach.patch b/0077-vpci-msix-remove-from-table-list-on-detach.patch
new file mode 100644
index 0000000..2e60831
--- /dev/null
+++ b/0077-vpci-msix-remove-from-table-list-on-detach.patch
@@ -0,0 +1,47 @@
+From bff4c4457950abb498270d921d728f654876f944 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Mon, 31 Oct 2022 13:35:59 +0100
+Subject: [PATCH 077/126] vpci/msix: remove from table list on detach
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Teardown of MSIX vPCI related data doesn't currently remove the MSIX
+device data from the list of MSIX tables handled by the domain,
+leading to a use-after-free of the data in the msix structure.
+
+Remove the structure from the list before freeing in order to solve
+it.
+
+Reported-by: Jan Beulich <jbeulich@suse.com>
+Fixes: d6281be9d0 ('vpci/msix: add MSI-X handlers')
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: c14aea137eab29eb9c30bfad745a00c65ad21066
+master date: 2022-10-26 14:56:58 +0200
+---
+ xen/drivers/vpci/vpci.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
+index 6b90e4fa32dc..75edbbee4025 100644
+--- a/xen/drivers/vpci/vpci.c
++++ b/xen/drivers/vpci/vpci.c
+@@ -51,8 +51,12 @@ void vpci_remove_device(struct pci_dev *pdev)
+ xfree(r);
+ }
+ spin_unlock(&pdev->vpci->lock);
+- if ( pdev->vpci->msix && pdev->vpci->msix->pba )
+- iounmap(pdev->vpci->msix->pba);
++ if ( pdev->vpci->msix )
++ {
++ list_del(&pdev->vpci->msix->next);
++ if ( pdev->vpci->msix->pba )
++ iounmap(pdev->vpci->msix->pba);
++ }
+ xfree(pdev->vpci->msix);
+ xfree(pdev->vpci->msi);
+ xfree(pdev->vpci);
+--
+2.37.4
+
diff --git a/0078-x86-also-zap-secondary-time-area-handles-during-soft.patch b/0078-x86-also-zap-secondary-time-area-handles-during-soft.patch
new file mode 100644
index 0000000..e3db6ad
--- /dev/null
+++ b/0078-x86-also-zap-secondary-time-area-handles-during-soft.patch
@@ -0,0 +1,49 @@
+From 9b8b65c827169eca2d0e500150009ac0f857d455 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 31 Oct 2022 13:36:25 +0100
+Subject: [PATCH 078/126] x86: also zap secondary time area handles during soft
+ reset
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Just like domain_soft_reset() properly zaps runstate area handles, the
+secondary time area ones also need discarding to prevent guest memory
+corruption once the guest is re-started.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: b80d4f8d2ea6418e32fb4f20d1304ace6d6566e3
+master date: 2022-10-27 11:49:09 +0200
+---
+ xen/arch/x86/domain.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index ce6ddcf31397..e9b8ed4c96c2 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -927,6 +927,7 @@ int arch_domain_soft_reset(struct domain *d)
+ struct page_info *page = virt_to_page(d->shared_info), *new_page;
+ int ret = 0;
+ struct domain *owner;
++ struct vcpu *v;
+ mfn_t mfn;
+ gfn_t gfn;
+ p2m_type_t p2mt;
+@@ -1006,7 +1007,12 @@ int arch_domain_soft_reset(struct domain *d)
+ "Failed to add a page to replace %pd's shared_info frame %"PRI_gfn"\n",
+ d, gfn_x(gfn));
+ free_domheap_page(new_page);
++ goto exit_put_gfn;
+ }
++
++ for_each_vcpu ( d, v )
++ set_xen_guest_handle(v->arch.time_info_guest, NULL);
++
+ exit_put_gfn:
+ put_gfn(d, gfn_x(gfn));
+ exit_put_page:
+--
+2.37.4
+
diff --git a/0079-common-map_vcpu_info-wants-to-unshare-the-underlying.patch b/0079-common-map_vcpu_info-wants-to-unshare-the-underlying.patch
new file mode 100644
index 0000000..2944a80
--- /dev/null
+++ b/0079-common-map_vcpu_info-wants-to-unshare-the-underlying.patch
@@ -0,0 +1,41 @@
+From 317894fa6a067a7903199bc5c1e3e06a0436caf8 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 31 Oct 2022 13:36:50 +0100
+Subject: [PATCH 079/126] common: map_vcpu_info() wants to unshare the
+ underlying page
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Not passing P2M_UNSHARE to get_page_from_gfn() means there won't even be
+an attempt to unshare the referenced page, without any indication to the
+caller (e.g. -EAGAIN). Note that guests have no direct control over
+which of their pages are shared (or paged out), and hence they have no
+way to make sure all on their own that the subsequent obtaining of a
+writable type reference can actually succeed.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+master commit: 48980cf24d5cf41fd644600f99c753419505e735
+master date: 2022-10-28 11:38:32 +0200
+---
+ xen/common/domain.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/common/domain.c b/xen/common/domain.c
+index 17cc32fde373..0fb7f9a6225c 100644
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -1454,7 +1454,7 @@ int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
+ if ( (v != current) && !(v->pause_flags & VPF_down) )
+ return -EINVAL;
+
+- page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
++ page = get_page_from_gfn(d, gfn, NULL, P2M_UNSHARE);
+ if ( !page )
+ return -EINVAL;
+
+--
+2.37.4
+
diff --git a/0080-x86-pv-shim-correctly-ignore-empty-onlining-requests.patch b/0080-x86-pv-shim-correctly-ignore-empty-onlining-requests.patch
new file mode 100644
index 0000000..31aa812
--- /dev/null
+++ b/0080-x86-pv-shim-correctly-ignore-empty-onlining-requests.patch
@@ -0,0 +1,43 @@
+From a46f01fad17173afe3809ac1980cbe4b67a9a8b5 Mon Sep 17 00:00:00 2001
+From: Igor Druzhinin <igor.druzhinin@citrix.com>
+Date: Mon, 31 Oct 2022 13:37:17 +0100
+Subject: [PATCH 080/126] x86/pv-shim: correctly ignore empty onlining requests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Mem-op requests may have zero extents. Such requests need treating as
+no-ops. pv_shim_online_memory(), however, would have tried to take 2³²-1
+order-sized pages from its balloon list (to then populate them),
+typically ending when the entire set of ballooned pages of this order
+was consumed.
+
+Note that pv_shim_offline_memory() does not have such an issue.
+
+Fixes: b2245acc60c3 ("xen/pvshim: memory hotplug")
+Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 9272225ca72801fd9fa5b268a2d1c5adebd19cd9
+master date: 2022-10-28 15:47:59 +0200
+---
+ xen/arch/x86/pv/shim.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index b4e83e077891..104357e2c398 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -922,6 +922,9 @@ void pv_shim_online_memory(unsigned int nr, unsigned int order)
+ struct page_info *page, *tmp;
+ PAGE_LIST_HEAD(list);
+
++ if ( !nr )
++ return;
++
+ spin_lock(&balloon_lock);
+ page_list_for_each_safe ( page, tmp, &balloon )
+ {
+--
+2.37.4
+
diff --git a/0081-x86-pv-shim-correct-ballooning-up-for-compat-guests.patch b/0081-x86-pv-shim-correct-ballooning-up-for-compat-guests.patch
new file mode 100644
index 0000000..cd97334
--- /dev/null
+++ b/0081-x86-pv-shim-correct-ballooning-up-for-compat-guests.patch
@@ -0,0 +1,55 @@
+From b68e3fda8a76fb3ab582b5633727ac5545e4e8b9 Mon Sep 17 00:00:00 2001
+From: Igor Druzhinin <igor.druzhinin@citrix.com>
+Date: Mon, 31 Oct 2022 13:37:42 +0100
+Subject: [PATCH 081/126] x86/pv-shim: correct ballooning up for compat guests
+
+The compat layer for multi-extent memory ops may need to split incoming
+requests. Since the guest handles in the interface structures may not be
+altered, it does so by leveraging do_memory_op()'s continuation
+handling: It hands on non-initial requests with a non-zero start extent,
+with the (native) handle suitably adjusted down. As a result
+do_memory_op() sees only the first of potentially several requests with
+start extent being zero. It's only that case when the function would
+issue a call to pv_shim_online_memory(), yet the range then covers only
+the first sub-range that results from the split.
+
+Address that breakage by making a complementary call to
+pv_shim_online_memory() in the compat layer.
+
+Fixes: b2245acc60c3 ("xen/pvshim: memory hotplug")
+Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: a0bfdd201ea12aa5679bb8944d63a4e0d3c23160
+master date: 2022-10-28 15:48:50 +0200
+---
+ xen/common/compat/memory.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c
+index c43fa97cf15f..a0e0562a4033 100644
+--- a/xen/common/compat/memory.c
++++ b/xen/common/compat/memory.c
+@@ -7,6 +7,7 @@ EMIT_FILE;
+ #include <xen/event.h>
+ #include <xen/mem_access.h>
+ #include <asm/current.h>
++#include <asm/guest.h>
+ #include <compat/memory.h>
+
+ #define xen_domid_t domid_t
+@@ -146,7 +147,10 @@ int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
+ nat.rsrv->nr_extents = end_extent;
+ ++split;
+ }
+-
++ /* Avoid calling pv_shim_online_memory() when in a continuation. */
++ if ( pv_shim && op != XENMEM_decrease_reservation && !start_extent )
++ pv_shim_online_memory(cmp.rsrv.nr_extents - nat.rsrv->nr_extents,
++ cmp.rsrv.extent_order);
+ break;
+
+ case XENMEM_exchange:
+--
+2.37.4
+
diff --git a/0082-x86-pv-shim-correct-ballooning-down-for-compat-guest.patch b/0082-x86-pv-shim-correct-ballooning-down-for-compat-guest.patch
new file mode 100644
index 0000000..a6d895f
--- /dev/null
+++ b/0082-x86-pv-shim-correct-ballooning-down-for-compat-guest.patch
@@ -0,0 +1,73 @@
+From ddab5b1e001366258c0bfc7d5995b9d548e6042b Mon Sep 17 00:00:00 2001
+From: Igor Druzhinin <igor.druzhinin@citrix.com>
+Date: Mon, 31 Oct 2022 13:38:05 +0100
+Subject: [PATCH 082/126] x86/pv-shim: correct ballooning down for compat
+ guests
+
+The compat layer for multi-extent memory ops may need to split incoming
+requests. Since the guest handles in the interface structures may not be
+altered, it does so by leveraging do_memory_op()'s continuation
+handling: It hands on non-initial requests with a non-zero start extent,
+with the (native) handle suitably adjusted down. As a result
+do_memory_op() sees only the first of potentially several requests with
+start extent being zero. In order to be usable as overall result, the
+function accumulates args.nr_done, i.e. it initializes the field with
+the start extent. Therefore non-initial requests resulting from the
+split would pass too large a number into pv_shim_offline_memory().
+
+Address that breakage by always calling pv_shim_offline_memory()
+regardless of current hypercall preemption status, with a suitably
+adjusted first argument. Note that this is correct also for the native
+guest case: We now simply "commit" what was completed right away, rather
+than at the end of a series of preemption/re-start cycles. In fact this
+improves overall preemption behavior: There's no longer a potentially
+big chunk of work done non-preemptively at the end of the last
+"iteration".
+
+Fixes: b2245acc60c3 ("xen/pvshim: memory hotplug")
+Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 1d7fbc535d1d37bdc2cc53ede360b0f6651f7de1
+master date: 2022-10-28 15:49:33 +0200
+---
+ xen/common/memory.c | 19 +++++++------------
+ 1 file changed, 7 insertions(+), 12 deletions(-)
+
+diff --git a/xen/common/memory.c b/xen/common/memory.c
+index 95b2b934e4a2..a958d94ac3cd 100644
+--- a/xen/common/memory.c
++++ b/xen/common/memory.c
+@@ -1407,22 +1407,17 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
+
+ rc = args.nr_done;
+
+- if ( args.preempted )
+- return hypercall_create_continuation(
+- __HYPERVISOR_memory_op, "lh",
+- op | (rc << MEMOP_EXTENT_SHIFT), arg);
+-
+ #ifdef CONFIG_X86
+ if ( pv_shim && op == XENMEM_decrease_reservation )
+- /*
+- * Only call pv_shim_offline_memory when the hypercall has
+- * finished. Note that nr_done is used to cope in case the
+- * hypercall has failed and only part of the extents where
+- * processed.
+- */
+- pv_shim_offline_memory(args.nr_done, args.extent_order);
++ pv_shim_offline_memory(args.nr_done - start_extent,
++ args.extent_order);
+ #endif
+
++ if ( args.preempted )
++ return hypercall_create_continuation(
++ __HYPERVISOR_memory_op, "lh",
++ op | (rc << MEMOP_EXTENT_SHIFT), arg);
++
+ break;
+
+ case XENMEM_exchange:
+--
+2.37.4
+
diff --git a/0083-tools-xenstore-create_node-Don-t-defer-work-to-undo-.patch b/0083-tools-xenstore-create_node-Don-t-defer-work-to-undo-.patch
new file mode 100644
index 0000000..5204b3f
--- /dev/null
+++ b/0083-tools-xenstore-create_node-Don-t-defer-work-to-undo-.patch
@@ -0,0 +1,120 @@
+From ee03d9b56e6141422b4ef2444f93cf2e88e6a26c Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Tue, 13 Sep 2022 07:35:06 +0200
+Subject: [PATCH 083/126] tools/xenstore: create_node: Don't defer work to undo
+ any changes on failure
+
+XSA-115 extended destroy_node() to update the node accounting for the
+connection. The implementation assumes the connection is the parent
+of the node; however, all the nodes are allocated using a separate context
+(see process_message()). This will crash (or corrupt) xenstored
+as the pointer is wrongly used.
+
+In case of an error, any changes to the database or update to the
+accounting will now be reverted in create_node() by calling directly
+destroy_node(). This has the nice advantage of removing the loop that
+unsets the destructors in case of success.
+
+Take the opportunity to free the nodes right now as they are not
+going to be reachable (the function returns NULL) and are just wasting
+resources.
+
+This is XSA-414 / CVE-2022-42309.
+
+Fixes: 0bfb2101f243 ("tools/xenstore: fix node accounting after failed node creation")
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+(cherry picked from commit 1cd3cc7ea27cda7640a8d895e09617b61c265697)
+---
+ tools/xenstore/xenstored_core.c | 47 ++++++++++++++++++++++-----------
+ 1 file changed, 32 insertions(+), 15 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 9172dd767140..a00c49e404a1 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -1054,9 +1054,8 @@ nomem:
+ return NULL;
+ }
+
+-static int destroy_node(void *_node)
++static int destroy_node(struct connection *conn, struct node *node)
+ {
+- struct node *node = _node;
+ TDB_DATA key;
+
+ if (streq(node->name, "/"))
+@@ -1065,7 +1064,7 @@ static int destroy_node(void *_node)
+ set_tdb_key(node->name, &key);
+ tdb_delete(tdb_ctx, key);
+
+- domain_entry_dec(talloc_parent(node), node);
++ domain_entry_dec(conn, node);
+
+ return 0;
+ }
+@@ -1074,7 +1073,8 @@ static struct node *create_node(struct connection *conn, const void *ctx,
+ const char *name,
+ void *data, unsigned int datalen)
+ {
+- struct node *node, *i;
++ struct node *node, *i, *j;
++ int ret;
+
+ node = construct_node(conn, ctx, name);
+ if (!node)
+@@ -1096,23 +1096,40 @@ static struct node *create_node(struct connection *conn, const void *ctx,
+ /* i->parent is set for each new node, so check quota. */
+ if (i->parent &&
+ domain_entry(conn) >= quota_nb_entry_per_domain) {
+- errno = ENOSPC;
+- return NULL;
++ ret = ENOSPC;
++ goto err;
+ }
+- if (write_node(conn, i, false))
+- return NULL;
+
+- /* Account for new node, set destructor for error case. */
+- if (i->parent) {
++ ret = write_node(conn, i, false);
++ if (ret)
++ goto err;
++
++ /* Account for new node */
++ if (i->parent)
+ domain_entry_inc(conn, i);
+- talloc_set_destructor(i, destroy_node);
+- }
+ }
+
+- /* OK, now remove destructors so they stay around */
+- for (i = node; i->parent; i = i->parent)
+- talloc_set_destructor(i, NULL);
+ return node;
++
++err:
++ /*
++ * We failed to update TDB for some of the nodes. Undo any work that
++ * have already been done.
++ */
++ for (j = node; j != i; j = j->parent)
++ destroy_node(conn, j);
++
++ /* We don't need to keep the nodes around, so free them. */
++ i = node;
++ while (i) {
++ j = i;
++ i = i->parent;
++ talloc_free(j);
++ }
++
++ errno = ret;
++
++ return NULL;
+ }
+
+ /* path, data... */
+--
+2.37.4
+
diff --git a/0084-tools-xenstore-Fail-a-transaction-if-it-is-not-possi.patch b/0084-tools-xenstore-Fail-a-transaction-if-it-is-not-possi.patch
new file mode 100644
index 0000000..05936ea
--- /dev/null
+++ b/0084-tools-xenstore-Fail-a-transaction-if-it-is-not-possi.patch
@@ -0,0 +1,145 @@
+From 579e7334b909c22efc65c5df22e8afe414882154 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Tue, 13 Sep 2022 07:35:06 +0200
+Subject: [PATCH 084/126] tools/xenstore: Fail a transaction if it is not
+ possible to create a node
+
+Commit f2bebf72c4d5 "xenstore: rework of transaction handling" moved
+away from copying the entire database every time a new transaction is
+opened to tracking the list of nodes changed.
+
+The content of all the nodes accessed during a transaction will be
+temporarily stored in TDB using a different key.
+
+The function create_node() may write/update multiple nodes if the child
+doesn't exist. In case of a failure, the function will revert any
+changes (this includes any update to TDB). Unfortunately, the function
+which reverts the changes (i.e. destroy_node()) will not use the correct
+key to delete any update or even request the transaction to fail.
+
+This means that if a client decides to go ahead with committing the
+transaction, orphan nodes will be created because they were not linked
+to an existing node (create_node() will write the nodes backwards).
+
+Once some nodes have been partially updated in a transaction, it is not
+easily possible to undo any changes. So rather than continuing and hitting
+weird issues while committing, it is much saner to fail the transaction.
+
+This will have an impact on any client that decides to commit even if it
+can't write a node. Although, it is not clear why a normal client would
+want to do that...
+
+Lastly, update destroy_node() to use the correct key for deleting the
+node. Rather than recreating it (this will allocate memory and
+therefore fail), stash the key in the structure node.
+
+This is XSA-415 / CVE-2022-42310.
+
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+(cherry picked from commit 5d71766bd1a4a3a8b2fe952ca2be80e02fe48f34)
+---
+ tools/xenstore/xenstored_core.c | 23 +++++++++++++++--------
+ tools/xenstore/xenstored_core.h | 2 ++
+ tools/xenstore/xenstored_transaction.c | 5 +++++
+ tools/xenstore/xenstored_transaction.h | 3 +++
+ 4 files changed, 25 insertions(+), 8 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index a00c49e404a1..b28c2c66b53b 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -531,15 +531,17 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+ return 0;
+ }
+
++/*
++ * Write the node. If the node is written, caller can find the key used in
++ * node->key. This can later be used if the change needs to be reverted.
++ */
+ static int write_node(struct connection *conn, struct node *node,
+ bool no_quota_check)
+ {
+- TDB_DATA key;
+-
+- if (access_node(conn, node, NODE_ACCESS_WRITE, &key))
++ if (access_node(conn, node, NODE_ACCESS_WRITE, &node->key))
+ return errno;
+
+- return write_node_raw(conn, &key, node, no_quota_check);
++ return write_node_raw(conn, &node->key, node, no_quota_check);
+ }
+
+ enum xs_perm_type perm_for_conn(struct connection *conn,
+@@ -1056,16 +1058,21 @@ nomem:
+
+ static int destroy_node(struct connection *conn, struct node *node)
+ {
+- TDB_DATA key;
+-
+ if (streq(node->name, "/"))
+ corrupt(NULL, "Destroying root node!");
+
+- set_tdb_key(node->name, &key);
+- tdb_delete(tdb_ctx, key);
++ tdb_delete(tdb_ctx, node->key);
+
+ domain_entry_dec(conn, node);
+
++ /*
++ * It is not possible to easily revert the changes in a transaction.
++ * So if the failure happens in a transaction, mark it as fail to
++ * prevent any commit.
++ */
++ if ( conn->transaction )
++ fail_transaction(conn->transaction);
++
+ return 0;
+ }
+
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 0c9a0961b57e..900336afa426 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -148,6 +148,8 @@ struct node_perms {
+
+ struct node {
+ const char *name;
++ /* Key used to update TDB */
++ TDB_DATA key;
+
+ /* Parent (optional) */
+ struct node *parent;
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index cd07fb0f218b..faf6c930e42a 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -580,6 +580,11 @@ void transaction_entry_dec(struct transaction *trans, unsigned int domid)
+ list_add_tail(&d->list, &trans->changed_domains);
+ }
+
++void fail_transaction(struct transaction *trans)
++{
++ trans->fail = true;
++}
++
+ void conn_delete_all_transactions(struct connection *conn)
+ {
+ struct transaction *trans;
+diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h
+index 43a162bea3f3..14062730e3c9 100644
+--- a/tools/xenstore/xenstored_transaction.h
++++ b/tools/xenstore/xenstored_transaction.h
+@@ -46,6 +46,9 @@ int access_node(struct connection *conn, struct node *node,
+ int transaction_prepend(struct connection *conn, const char *name,
+ TDB_DATA *key);
+
++/* Mark the transaction as failed. This will prevent it to be committed. */
++void fail_transaction(struct transaction *trans);
++
+ void conn_delete_all_transactions(struct connection *conn);
+ int check_transactions(struct hashtable *hash);
+
+--
+2.37.4
+
diff --git a/0085-tools-xenstore-split-up-send_reply.patch b/0085-tools-xenstore-split-up-send_reply.patch
new file mode 100644
index 0000000..7420f93
--- /dev/null
+++ b/0085-tools-xenstore-split-up-send_reply.patch
@@ -0,0 +1,213 @@
+From 0d8bea403d4d1763dddb0c1c81d30efebafb6962 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:07 +0200
+Subject: [PATCH 085/126] tools/xenstore: split up send_reply()
+
+Today send_reply() is used for both normal request replies and watch
+events.
+
+Split it up into send_reply() and send_event(). This will be used to
+add some event specific handling.
+
+add_event() can be merged into send_event(), removing the need for an
+intermediate memory allocation.
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 9bfde319dbac2a1321898d2f75a3f075c3eb7b32)
+---
+ tools/xenstore/xenstored_core.c | 74 +++++++++++++++++++-------------
+ tools/xenstore/xenstored_core.h | 1 +
+ tools/xenstore/xenstored_watch.c | 39 +++--------------
+ 3 files changed, 52 insertions(+), 62 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index b28c2c66b53b..01d4a2e440ec 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -733,49 +733,32 @@ static void send_error(struct connection *conn, int error)
+ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len)
+ {
+- struct buffered_data *bdata;
++ struct buffered_data *bdata = conn->in;
++
++ assert(type != XS_WATCH_EVENT);
+
+ if ( len > XENSTORE_PAYLOAD_MAX ) {
+ send_error(conn, E2BIG);
+ return;
+ }
+
+- /* Replies reuse the request buffer, events need a new one. */
+- if (type != XS_WATCH_EVENT) {
+- bdata = conn->in;
+- /* Drop asynchronous responses, e.g. errors for watch events. */
+- if (!bdata)
+- return;
+- bdata->inhdr = true;
+- bdata->used = 0;
+- conn->in = NULL;
+- } else {
+- /* Message is a child of the connection for auto-cleanup. */
+- bdata = new_buffer(conn);
++ if (!bdata)
++ return;
++ bdata->inhdr = true;
++ bdata->used = 0;
+
+- /*
+- * Allocation failure here is unfortunate: we have no way to
+- * tell anybody about it.
+- */
+- if (!bdata)
+- return;
+- }
+ if (len <= DEFAULT_BUFFER_SIZE)
+ bdata->buffer = bdata->default_buffer;
+- else
++ else {
+ bdata->buffer = talloc_array(bdata, char, len);
+- if (!bdata->buffer) {
+- if (type == XS_WATCH_EVENT) {
+- /* Same as above: no way to tell someone. */
+- talloc_free(bdata);
++ if (!bdata->buffer) {
++ send_error(conn, ENOMEM);
+ return;
+ }
+- /* re-establish request buffer for sending ENOMEM. */
+- conn->in = bdata;
+- send_error(conn, ENOMEM);
+- return;
+ }
+
++ conn->in = NULL;
++
+ /* Update relevant header fields and fill in the message body. */
+ bdata->hdr.msg.type = type;
+ bdata->hdr.msg.len = len;
+@@ -783,8 +766,39 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+
+ /* Queue for later transmission. */
+ list_add_tail(&bdata->list, &conn->out_list);
++}
+
+- return;
++/*
++ * Send a watch event.
++ * As this is not directly related to the current command, errors can't be
++ * reported.
++ */
++void send_event(struct connection *conn, const char *path, const char *token)
++{
++ struct buffered_data *bdata;
++ unsigned int len;
++
++ len = strlen(path) + 1 + strlen(token) + 1;
++ /* Don't try to send over-long events. */
++ if (len > XENSTORE_PAYLOAD_MAX)
++ return;
++
++ bdata = new_buffer(conn);
++ if (!bdata)
++ return;
++
++ bdata->buffer = talloc_array(bdata, char, len);
++ if (!bdata->buffer) {
++ talloc_free(bdata);
++ return;
++ }
++ strcpy(bdata->buffer, path);
++ strcpy(bdata->buffer + strlen(path) + 1, token);
++ bdata->hdr.msg.type = XS_WATCH_EVENT;
++ bdata->hdr.msg.len = len;
++
++ /* Queue for later transmission. */
++ list_add_tail(&bdata->list, &conn->out_list);
+ }
+
+ /* Some routines (write, mkdir, etc) just need a non-error return */
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 900336afa426..38d97fa081a6 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -180,6 +180,7 @@ unsigned int get_string(const struct buffered_data *data, unsigned int offset);
+
+ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len);
++void send_event(struct connection *conn, const char *path, const char *token);
+
+ /* Some routines (write, mkdir, etc) just need a non-error return */
+ void send_ack(struct connection *conn, enum xsd_sockmsg_type type);
+diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
+index db89e0141fce..a116f967dc66 100644
+--- a/tools/xenstore/xenstored_watch.c
++++ b/tools/xenstore/xenstored_watch.c
+@@ -85,35 +85,6 @@ static const char *get_watch_path(const struct watch *watch, const char *name)
+ return path;
+ }
+
+-/*
+- * Send a watch event.
+- * Temporary memory allocations are done with ctx.
+- */
+-static void add_event(struct connection *conn,
+- const void *ctx,
+- struct watch *watch,
+- const char *name)
+-{
+- /* Data to send (node\0token\0). */
+- unsigned int len;
+- char *data;
+-
+- name = get_watch_path(watch, name);
+-
+- len = strlen(name) + 1 + strlen(watch->token) + 1;
+- /* Don't try to send over-long events. */
+- if (len > XENSTORE_PAYLOAD_MAX)
+- return;
+-
+- data = talloc_array(ctx, char, len);
+- if (!data)
+- return;
+- strcpy(data, name);
+- strcpy(data + strlen(name) + 1, watch->token);
+- send_reply(conn, XS_WATCH_EVENT, data, len);
+- talloc_free(data);
+-}
+-
+ /*
+ * Check permissions of a specific watch to fire:
+ * Either the node itself or its parent have to be readable by the connection
+@@ -190,10 +161,14 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
+ list_for_each_entry(watch, &i->watches, list) {
+ if (exact) {
+ if (streq(name, watch->node))
+- add_event(i, ctx, watch, name);
++ send_event(i,
++ get_watch_path(watch, name),
++ watch->token);
+ } else {
+ if (is_child(name, watch->node))
+- add_event(i, ctx, watch, name);
++ send_event(i,
++ get_watch_path(watch, name),
++ watch->token);
+ }
+ }
+ }
+@@ -292,7 +267,7 @@ int do_watch(struct connection *conn, struct buffered_data *in)
+ send_ack(conn, XS_WATCH);
+
+ /* We fire once up front: simplifies clients and restart. */
+- add_event(conn, in, watch, watch->node);
++ send_event(conn, get_watch_path(watch, watch->node), watch->token);
+
+ return 0;
+ }
+--
+2.37.4
+
diff --git a/0086-tools-xenstore-add-helpers-to-free-struct-buffered_d.patch b/0086-tools-xenstore-add-helpers-to-free-struct-buffered_d.patch
new file mode 100644
index 0000000..46ae2d3
--- /dev/null
+++ b/0086-tools-xenstore-add-helpers-to-free-struct-buffered_d.patch
@@ -0,0 +1,117 @@
+From b322923894ea23f397efc58a938cb9213d7dc617 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:07 +0200
+Subject: [PATCH 086/126] tools/xenstore: add helpers to free struct
+ buffered_data
+
+Add two helpers for freeing struct buffered_data: free_buffered_data()
+for freeing one instance and conn_free_buffered_data() for freeing all
+instances for a connection.
+
+This is avoiding duplicated code and will help later when more actions
+are needed when freeing a struct buffered_data.
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit ead062a68a9c201a95488e84750a70a107f7b317)
+---
+ tools/xenstore/xenstored_core.c | 26 +++++++++++++++++---------
+ tools/xenstore/xenstored_core.h | 2 ++
+ tools/xenstore/xenstored_domain.c | 7 +------
+ 3 files changed, 20 insertions(+), 15 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 01d4a2e440ec..6498bf603666 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -211,6 +211,21 @@ void reopen_log(void)
+ }
+ }
+
++static void free_buffered_data(struct buffered_data *out,
++ struct connection *conn)
++{
++ list_del(&out->list);
++ talloc_free(out);
++}
++
++void conn_free_buffered_data(struct connection *conn)
++{
++ struct buffered_data *out;
++
++ while ((out = list_top(&conn->out_list, struct buffered_data, list)))
++ free_buffered_data(out, conn);
++}
++
+ static bool write_messages(struct connection *conn)
+ {
+ int ret;
+@@ -254,8 +269,7 @@ static bool write_messages(struct connection *conn)
+
+ trace_io(conn, out, 1);
+
+- list_del(&out->list);
+- talloc_free(out);
++ free_buffered_data(out, conn);
+
+ return true;
+ }
+@@ -1472,18 +1486,12 @@ static struct {
+ */
+ static void ignore_connection(struct connection *conn)
+ {
+- struct buffered_data *out, *tmp;
+-
+ trace("CONN %p ignored\n", conn);
+
+ conn->is_ignored = true;
+ conn_delete_all_watches(conn);
+ conn_delete_all_transactions(conn);
+-
+- list_for_each_entry_safe(out, tmp, &conn->out_list, list) {
+- list_del(&out->list);
+- talloc_free(out);
+- }
++ conn_free_buffered_data(conn);
+
+ talloc_free(conn->in);
+ conn->in = NULL;
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 38d97fa081a6..0ba5b783d4d1 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -270,6 +270,8 @@ int remember_string(struct hashtable *hash, const char *str);
+
+ void set_tdb_key(const char *name, TDB_DATA *key);
+
++void conn_free_buffered_data(struct connection *conn);
++
+ const char *dump_state_global(FILE *fp);
+ const char *dump_state_buffered_data(FILE *fp, const struct connection *c,
+ const struct connection *conn,
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index 3d4d0649a243..72a5cd3b9aaf 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -417,15 +417,10 @@ static struct domain *find_domain_by_domid(unsigned int domid)
+ static void domain_conn_reset(struct domain *domain)
+ {
+ struct connection *conn = domain->conn;
+- struct buffered_data *out;
+
+ conn_delete_all_watches(conn);
+ conn_delete_all_transactions(conn);
+-
+- while ((out = list_top(&conn->out_list, struct buffered_data, list))) {
+- list_del(&out->list);
+- talloc_free(out);
+- }
++ conn_free_buffered_data(conn);
+
+ talloc_free(conn->in);
+
+--
+2.37.4
+
diff --git a/0087-tools-xenstore-reduce-number-of-watch-events.patch b/0087-tools-xenstore-reduce-number-of-watch-events.patch
new file mode 100644
index 0000000..ab6cc92
--- /dev/null
+++ b/0087-tools-xenstore-reduce-number-of-watch-events.patch
@@ -0,0 +1,201 @@
+From 8999db805e5ef55172a85d67695429edc3d78771 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:07 +0200
+Subject: [PATCH 087/126] tools/xenstore: reduce number of watch events
+
+When removing a watched node outside of a transaction, two watch events
+are being produced instead of just a single one.
+
+When finalizing a transaction, watch events can be generated for each
+node which is being modified, even if outside a transaction such
+modifications might not have resulted in a watch event.
+
+This happens e.g.:
+
+- for nodes which are only modified due to added/removed child entries
+- for nodes being removed or created implicitly (e.g. creation of a/b/c
+ is implicitly creating a/b, resulting in watch events for a, a/b and
+ a/b/c instead of a/b/c only)
+
+Avoid these additional watch events, in order to reduce the needed
+memory inside Xenstore for queueing them.
+
+This is being achieved by adding event flags to struct accessed_node
+specifying whether an event should be triggered, and whether it should
+be an exact match of the modified path. Both flags are set explicitly
+via fire_watches() instead of only being implied.
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 3a96013a3e17baa07410b1b9776225d1d9a74297)
+---
+ tools/xenstore/xenstored_core.c | 19 ++++++------
+ tools/xenstore/xenstored_transaction.c | 41 +++++++++++++++++++++-----
+ tools/xenstore/xenstored_transaction.h | 3 ++
+ tools/xenstore/xenstored_watch.c | 7 +++--
+ 4 files changed, 51 insertions(+), 19 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 6498bf603666..5157a7527f58 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -1261,7 +1261,7 @@ static void delete_child(struct connection *conn,
+ }
+
+ static int delete_node(struct connection *conn, const void *ctx,
+- struct node *parent, struct node *node)
++ struct node *parent, struct node *node, bool watch_exact)
+ {
+ char *name;
+
+@@ -1273,7 +1273,7 @@ static int delete_node(struct connection *conn, const void *ctx,
+ node->children);
+ child = name ? read_node(conn, node, name) : NULL;
+ if (child) {
+- if (delete_node(conn, ctx, node, child))
++ if (delete_node(conn, ctx, node, child, true))
+ return errno;
+ } else {
+ trace("delete_node: Error deleting child '%s/%s'!\n",
+@@ -1285,7 +1285,12 @@ static int delete_node(struct connection *conn, const void *ctx,
+ talloc_free(name);
+ }
+
+- fire_watches(conn, ctx, node->name, node, true, NULL);
++ /*
++ * Fire the watches now, when we can still see the node permissions.
++ * This fine as we are single threaded and the next possible read will
++ * be handled only after the node has been really removed.
++ */
++ fire_watches(conn, ctx, node->name, node, watch_exact, NULL);
+ delete_node_single(conn, node);
+ delete_child(conn, parent, basename(node->name));
+ talloc_free(node);
+@@ -1311,13 +1316,7 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node,
+ return (errno == ENOMEM) ? ENOMEM : EINVAL;
+ node->parent = parent;
+
+- /*
+- * Fire the watches now, when we can still see the node permissions.
+- * This fine as we are single threaded and the next possible read will
+- * be handled only after the node has been really removed.
+- */
+- fire_watches(conn, ctx, name, node, false, NULL);
+- return delete_node(conn, ctx, parent, node);
++ return delete_node(conn, ctx, parent, node, false);
+ }
+
+
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index faf6c930e42a..54432907fc76 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -130,6 +130,10 @@ struct accessed_node
+
+ /* Transaction node in data base? */
+ bool ta_node;
++
++ /* Watch event flags. */
++ bool fire_watch;
++ bool watch_exact;
+ };
+
+ struct changed_domain
+@@ -323,6 +327,29 @@ err:
+ return ret;
+ }
+
++/*
++ * A watch event should be fired for a node modified inside a transaction.
++ * Set the corresponding information. A non-exact event is replacing an exact
++ * one, but not the other way round.
++ */
++void queue_watches(struct connection *conn, const char *name, bool watch_exact)
++{
++ struct accessed_node *i;
++
++ i = find_accessed_node(conn->transaction, name);
++ if (!i) {
++ conn->transaction->fail = true;
++ return;
++ }
++
++ if (!i->fire_watch) {
++ i->fire_watch = true;
++ i->watch_exact = watch_exact;
++ } else if (!watch_exact) {
++ i->watch_exact = false;
++ }
++}
++
+ /*
+ * Finalize transaction:
+ * Walk through accessed nodes and check generation against global data.
+@@ -377,15 +404,15 @@ static int finalize_transaction(struct connection *conn,
+ ret = tdb_store(tdb_ctx, key, data,
+ TDB_REPLACE);
+ talloc_free(data.dptr);
+- if (ret)
+- goto err;
+- fire_watches(conn, trans, i->node, NULL, false,
+- i->perms.p ? &i->perms : NULL);
+ } else {
+- fire_watches(conn, trans, i->node, NULL, false,
++ ret = tdb_delete(tdb_ctx, key);
++ }
++ if (ret)
++ goto err;
++ if (i->fire_watch) {
++ fire_watches(conn, trans, i->node, NULL,
++ i->watch_exact,
+ i->perms.p ? &i->perms : NULL);
+- if (tdb_delete(tdb_ctx, key))
+- goto err;
+ }
+ }
+
+diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h
+index 14062730e3c9..0093cac807e3 100644
+--- a/tools/xenstore/xenstored_transaction.h
++++ b/tools/xenstore/xenstored_transaction.h
+@@ -42,6 +42,9 @@ void transaction_entry_dec(struct transaction *trans, unsigned int domid);
+ int access_node(struct connection *conn, struct node *node,
+ enum node_access_type type, TDB_DATA *key);
+
++/* Queue watches for a modified node. */
++void queue_watches(struct connection *conn, const char *name, bool watch_exact);
++
+ /* Prepend the transaction to name if appropriate. */
+ int transaction_prepend(struct connection *conn, const char *name,
+ TDB_DATA *key);
+diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
+index a116f967dc66..bc6d833028a3 100644
+--- a/tools/xenstore/xenstored_watch.c
++++ b/tools/xenstore/xenstored_watch.c
+@@ -29,6 +29,7 @@
+ #include "xenstore_lib.h"
+ #include "utils.h"
+ #include "xenstored_domain.h"
++#include "xenstored_transaction.h"
+
+ extern int quota_nb_watch_per_domain;
+
+@@ -143,9 +144,11 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
+ struct connection *i;
+ struct watch *watch;
+
+- /* During transactions, don't fire watches. */
+- if (conn && conn->transaction)
++ /* During transactions, don't fire watches, but queue them. */
++ if (conn && conn->transaction) {
++ queue_watches(conn, name, exact);
+ return;
++ }
+
+ /* Create an event for each watch. */
+ list_for_each_entry(i, &connections, list) {
+--
+2.37.4
+
diff --git a/0088-tools-xenstore-let-unread-watch-events-time-out.patch b/0088-tools-xenstore-let-unread-watch-events-time-out.patch
new file mode 100644
index 0000000..03419c6
--- /dev/null
+++ b/0088-tools-xenstore-let-unread-watch-events-time-out.patch
@@ -0,0 +1,309 @@
+From 53a77b82717530d836300f1de0ad037de85477dd Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:07 +0200
+Subject: [PATCH 088/126] tools/xenstore: let unread watch events time out
+
+A future modification will limit the number of outstanding requests
+for a domain, where "outstanding" means that the response of the
+request or any resulting watch event hasn't been consumed yet.
+
+In order to avoid a malicious guest being able to block other guests
+by not reading watch events, add a timeout for watch events. In case a
+watch event hasn't been consumed after this timeout, it is deleted.
+Set the default timeout to 20 seconds (an arbitrary value that is not
+too high).
+
+In order to support specifying other timeout values in the future, use
+a generic command line option for that purpose:
+
+--timeout|-w watch-event=<seconds>
+
+This is part of XSA-326 / CVE-2022-42311.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 5285dcb1a5c01695c11e6397c95d906b5e765c98)
+---
+ tools/xenstore/xenstored_core.c | 133 +++++++++++++++++++++++++++++++-
+ tools/xenstore/xenstored_core.h | 6 ++
+ 2 files changed, 138 insertions(+), 1 deletion(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 5157a7527f58..ee3396fefa94 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -108,6 +108,8 @@ int quota_max_transaction = 10;
+ int quota_nb_perms_per_node = 5;
+ int quota_max_path_len = XENSTORE_REL_PATH_MAX;
+
++unsigned int timeout_watch_event_msec = 20000;
++
+ void trace(const char *fmt, ...)
+ {
+ va_list arglist;
+@@ -211,19 +213,92 @@ void reopen_log(void)
+ }
+ }
+
++static uint64_t get_now_msec(void)
++{
++ struct timespec now_ts;
++
++ if (clock_gettime(CLOCK_MONOTONIC, &now_ts))
++ barf_perror("Could not find time (clock_gettime failed)");
++
++ return now_ts.tv_sec * 1000 + now_ts.tv_nsec / 1000000;
++}
++
+ static void free_buffered_data(struct buffered_data *out,
+ struct connection *conn)
+ {
++ struct buffered_data *req;
++
+ list_del(&out->list);
++
++ /*
++ * Update conn->timeout_msec with the next found timeout value in the
++ * queued pending requests.
++ */
++ if (out->timeout_msec) {
++ conn->timeout_msec = 0;
++ list_for_each_entry(req, &conn->out_list, list) {
++ if (req->timeout_msec) {
++ conn->timeout_msec = req->timeout_msec;
++ break;
++ }
++ }
++ }
++
+ talloc_free(out);
+ }
+
++static void check_event_timeout(struct connection *conn, uint64_t msecs,
++ int *ptimeout)
++{
++ uint64_t delta;
++ struct buffered_data *out, *tmp;
++
++ if (!conn->timeout_msec)
++ return;
++
++ delta = conn->timeout_msec - msecs;
++ if (conn->timeout_msec <= msecs) {
++ delta = 0;
++ list_for_each_entry_safe(out, tmp, &conn->out_list, list) {
++ /*
++ * Only look at buffers with timeout and no data
++ * already written to the ring.
++ */
++ if (out->timeout_msec && out->inhdr && !out->used) {
++ if (out->timeout_msec > msecs) {
++ conn->timeout_msec = out->timeout_msec;
++ delta = conn->timeout_msec - msecs;
++ break;
++ }
++
++ /*
++ * Free out without updating conn->timeout_msec,
++ * as the update is done in this loop already.
++ */
++ out->timeout_msec = 0;
++ trace("watch event path %s for domain %u timed out\n",
++ out->buffer, conn->id);
++ free_buffered_data(out, conn);
++ }
++ }
++ if (!delta) {
++ conn->timeout_msec = 0;
++ return;
++ }
++ }
++
++ if (*ptimeout == -1 || *ptimeout > delta)
++ *ptimeout = delta;
++}
++
+ void conn_free_buffered_data(struct connection *conn)
+ {
+ struct buffered_data *out;
+
+ while ((out = list_top(&conn->out_list, struct buffered_data, list)))
+ free_buffered_data(out, conn);
++
++ conn->timeout_msec = 0;
+ }
+
+ static bool write_messages(struct connection *conn)
+@@ -382,6 +457,7 @@ static void initialize_fds(int *p_sock_pollfd_idx, int *ptimeout)
+ {
+ struct connection *conn;
+ struct wrl_timestampt now;
++ uint64_t msecs;
+
+ if (fds)
+ memset(fds, 0, sizeof(struct pollfd) * current_array_size);
+@@ -402,10 +478,12 @@ static void initialize_fds(int *p_sock_pollfd_idx, int *ptimeout)
+
+ wrl_gettime_now(&now);
+ wrl_log_periodic(now);
++ msecs = get_now_msec();
+
+ list_for_each_entry(conn, &connections, list) {
+ if (conn->domain) {
+ wrl_check_timeout(conn->domain, now, ptimeout);
++ check_event_timeout(conn, msecs, ptimeout);
+ if (domain_can_read(conn) ||
+ (domain_can_write(conn) &&
+ !list_empty(&conn->out_list)))
+@@ -760,6 +838,7 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ return;
+ bdata->inhdr = true;
+ bdata->used = 0;
++ bdata->timeout_msec = 0;
+
+ if (len <= DEFAULT_BUFFER_SIZE)
+ bdata->buffer = bdata->default_buffer;
+@@ -811,6 +890,12 @@ void send_event(struct connection *conn, const char *path, const char *token)
+ bdata->hdr.msg.type = XS_WATCH_EVENT;
+ bdata->hdr.msg.len = len;
+
++ if (timeout_watch_event_msec && domain_is_unprivileged(conn)) {
++ bdata->timeout_msec = get_now_msec() + timeout_watch_event_msec;
++ if (!conn->timeout_msec)
++ conn->timeout_msec = bdata->timeout_msec;
++ }
++
+ /* Queue for later transmission. */
+ list_add_tail(&bdata->list, &conn->out_list);
+ }
+@@ -2099,6 +2184,9 @@ static void usage(void)
+ " -t, --transaction <nb> limit the number of transaction allowed per domain,\n"
+ " -A, --perm-nb <nb> limit the number of permissions per node,\n"
+ " -M, --path-max <chars> limit the allowed Xenstore node path length,\n"
++" -w, --timeout <what>=<seconds> set the timeout in seconds for <what>,\n"
++" allowed timeout candidates are:\n"
++" watch-event: time a watch-event is kept pending\n"
+ " -R, --no-recovery to request that no recovery should be attempted when\n"
+ " the store is corrupted (debug only),\n"
+ " -I, --internal-db store database in memory, not on disk\n"
+@@ -2121,6 +2209,7 @@ static struct option options[] = {
+ { "transaction", 1, NULL, 't' },
+ { "perm-nb", 1, NULL, 'A' },
+ { "path-max", 1, NULL, 'M' },
++ { "timeout", 1, NULL, 'w' },
+ { "no-recovery", 0, NULL, 'R' },
+ { "internal-db", 0, NULL, 'I' },
+ { "verbose", 0, NULL, 'V' },
+@@ -2135,6 +2224,39 @@ int dom0_domid = 0;
+ int dom0_event = 0;
+ int priv_domid = 0;
+
++static int get_optval_int(const char *arg)
++{
++ char *end;
++ long val;
++
++ val = strtol(arg, &end, 10);
++ if (!*arg || *end || val < 0 || val > INT_MAX)
++ barf("invalid parameter value \"%s\"\n", arg);
++
++ return val;
++}
++
++static bool what_matches(const char *arg, const char *what)
++{
++ unsigned int what_len = strlen(what);
++
++ return !strncmp(arg, what, what_len) && arg[what_len] == '=';
++}
++
++static void set_timeout(const char *arg)
++{
++ const char *eq = strchr(arg, '=');
++ int val;
++
++ if (!eq)
++ barf("quotas must be specified via <what>=<seconds>\n");
++ val = get_optval_int(eq + 1);
++ if (what_matches(arg, "watch-event"))
++ timeout_watch_event_msec = val * 1000;
++ else
++ barf("unknown timeout \"%s\"\n", arg);
++}
++
+ int main(int argc, char *argv[])
+ {
+ int opt;
+@@ -2149,7 +2271,7 @@ int main(int argc, char *argv[])
+ orig_argc = argc;
+ orig_argv = argv;
+
+- while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:U", options,
++ while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:w:U", options,
+ NULL)) != -1) {
+ switch (opt) {
+ case 'D':
+@@ -2198,6 +2320,9 @@ int main(int argc, char *argv[])
+ quota_max_path_len = min(XENSTORE_REL_PATH_MAX,
+ quota_max_path_len);
+ break;
++ case 'w':
++ set_timeout(optarg);
++ break;
+ case 'e':
+ dom0_event = strtol(optarg, NULL, 10);
+ break;
+@@ -2642,6 +2767,12 @@ static void add_buffered_data(struct buffered_data *bdata,
+ barf("error restoring buffered data");
+
+ memcpy(bdata->buffer, data, len);
++ if (bdata->hdr.msg.type == XS_WATCH_EVENT && timeout_watch_event_msec &&
++ domain_is_unprivileged(conn)) {
++ bdata->timeout_msec = get_now_msec() + timeout_watch_event_msec;
++ if (!conn->timeout_msec)
++ conn->timeout_msec = bdata->timeout_msec;
++ }
+
+ /* Queue for later transmission. */
+ list_add_tail(&bdata->list, &conn->out_list);
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 0ba5b783d4d1..2db577928fc6 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -27,6 +27,7 @@
+ #include <dirent.h>
+ #include <stdbool.h>
+ #include <stdint.h>
++#include <time.h>
+ #include <errno.h>
+
+ #include "xenstore_lib.h"
+@@ -67,6 +68,8 @@ struct buffered_data
+ char raw[sizeof(struct xsd_sockmsg)];
+ } hdr;
+
++ uint64_t timeout_msec;
++
+ /* The actual data. */
+ char *buffer;
+ char default_buffer[DEFAULT_BUFFER_SIZE];
+@@ -110,6 +113,7 @@ struct connection
+
+ /* Buffered output data */
+ struct list_head out_list;
++ uint64_t timeout_msec;
+
+ /* Transaction context for current request (NULL if none). */
+ struct transaction *transaction;
+@@ -237,6 +241,8 @@ extern int dom0_event;
+ extern int priv_domid;
+ extern int quota_nb_entry_per_domain;
+
++extern unsigned int timeout_watch_event_msec;
++
+ /* Map the kernel's xenstore page. */
+ void *xenbus_map(void);
+ void unmap_xenbus(void *interface);
+--
+2.37.4
+
diff --git a/0089-tools-xenstore-limit-outstanding-requests.patch b/0089-tools-xenstore-limit-outstanding-requests.patch
new file mode 100644
index 0000000..2e110b0
--- /dev/null
+++ b/0089-tools-xenstore-limit-outstanding-requests.patch
@@ -0,0 +1,453 @@
+From 56300e8e1781cee1b6a514e5f2bea234a7885d55 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:08 +0200
+Subject: [PATCH 089/126] tools/xenstore: limit outstanding requests
+
+Add another quota for limiting the number of outstanding requests of a
+guest. As the way to specify quotas on the command line is becoming
+rather nasty, switch to a new scheme using [--quota|-Q] <what>=<val>,
+which allows adding more quotas easily in the future.
+
+Set the default value to 20 (basically a random value not seeming to
+be too high or too low).
+
+A request is said to be outstanding if any message generated by this
+request (the direct response plus potential watch events) is not yet
+completely stored into a ring buffer. The initial watch event sent as
+a result of registering a watch is an exception.
+
+Note that across a live update the relation to buffered watch events
+for other domains is lost.
+
+Use talloc_zero() for allocating the domain structure in order to have
+all per-domain quota zeroed initially.
+
+This is part of XSA-326 / CVE-2022-42312.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 36de433a273f55d614c83b89c9a8972287a1e475)
+---
+ tools/xenstore/xenstored_core.c | 88 +++++++++++++++++++++++++++++--
+ tools/xenstore/xenstored_core.h | 20 ++++++-
+ tools/xenstore/xenstored_domain.c | 38 ++++++++++---
+ tools/xenstore/xenstored_domain.h | 3 ++
+ tools/xenstore/xenstored_watch.c | 15 ++++--
+ 5 files changed, 150 insertions(+), 14 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index ee3396fefa94..d871f217af9c 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -107,6 +107,7 @@ int quota_max_entry_size = 2048; /* 2K */
+ int quota_max_transaction = 10;
+ int quota_nb_perms_per_node = 5;
+ int quota_max_path_len = XENSTORE_REL_PATH_MAX;
++int quota_req_outstanding = 20;
+
+ unsigned int timeout_watch_event_msec = 20000;
+
+@@ -223,12 +224,24 @@ static uint64_t get_now_msec(void)
+ return now_ts.tv_sec * 1000 + now_ts.tv_nsec / 1000000;
+ }
+
++/*
++ * Remove a struct buffered_data from the list of outgoing data.
++ * A struct buffered_data related to a request having caused watch events to be
++ * sent is kept until all those events have been written out.
++ * Each watch event is referencing the related request via pend.req, while the
++ * number of watch events caused by a request is kept in pend.ref.event_cnt
++ * (those two cases are mutually exclusive, so the two fields can share memory
++ * via a union).
++ * The struct buffered_data is freed only if no related watch event is
++ * referencing it. The related return data can be freed right away.
++ */
+ static void free_buffered_data(struct buffered_data *out,
+ struct connection *conn)
+ {
+ struct buffered_data *req;
+
+ list_del(&out->list);
++ out->on_out_list = false;
+
+ /*
+ * Update conn->timeout_msec with the next found timeout value in the
+@@ -244,6 +257,30 @@ static void free_buffered_data(struct buffered_data *out,
+ }
+ }
+
++ if (out->hdr.msg.type == XS_WATCH_EVENT) {
++ req = out->pend.req;
++ if (req) {
++ req->pend.ref.event_cnt--;
++ if (!req->pend.ref.event_cnt && !req->on_out_list) {
++ if (req->on_ref_list) {
++ domain_outstanding_domid_dec(
++ req->pend.ref.domid);
++ list_del(&req->list);
++ }
++ talloc_free(req);
++ }
++ }
++ } else if (out->pend.ref.event_cnt) {
++ /* Hang out off from conn. */
++ talloc_steal(NULL, out);
++ if (out->buffer != out->default_buffer)
++ talloc_free(out->buffer);
++ list_add(&out->list, &conn->ref_list);
++ out->on_ref_list = true;
++ return;
++ } else
++ domain_outstanding_dec(conn);
++
+ talloc_free(out);
+ }
+
+@@ -399,6 +436,7 @@ int delay_request(struct connection *conn, struct buffered_data *in,
+ static int destroy_conn(void *_conn)
+ {
+ struct connection *conn = _conn;
++ struct buffered_data *req;
+
+ /* Flush outgoing if possible, but don't block. */
+ if (!conn->domain) {
+@@ -412,6 +450,11 @@ static int destroy_conn(void *_conn)
+ break;
+ close(conn->fd);
+ }
++
++ conn_free_buffered_data(conn);
++ list_for_each_entry(req, &conn->ref_list, list)
++ req->on_ref_list = false;
++
+ if (conn->target)
+ talloc_unlink(conn, conn->target);
+ list_del(&conn->list);
+@@ -859,6 +902,8 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+
+ /* Queue for later transmission. */
+ list_add_tail(&bdata->list, &conn->out_list);
++ bdata->on_out_list = true;
++ domain_outstanding_inc(conn);
+ }
+
+ /*
+@@ -866,7 +911,8 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ * As this is not directly related to the current command, errors can't be
+ * reported.
+ */
+-void send_event(struct connection *conn, const char *path, const char *token)
++void send_event(struct buffered_data *req, struct connection *conn,
++ const char *path, const char *token)
+ {
+ struct buffered_data *bdata;
+ unsigned int len;
+@@ -896,8 +942,13 @@ void send_event(struct connection *conn, const char *path, const char *token)
+ conn->timeout_msec = bdata->timeout_msec;
+ }
+
++ bdata->pend.req = req;
++ if (req)
++ req->pend.ref.event_cnt++;
++
+ /* Queue for later transmission. */
+ list_add_tail(&bdata->list, &conn->out_list);
++ bdata->on_out_list = true;
+ }
+
+ /* Some routines (write, mkdir, etc) just need a non-error return */
+@@ -1658,6 +1709,7 @@ static void handle_input(struct connection *conn)
+ return;
+ }
+ in = conn->in;
++ in->pend.ref.domid = conn->id;
+
+ /* Not finished header yet? */
+ if (in->inhdr) {
+@@ -1727,6 +1779,7 @@ struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
+ new->is_ignored = false;
+ new->transaction_started = 0;
+ INIT_LIST_HEAD(&new->out_list);
++ INIT_LIST_HEAD(&new->ref_list);
+ INIT_LIST_HEAD(&new->watches);
+ INIT_LIST_HEAD(&new->transaction_list);
+ INIT_LIST_HEAD(&new->delayed);
+@@ -2184,6 +2237,9 @@ static void usage(void)
+ " -t, --transaction <nb> limit the number of transaction allowed per domain,\n"
+ " -A, --perm-nb <nb> limit the number of permissions per node,\n"
+ " -M, --path-max <chars> limit the allowed Xenstore node path length,\n"
++" -Q, --quota <what>=<nb> set the quota <what> to the value <nb>, allowed\n"
++" quotas are:\n"
++" outstanding: number of outstanding requests\n"
+ " -w, --timeout <what>=<seconds> set the timeout in seconds for <what>,\n"
+ " allowed timeout candidates are:\n"
+ " watch-event: time a watch-event is kept pending\n"
+@@ -2209,6 +2265,7 @@ static struct option options[] = {
+ { "transaction", 1, NULL, 't' },
+ { "perm-nb", 1, NULL, 'A' },
+ { "path-max", 1, NULL, 'M' },
++ { "quota", 1, NULL, 'Q' },
+ { "timeout", 1, NULL, 'w' },
+ { "no-recovery", 0, NULL, 'R' },
+ { "internal-db", 0, NULL, 'I' },
+@@ -2257,6 +2314,20 @@ static void set_timeout(const char *arg)
+ barf("unknown timeout \"%s\"\n", arg);
+ }
+
++static void set_quota(const char *arg)
++{
++ const char *eq = strchr(arg, '=');
++ int val;
++
++ if (!eq)
++ barf("quotas must be specified via <what>=<nb>\n");
++ val = get_optval_int(eq + 1);
++ if (what_matches(arg, "outstanding"))
++ quota_req_outstanding = val;
++ else
++ barf("unknown quota \"%s\"\n", arg);
++}
++
+ int main(int argc, char *argv[])
+ {
+ int opt;
+@@ -2271,8 +2342,8 @@ int main(int argc, char *argv[])
+ orig_argc = argc;
+ orig_argv = argv;
+
+- while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:T:RVW:w:U", options,
+- NULL)) != -1) {
++ while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:Q:T:RVW:w:U",
++ options, NULL)) != -1) {
+ switch (opt) {
+ case 'D':
+ no_domain_init = true;
+@@ -2320,6 +2391,9 @@ int main(int argc, char *argv[])
+ quota_max_path_len = min(XENSTORE_REL_PATH_MAX,
+ quota_max_path_len);
+ break;
++ case 'Q':
++ set_quota(optarg);
++ break;
+ case 'w':
+ set_timeout(optarg);
+ break;
+@@ -2776,6 +2850,14 @@ static void add_buffered_data(struct buffered_data *bdata,
+
+ /* Queue for later transmission. */
+ list_add_tail(&bdata->list, &conn->out_list);
++ bdata->on_out_list = true;
++ /*
++ * Watch events are never "outstanding", but the request causing them
++ * are instead kept "outstanding" until all watch events caused by that
++ * request have been delivered.
++ */
++ if (bdata->hdr.msg.type != XS_WATCH_EVENT)
++ domain_outstanding_inc(conn);
+ }
+
+ void read_state_buffered_data(const void *ctx, struct connection *conn,
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 2db577928fc6..fcb27399f116 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -56,6 +56,8 @@ struct xs_state_connection;
+ struct buffered_data
+ {
+ struct list_head list;
++ bool on_out_list;
++ bool on_ref_list;
+
+ /* Are we still doing the header? */
+ bool inhdr;
+@@ -63,6 +65,17 @@ struct buffered_data
+ /* How far are we? */
+ unsigned int used;
+
++ /* Outstanding request accounting. */
++ union {
++ /* ref is being used for requests. */
++ struct {
++ unsigned int event_cnt; /* # of outstanding events. */
++ unsigned int domid; /* domid of request. */
++ } ref;
++ /* req is being used for watch events. */
++ struct buffered_data *req; /* request causing event. */
++ } pend;
++
+ union {
+ struct xsd_sockmsg msg;
+ char raw[sizeof(struct xsd_sockmsg)];
+@@ -115,6 +128,9 @@ struct connection
+ struct list_head out_list;
+ uint64_t timeout_msec;
+
++ /* Referenced requests no longer pending. */
++ struct list_head ref_list;
++
+ /* Transaction context for current request (NULL if none). */
+ struct transaction *transaction;
+
+@@ -184,7 +200,8 @@ unsigned int get_string(const struct buffered_data *data, unsigned int offset);
+
+ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len);
+-void send_event(struct connection *conn, const char *path, const char *token);
++void send_event(struct buffered_data *req, struct connection *conn,
++ const char *path, const char *token);
+
+ /* Some routines (write, mkdir, etc) just need a non-error return */
+ void send_ack(struct connection *conn, enum xsd_sockmsg_type type);
+@@ -240,6 +257,7 @@ extern int dom0_domid;
+ extern int dom0_event;
+ extern int priv_domid;
+ extern int quota_nb_entry_per_domain;
++extern int quota_req_outstanding;
+
+ extern unsigned int timeout_watch_event_msec;
+
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index 72a5cd3b9aaf..979f8c629835 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -78,6 +78,9 @@ struct domain
+ /* number of watch for this domain */
+ int nbwatch;
+
++ /* Number of outstanding requests. */
++ int nboutstanding;
++
+ /* write rate limit */
+ wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */
+ struct wrl_timestampt wrl_timestamp;
+@@ -287,8 +290,12 @@ bool domain_can_read(struct connection *conn)
+ {
+ struct xenstore_domain_interface *intf = conn->domain->interface;
+
+- if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0)
+- return false;
++ if (domain_is_unprivileged(conn)) {
++ if (conn->domain->wrl_credit < 0)
++ return false;
++ if (conn->domain->nboutstanding >= quota_req_outstanding)
++ return false;
++ }
+
+ if (conn->is_ignored)
+ return false;
+@@ -337,7 +344,7 @@ static struct domain *alloc_domain(const void *context, unsigned int domid)
+ {
+ struct domain *domain;
+
+- domain = talloc(context, struct domain);
++ domain = talloc_zero(context, struct domain);
+ if (!domain) {
+ errno = ENOMEM;
+ return NULL;
+@@ -398,9 +405,6 @@ static int new_domain(struct domain *domain, int port, bool restore)
+ domain->conn->domain = domain;
+ domain->conn->id = domain->domid;
+
+- domain->nbentry = 0;
+- domain->nbwatch = 0;
+-
+ return 0;
+ }
+
+@@ -944,6 +948,28 @@ int domain_watch(struct connection *conn)
+ : 0;
+ }
+
++void domain_outstanding_inc(struct connection *conn)
++{
++ if (!conn || !conn->domain)
++ return;
++ conn->domain->nboutstanding++;
++}
++
++void domain_outstanding_dec(struct connection *conn)
++{
++ if (!conn || !conn->domain)
++ return;
++ conn->domain->nboutstanding--;
++}
++
++void domain_outstanding_domid_dec(unsigned int domid)
++{
++ struct domain *d = find_domain_by_domid(domid);
++
++ if (d)
++ d->nboutstanding--;
++}
++
+ static wrl_creditt wrl_config_writecost = WRL_FACTOR;
+ static wrl_creditt wrl_config_rate = WRL_RATE * WRL_FACTOR;
+ static wrl_creditt wrl_config_dburst = WRL_DBURST * WRL_FACTOR;
+diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
+index dc9759171317..5757a6557146 100644
+--- a/tools/xenstore/xenstored_domain.h
++++ b/tools/xenstore/xenstored_domain.h
+@@ -68,6 +68,9 @@ int domain_entry(struct connection *conn);
+ void domain_watch_inc(struct connection *conn);
+ void domain_watch_dec(struct connection *conn);
+ int domain_watch(struct connection *conn);
++void domain_outstanding_inc(struct connection *conn);
++void domain_outstanding_dec(struct connection *conn);
++void domain_outstanding_domid_dec(unsigned int domid);
+
+ /* Special node permission handling. */
+ int set_perms_special(struct connection *conn, const char *name,
+diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
+index bc6d833028a3..1d664e3d6b72 100644
+--- a/tools/xenstore/xenstored_watch.c
++++ b/tools/xenstore/xenstored_watch.c
+@@ -142,6 +142,7 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
+ struct node *node, bool exact, struct node_perms *perms)
+ {
+ struct connection *i;
++ struct buffered_data *req;
+ struct watch *watch;
+
+ /* During transactions, don't fire watches, but queue them. */
+@@ -150,6 +151,8 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
+ return;
+ }
+
++ req = domain_is_unprivileged(conn) ? conn->in : NULL;
++
+ /* Create an event for each watch. */
+ list_for_each_entry(i, &connections, list) {
+ /* introduce/release domain watches */
+@@ -164,12 +167,12 @@ void fire_watches(struct connection *conn, const void *ctx, const char *name,
+ list_for_each_entry(watch, &i->watches, list) {
+ if (exact) {
+ if (streq(name, watch->node))
+- send_event(i,
++ send_event(req, i,
+ get_watch_path(watch, name),
+ watch->token);
+ } else {
+ if (is_child(name, watch->node))
+- send_event(i,
++ send_event(req, i,
+ get_watch_path(watch, name),
+ watch->token);
+ }
+@@ -269,8 +272,12 @@ int do_watch(struct connection *conn, struct buffered_data *in)
+ trace_create(watch, "watch");
+ send_ack(conn, XS_WATCH);
+
+- /* We fire once up front: simplifies clients and restart. */
+- send_event(conn, get_watch_path(watch, watch->node), watch->token);
++ /*
++ * We fire once up front: simplifies clients and restart.
++ * This event will not be linked to the XS_WATCH request.
++ */
++ send_event(NULL, conn, get_watch_path(watch, watch->node),
++ watch->token);
+
+ return 0;
+ }
+--
+2.37.4
+
diff --git a/0090-tools-xenstore-don-t-buffer-multiple-identical-watch.patch b/0090-tools-xenstore-don-t-buffer-multiple-identical-watch.patch
new file mode 100644
index 0000000..305d8ac
--- /dev/null
+++ b/0090-tools-xenstore-don-t-buffer-multiple-identical-watch.patch
@@ -0,0 +1,93 @@
+From 97c251f953c58aec7620499ac12924054b7cd758 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:08 +0200
+Subject: [PATCH 090/126] tools/xenstore: don't buffer multiple identical watch
+ events
+
+A guest not reading its Xenstore response buffer fast enough might
+cause lots of buffered Xenstore watch events to pile up. Reduce the
+generated load by dropping new events which already have an identical
+copy pending.
+
+The special events "@..." are excluded from that handling as there are
+known use cases where the handler is relying on each event to be sent
+individually.
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit b5c0bdb96d33e18c324c13d8e33c08732d77eaa2)
+---
+ tools/xenstore/xenstored_core.c | 20 +++++++++++++++++++-
+ tools/xenstore/xenstored_core.h | 3 +++
+ 2 files changed, 22 insertions(+), 1 deletion(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index d871f217af9c..6ea06e20df91 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -882,6 +882,7 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ bdata->inhdr = true;
+ bdata->used = 0;
+ bdata->timeout_msec = 0;
++ bdata->watch_event = false;
+
+ if (len <= DEFAULT_BUFFER_SIZE)
+ bdata->buffer = bdata->default_buffer;
+@@ -914,7 +915,7 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ void send_event(struct buffered_data *req, struct connection *conn,
+ const char *path, const char *token)
+ {
+- struct buffered_data *bdata;
++ struct buffered_data *bdata, *bd;
+ unsigned int len;
+
+ len = strlen(path) + 1 + strlen(token) + 1;
+@@ -936,12 +937,29 @@ void send_event(struct buffered_data *req, struct connection *conn,
+ bdata->hdr.msg.type = XS_WATCH_EVENT;
+ bdata->hdr.msg.len = len;
+
++ /*
++ * Check whether an identical event is pending already.
++ * Special events are excluded from that check.
++ */
++ if (path[0] != '@') {
++ list_for_each_entry(bd, &conn->out_list, list) {
++ if (bd->watch_event && bd->hdr.msg.len == len &&
++ !memcmp(bdata->buffer, bd->buffer, len)) {
++ trace("dropping duplicate watch %s %s for domain %u\n",
++ path, token, conn->id);
++ talloc_free(bdata);
++ return;
++ }
++ }
++ }
++
+ if (timeout_watch_event_msec && domain_is_unprivileged(conn)) {
+ bdata->timeout_msec = get_now_msec() + timeout_watch_event_msec;
+ if (!conn->timeout_msec)
+ conn->timeout_msec = bdata->timeout_msec;
+ }
+
++ bdata->watch_event = true;
+ bdata->pend.req = req;
+ if (req)
+ req->pend.ref.event_cnt++;
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index fcb27399f116..afbd982c2654 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -62,6 +62,9 @@ struct buffered_data
+ /* Are we still doing the header? */
+ bool inhdr;
+
++ /* Is this a watch event? */
++ bool watch_event;
++
+ /* How far are we? */
+ unsigned int used;
+
+--
+2.37.4
+
diff --git a/0091-tools-xenstore-fix-connection-id-usage.patch b/0091-tools-xenstore-fix-connection-id-usage.patch
new file mode 100644
index 0000000..dd7f382
--- /dev/null
+++ b/0091-tools-xenstore-fix-connection-id-usage.patch
@@ -0,0 +1,61 @@
+From 3e51699fcc578c7c005fd4add70cf7c8117d0af9 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:08 +0200
+Subject: [PATCH 091/126] tools/xenstore: fix connection->id usage
+
+Don't use conn->id for privilege checks, but domain_is_unprivileged().
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 3047df38e1991510bc295e3e1bb6b6b6c4a97831)
+---
+ tools/xenstore/xenstored_control.c | 2 +-
+ tools/xenstore/xenstored_core.h | 2 +-
+ tools/xenstore/xenstored_transaction.c | 3 ++-
+ 3 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_control.c b/tools/xenstore/xenstored_control.c
+index 8e470f2b2056..211fe1fd9b37 100644
+--- a/tools/xenstore/xenstored_control.c
++++ b/tools/xenstore/xenstored_control.c
+@@ -821,7 +821,7 @@ int do_control(struct connection *conn, struct buffered_data *in)
+ unsigned int cmd, num, off;
+ char **vec = NULL;
+
+- if (conn->id != 0)
++ if (domain_is_unprivileged(conn))
+ return EACCES;
+
+ off = get_string(in, 0);
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index afbd982c2654..c0a056ce13fe 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -118,7 +118,7 @@ struct connection
+ /* The index of pollfd in global pollfd array */
+ int pollfd_idx;
+
+- /* Who am I? 0 for socket connections. */
++ /* Who am I? Domid of connection. */
+ unsigned int id;
+
+ /* Is this connection ignored? */
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index 54432907fc76..ee1b09031a3b 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -477,7 +477,8 @@ int do_transaction_start(struct connection *conn, struct buffered_data *in)
+ if (conn->transaction)
+ return EBUSY;
+
+- if (conn->id && conn->transaction_started > quota_max_transaction)
++ if (domain_is_unprivileged(conn) &&
++ conn->transaction_started > quota_max_transaction)
+ return ENOSPC;
+
+ /* Attach transaction to input for autofree until it's complete */
+--
+2.37.4
+
diff --git a/0092-tools-xenstore-simplify-and-fix-per-domain-node-acco.patch b/0092-tools-xenstore-simplify-and-fix-per-domain-node-acco.patch
new file mode 100644
index 0000000..01f29b1
--- /dev/null
+++ b/0092-tools-xenstore-simplify-and-fix-per-domain-node-acco.patch
@@ -0,0 +1,336 @@
+From 8ee7ed7c1ef435f43edc08be07c036d81642d8e1 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:08 +0200
+Subject: [PATCH 092/126] tools/xenstore: simplify and fix per domain node
+ accounting
+
+The accounting of nodes can be simplified now that each connection
+holds the associated domid.
+
+Fix the node accounting to cover nodes created for a domain before it
+has been introduced. This requires reacting properly to an allocation
+failure inside domain_entry_inc() by returning an error code.
+
+Especially in error paths the node accounting has to be fixed in some
+cases.
+
+This is part of XSA-326 / CVE-2022-42313.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit dbef1f7482894c572d90cd73d99ed689c891e863)
+---
+ tools/xenstore/xenstored_core.c | 43 ++++++++--
+ tools/xenstore/xenstored_domain.c | 105 ++++++++++++++++---------
+ tools/xenstore/xenstored_domain.h | 4 +-
+ tools/xenstore/xenstored_transaction.c | 8 +-
+ 4 files changed, 109 insertions(+), 51 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 6ea06e20df91..85c0d2f38fac 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -603,7 +603,7 @@ struct node *read_node(struct connection *conn, const void *ctx,
+
+ /* Permissions are struct xs_permissions. */
+ node->perms.p = hdr->perms;
+- if (domain_adjust_node_perms(node)) {
++ if (domain_adjust_node_perms(conn, node)) {
+ talloc_free(node);
+ return NULL;
+ }
+@@ -625,7 +625,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+ void *p;
+ struct xs_tdb_record_hdr *hdr;
+
+- if (domain_adjust_node_perms(node))
++ if (domain_adjust_node_perms(conn, node))
+ return errno;
+
+ data.dsize = sizeof(*hdr)
+@@ -1238,13 +1238,17 @@ nomem:
+ return NULL;
+ }
+
+-static int destroy_node(struct connection *conn, struct node *node)
++static void destroy_node_rm(struct node *node)
+ {
+ if (streq(node->name, "/"))
+ corrupt(NULL, "Destroying root node!");
+
+ tdb_delete(tdb_ctx, node->key);
++}
+
++static int destroy_node(struct connection *conn, struct node *node)
++{
++ destroy_node_rm(node);
+ domain_entry_dec(conn, node);
+
+ /*
+@@ -1294,8 +1298,12 @@ static struct node *create_node(struct connection *conn, const void *ctx,
+ goto err;
+
+ /* Account for new node */
+- if (i->parent)
+- domain_entry_inc(conn, i);
++ if (i->parent) {
++ if (domain_entry_inc(conn, i)) {
++ destroy_node_rm(i);
++ return NULL;
++ }
++ }
+ }
+
+ return node;
+@@ -1580,10 +1588,27 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
+ old_perms = node->perms;
+ domain_entry_dec(conn, node);
+ node->perms = perms;
+- domain_entry_inc(conn, node);
++ if (domain_entry_inc(conn, node)) {
++ node->perms = old_perms;
++ /*
++ * This should never fail because we had a reference on the
++ * domain before and Xenstored is single-threaded.
++ */
++ domain_entry_inc(conn, node);
++ return ENOMEM;
++ }
+
+- if (write_node(conn, node, false))
++ if (write_node(conn, node, false)) {
++ int saved_errno = errno;
++
++ domain_entry_dec(conn, node);
++ node->perms = old_perms;
++ /* No failure possible as above. */
++ domain_entry_inc(conn, node);
++
++ errno = saved_errno;
+ return errno;
++ }
+
+ fire_watches(conn, in, name, node, false, &old_perms);
+ send_ack(conn, XS_SET_PERMS);
+@@ -3003,7 +3028,9 @@ void read_state_node(const void *ctx, const void *state)
+ set_tdb_key(name, &key);
+ if (write_node_raw(NULL, &key, node, true))
+ barf("write node error restoring node");
+- domain_entry_inc(&conn, node);
++
++ if (domain_entry_inc(&conn, node))
++ barf("node accounting error restoring node");
+
+ talloc_free(node);
+ }
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index 979f8c629835..3c27973fb836 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -16,6 +16,7 @@
+ along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
++#include <assert.h>
+ #include <stdio.h>
+ #include <sys/mman.h>
+ #include <unistd.h>
+@@ -369,6 +370,18 @@ static struct domain *find_or_alloc_domain(const void *ctx, unsigned int domid)
+ return domain ? : alloc_domain(ctx, domid);
+ }
+
++static struct domain *find_or_alloc_existing_domain(unsigned int domid)
++{
++ struct domain *domain;
++ xc_dominfo_t dominfo;
++
++ domain = find_domain_struct(domid);
++ if (!domain && get_domain_info(domid, &dominfo))
++ domain = alloc_domain(NULL, domid);
++
++ return domain;
++}
++
+ static int new_domain(struct domain *domain, int port, bool restore)
+ {
+ int rc;
+@@ -788,30 +801,28 @@ void domain_deinit(void)
+ xenevtchn_unbind(xce_handle, virq_port);
+ }
+
+-void domain_entry_inc(struct connection *conn, struct node *node)
++int domain_entry_inc(struct connection *conn, struct node *node)
+ {
+ struct domain *d;
++ unsigned int domid;
+
+ if (!conn)
+- return;
++ return 0;
+
+- if (node->perms.p && node->perms.p[0].id != conn->id) {
+- if (conn->transaction) {
+- transaction_entry_inc(conn->transaction,
+- node->perms.p[0].id);
+- } else {
+- d = find_domain_by_domid(node->perms.p[0].id);
+- if (d)
+- d->nbentry++;
+- }
+- } else if (conn->domain) {
+- if (conn->transaction) {
+- transaction_entry_inc(conn->transaction,
+- conn->domain->domid);
+- } else {
+- conn->domain->nbentry++;
+- }
++ domid = node->perms.p ? node->perms.p[0].id : conn->id;
++
++ if (conn->transaction) {
++ transaction_entry_inc(conn->transaction, domid);
++ } else {
++ d = (domid == conn->id && conn->domain) ? conn->domain
++ : find_or_alloc_existing_domain(domid);
++ if (d)
++ d->nbentry++;
++ else
++ return ENOMEM;
+ }
++
++ return 0;
+ }
+
+ /*
+@@ -847,7 +858,7 @@ static int chk_domain_generation(unsigned int domid, uint64_t gen)
+ * Remove permissions for no longer existing domains in order to avoid a new
+ * domain with the same domid inheriting the permissions.
+ */
+-int domain_adjust_node_perms(struct node *node)
++int domain_adjust_node_perms(struct connection *conn, struct node *node)
+ {
+ unsigned int i;
+ int ret;
+@@ -857,8 +868,14 @@ int domain_adjust_node_perms(struct node *node)
+ return errno;
+
+ /* If the owner doesn't exist any longer give it to priv domain. */
+- if (!ret)
++ if (!ret) {
++ /*
++ * In theory we'd need to update the number of dom0 nodes here,
++ * but we could be called for a read of the node. So better
++ * avoid the risk to overflow the node count of dom0.
++ */
+ node->perms.p[0].id = priv_domid;
++ }
+
+ for (i = 1; i < node->perms.num; i++) {
+ if (node->perms.p[i].perms & XS_PERM_IGNORE)
+@@ -877,25 +894,25 @@ int domain_adjust_node_perms(struct node *node)
+ void domain_entry_dec(struct connection *conn, struct node *node)
+ {
+ struct domain *d;
++ unsigned int domid;
+
+ if (!conn)
+ return;
+
+- if (node->perms.p && node->perms.p[0].id != conn->id) {
+- if (conn->transaction) {
+- transaction_entry_dec(conn->transaction,
+- node->perms.p[0].id);
++ domid = node->perms.p ? node->perms.p[0].id : conn->id;
++
++ if (conn->transaction) {
++ transaction_entry_dec(conn->transaction, domid);
++ } else {
++ d = (domid == conn->id && conn->domain) ? conn->domain
++ : find_domain_struct(domid);
++ if (d) {
++ d->nbentry--;
+ } else {
+- d = find_domain_by_domid(node->perms.p[0].id);
+- if (d && d->nbentry)
+- d->nbentry--;
+- }
+- } else if (conn->domain && conn->domain->nbentry) {
+- if (conn->transaction) {
+- transaction_entry_dec(conn->transaction,
+- conn->domain->domid);
+- } else {
+- conn->domain->nbentry--;
++ errno = ENOENT;
++ corrupt(conn,
++ "Node \"%s\" owned by non-existing domain %u\n",
++ node->name, domid);
+ }
+ }
+ }
+@@ -905,13 +922,23 @@ int domain_entry_fix(unsigned int domid, int num, bool update)
+ struct domain *d;
+ int cnt;
+
+- d = find_domain_by_domid(domid);
+- if (!d)
+- return 0;
++ if (update) {
++ d = find_domain_struct(domid);
++ assert(d);
++ } else {
++ /*
++ * We are called first with update == false in order to catch
++ * any error. So do a possible allocation and check for error
++ * only in this case, as in the case of update == true nothing
++ * can go wrong anymore as the allocation already happened.
++ */
++ d = find_or_alloc_existing_domain(domid);
++ if (!d)
++ return -1;
++ }
+
+ cnt = d->nbentry + num;
+- if (cnt < 0)
+- cnt = 0;
++ assert(cnt >= 0);
+
+ if (update)
+ d->nbentry = cnt;
+diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
+index 5757a6557146..cce13d14f016 100644
+--- a/tools/xenstore/xenstored_domain.h
++++ b/tools/xenstore/xenstored_domain.h
+@@ -58,10 +58,10 @@ bool domain_can_write(struct connection *conn);
+ bool domain_is_unprivileged(struct connection *conn);
+
+ /* Remove node permissions for no longer existing domains. */
+-int domain_adjust_node_perms(struct node *node);
++int domain_adjust_node_perms(struct connection *conn, struct node *node);
+
+ /* Quota manipulation */
+-void domain_entry_inc(struct connection *conn, struct node *);
++int domain_entry_inc(struct connection *conn, struct node *);
+ void domain_entry_dec(struct connection *conn, struct node *);
+ int domain_entry_fix(unsigned int domid, int num, bool update);
+ int domain_entry(struct connection *conn);
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index ee1b09031a3b..86caf6c398be 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -519,8 +519,12 @@ static int transaction_fix_domains(struct transaction *trans, bool update)
+
+ list_for_each_entry(d, &trans->changed_domains, list) {
+ cnt = domain_entry_fix(d->domid, d->nbentry, update);
+- if (!update && cnt >= quota_nb_entry_per_domain)
+- return ENOSPC;
++ if (!update) {
++ if (cnt >= quota_nb_entry_per_domain)
++ return ENOSPC;
++ if (cnt < 0)
++ return ENOMEM;
++ }
+ }
+
+ return 0;
+--
+2.37.4
+
diff --git a/0093-tools-xenstore-limit-max-number-of-nodes-accessed-in.patch b/0093-tools-xenstore-limit-max-number-of-nodes-accessed-in.patch
new file mode 100644
index 0000000..f064355
--- /dev/null
+++ b/0093-tools-xenstore-limit-max-number-of-nodes-accessed-in.patch
@@ -0,0 +1,255 @@
+From 1035371fee5552b8cfe9819c4058a4c9e695ba5e Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:09 +0200
+Subject: [PATCH 093/126] tools/xenstore: limit max number of nodes accessed in
+ a transaction
+
+Today a guest is free to access as many nodes in a single transaction
+as it wants. This can lead to unbounded memory consumption in Xenstore
+as there is the need to keep track of all nodes having been accessed
+during a transaction.
+
+In oxenstored the number of requests in a transaction is being limited
+via a quota maxrequests (default is 1024). As multiple accesses of a
+node are not problematic in C Xenstore, limit the number of accessed
+nodes.
+
+In order to let read_node() detect a quota error in case too many nodes
+are being accessed, check the return value of access_node() and return
+NULL in case an error has been seen. Introduce __must_check and add it
+to the access_node() prototype.
+
+This is part of XSA-326 / CVE-2022-42314.
+
+Suggested-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 268369d8e322d227a74a899009c5748d7b0ea142)
+---
+ tools/include/xen-tools/libs.h | 4 +++
+ tools/xenstore/xenstored_core.c | 50 ++++++++++++++++++--------
+ tools/xenstore/xenstored_core.h | 1 +
+ tools/xenstore/xenstored_transaction.c | 9 +++++
+ tools/xenstore/xenstored_transaction.h | 4 +--
+ 5 files changed, 52 insertions(+), 16 deletions(-)
+
+diff --git a/tools/include/xen-tools/libs.h b/tools/include/xen-tools/libs.h
+index a16e0c380709..bafc90e2f603 100644
+--- a/tools/include/xen-tools/libs.h
++++ b/tools/include/xen-tools/libs.h
+@@ -63,4 +63,8 @@
+ #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
+ #endif
+
++#ifndef __must_check
++#define __must_check __attribute__((__warn_unused_result__))
++#endif
++
+ #endif /* __XEN_TOOLS_LIBS__ */
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 85c0d2f38fac..050d6f651ae9 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -106,6 +106,7 @@ int quota_nb_watch_per_domain = 128;
+ int quota_max_entry_size = 2048; /* 2K */
+ int quota_max_transaction = 10;
+ int quota_nb_perms_per_node = 5;
++int quota_trans_nodes = 1024;
+ int quota_max_path_len = XENSTORE_REL_PATH_MAX;
+ int quota_req_outstanding = 20;
+
+@@ -560,6 +561,7 @@ struct node *read_node(struct connection *conn, const void *ctx,
+ TDB_DATA key, data;
+ struct xs_tdb_record_hdr *hdr;
+ struct node *node;
++ int err;
+
+ node = talloc(ctx, struct node);
+ if (!node) {
+@@ -581,14 +583,13 @@ struct node *read_node(struct connection *conn, const void *ctx,
+ if (data.dptr == NULL) {
+ if (tdb_error(tdb_ctx) == TDB_ERR_NOEXIST) {
+ node->generation = NO_GENERATION;
+- access_node(conn, node, NODE_ACCESS_READ, NULL);
+- errno = ENOENT;
++ err = access_node(conn, node, NODE_ACCESS_READ, NULL);
++ errno = err ? : ENOENT;
+ } else {
+ log("TDB error on read: %s", tdb_errorstr(tdb_ctx));
+ errno = EIO;
+ }
+- talloc_free(node);
+- return NULL;
++ goto error;
+ }
+
+ node->parent = NULL;
+@@ -603,19 +604,36 @@ struct node *read_node(struct connection *conn, const void *ctx,
+
+ /* Permissions are struct xs_permissions. */
+ node->perms.p = hdr->perms;
+- if (domain_adjust_node_perms(conn, node)) {
+- talloc_free(node);
+- return NULL;
+- }
++ if (domain_adjust_node_perms(conn, node))
++ goto error;
+
+ /* Data is binary blob (usually ascii, no nul). */
+ node->data = node->perms.p + hdr->num_perms;
+ /* Children is strings, nul separated. */
+ node->children = node->data + node->datalen;
+
+- access_node(conn, node, NODE_ACCESS_READ, NULL);
++ if (access_node(conn, node, NODE_ACCESS_READ, NULL))
++ goto error;
+
+ return node;
++
++ error:
++ err = errno;
++ talloc_free(node);
++ errno = err;
++ return NULL;
++}
++
++static bool read_node_can_propagate_errno(void)
++{
++ /*
++ * 2 error cases for read_node() can always be propagated up:
++ * ENOMEM, because this has nothing to do with the node being in the
++ * data base or not, but is caused by a general lack of memory.
++ * ENOSPC, because this is related to hitting quota limits which need
++ * to be respected.
++ */
++ return errno == ENOMEM || errno == ENOSPC;
+ }
+
+ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+@@ -732,7 +750,7 @@ static int ask_parents(struct connection *conn, const void *ctx,
+ node = read_node(conn, ctx, name);
+ if (node)
+ break;
+- if (errno == ENOMEM)
++ if (read_node_can_propagate_errno())
+ return errno;
+ } while (!streq(name, "/"));
+
+@@ -795,7 +813,7 @@ static struct node *get_node(struct connection *conn,
+ }
+ }
+ /* Clean up errno if they weren't supposed to know. */
+- if (!node && errno != ENOMEM)
++ if (!node && !read_node_can_propagate_errno())
+ errno = errno_from_parents(conn, ctx, name, errno, perm);
+ return node;
+ }
+@@ -1201,7 +1219,7 @@ static struct node *construct_node(struct connection *conn, const void *ctx,
+
+ /* If parent doesn't exist, create it. */
+ parent = read_node(conn, parentname, parentname);
+- if (!parent)
++ if (!parent && errno == ENOENT)
+ parent = construct_node(conn, ctx, parentname);
+ if (!parent)
+ return NULL;
+@@ -1475,7 +1493,7 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node,
+
+ parent = read_node(conn, ctx, parentname);
+ if (!parent)
+- return (errno == ENOMEM) ? ENOMEM : EINVAL;
++ return read_node_can_propagate_errno() ? errno : EINVAL;
+ node->parent = parent;
+
+ return delete_node(conn, ctx, parent, node, false);
+@@ -1505,7 +1523,7 @@ static int do_rm(struct connection *conn, struct buffered_data *in)
+ return 0;
+ }
+ /* Restore errno, just in case. */
+- if (errno != ENOMEM)
++ if (!read_node_can_propagate_errno())
+ errno = ENOENT;
+ }
+ return errno;
+@@ -2282,6 +2300,8 @@ static void usage(void)
+ " -M, --path-max <chars> limit the allowed Xenstore node path length,\n"
+ " -Q, --quota <what>=<nb> set the quota <what> to the value <nb>, allowed\n"
+ " quotas are:\n"
++" transaction-nodes: number of accessed node per\n"
++" transaction\n"
+ " outstanding: number of outstanding requests\n"
+ " -w, --timeout <what>=<seconds> set the timeout in seconds for <what>,\n"
+ " allowed timeout candidates are:\n"
+@@ -2367,6 +2387,8 @@ static void set_quota(const char *arg)
+ val = get_optval_int(eq + 1);
+ if (what_matches(arg, "outstanding"))
+ quota_req_outstanding = val;
++ else if (what_matches(arg, "transaction-nodes"))
++ quota_trans_nodes = val;
+ else
+ barf("unknown quota \"%s\"\n", arg);
+ }
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index c0a056ce13fe..1b3bd5ca563a 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -261,6 +261,7 @@ extern int dom0_event;
+ extern int priv_domid;
+ extern int quota_nb_entry_per_domain;
+ extern int quota_req_outstanding;
++extern int quota_trans_nodes;
+
+ extern unsigned int timeout_watch_event_msec;
+
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index 86caf6c398be..7bd41eb475e3 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -156,6 +156,9 @@ struct transaction
+ /* Connection-local identifier for this transaction. */
+ uint32_t id;
+
++ /* Node counter. */
++ unsigned int nodes;
++
+ /* Generation when transaction started. */
+ uint64_t generation;
+
+@@ -260,6 +263,11 @@ int access_node(struct connection *conn, struct node *node,
+
+ i = find_accessed_node(trans, node->name);
+ if (!i) {
++ if (trans->nodes >= quota_trans_nodes &&
++ domain_is_unprivileged(conn)) {
++ ret = ENOSPC;
++ goto err;
++ }
+ i = talloc_zero(trans, struct accessed_node);
+ if (!i)
+ goto nomem;
+@@ -297,6 +305,7 @@ int access_node(struct connection *conn, struct node *node,
+ i->ta_node = true;
+ }
+ }
++ trans->nodes++;
+ list_add_tail(&i->list, &trans->accessed);
+ }
+
+diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h
+index 0093cac807e3..e3cbd6b23095 100644
+--- a/tools/xenstore/xenstored_transaction.h
++++ b/tools/xenstore/xenstored_transaction.h
+@@ -39,8 +39,8 @@ void transaction_entry_inc(struct transaction *trans, unsigned int domid);
+ void transaction_entry_dec(struct transaction *trans, unsigned int domid);
+
+ /* This node was accessed. */
+-int access_node(struct connection *conn, struct node *node,
+- enum node_access_type type, TDB_DATA *key);
++int __must_check access_node(struct connection *conn, struct node *node,
++ enum node_access_type type, TDB_DATA *key);
+
+ /* Queue watches for a modified node. */
+ void queue_watches(struct connection *conn, const char *name, bool watch_exact);
+--
+2.37.4
+
diff --git a/0094-tools-xenstore-move-the-call-of-setup_structure-to-d.patch b/0094-tools-xenstore-move-the-call-of-setup_structure-to-d.patch
new file mode 100644
index 0000000..4cebe89
--- /dev/null
+++ b/0094-tools-xenstore-move-the-call-of-setup_structure-to-d.patch
@@ -0,0 +1,96 @@
+From ccef72b6a885714dae0b6f1accb33042ee40e108 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:09 +0200
+Subject: [PATCH 094/126] tools/xenstore: move the call of setup_structure() to
+ dom0 introduction
+
+Setting up the basic structure when introducing dom0 has the advantage
+of being able to add proper node memory accounting for the added nodes
+later.
+
+This makes it possible to do proper node accounting, too.
+
+An additional requirement to make that work fine is to correct the
+owner of the created nodes to be dom0_domid instead of domid 0.
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 60e2f6020dea7f616857b8fc1141b1c085d88761)
+---
+ tools/xenstore/xenstored_core.c | 9 ++++-----
+ tools/xenstore/xenstored_core.h | 1 +
+ tools/xenstore/xenstored_domain.c | 3 +++
+ 3 files changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 050d6f651ae9..51af74390cbe 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -1940,7 +1940,8 @@ static int tdb_flags;
+ static void manual_node(const char *name, const char *child)
+ {
+ struct node *node;
+- struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
++ struct xs_permissions perms = { .id = dom0_domid,
++ .perms = XS_PERM_NONE };
+
+ node = talloc_zero(NULL, struct node);
+ if (!node)
+@@ -1979,7 +1980,7 @@ static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...)
+ }
+ }
+
+-static void setup_structure(bool live_update)
++void setup_structure(bool live_update)
+ {
+ char *tdbname;
+
+@@ -2002,6 +2003,7 @@ static void setup_structure(bool live_update)
+ manual_node("/", "tool");
+ manual_node("/tool", "xenstored");
+ manual_node("/tool/xenstored", NULL);
++ domain_entry_fix(dom0_domid, 3, true);
+ }
+
+ check_store();
+@@ -2512,9 +2514,6 @@ int main(int argc, char *argv[])
+
+ init_pipe(reopen_log_pipe);
+
+- /* Setup the database */
+- setup_structure(live_update);
+-
+ /* Listen to hypervisor. */
+ if (!no_domain_init && !live_update) {
+ domain_init(-1);
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 1b3bd5ca563a..459698d8407a 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -224,6 +224,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+ struct node *read_node(struct connection *conn, const void *ctx,
+ const char *name);
+
++void setup_structure(bool live_update);
+ struct connection *new_connection(connwritefn_t *write, connreadfn_t *read);
+ struct connection *get_connection_by_id(unsigned int conn_id);
+ void check_store(void);
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index 3c27973fb836..0dd75a6a2194 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -476,6 +476,9 @@ static struct domain *introduce_domain(const void *ctx,
+ }
+ domain->interface = interface;
+
++ if (is_master_domain)
++ setup_structure(restore);
++
+ /* Now domain belongs to its connection. */
+ talloc_steal(domain->conn, domain);
+
+--
+2.37.4
+
diff --git a/0095-tools-xenstore-add-infrastructure-to-keep-track-of-p.patch b/0095-tools-xenstore-add-infrastructure-to-keep-track-of-p.patch
new file mode 100644
index 0000000..f826f80
--- /dev/null
+++ b/0095-tools-xenstore-add-infrastructure-to-keep-track-of-p.patch
@@ -0,0 +1,289 @@
+From aa29eb624797fb6825e4a23071c88417672868a4 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:09 +0200
+Subject: [PATCH 095/126] tools/xenstore: add infrastructure to keep track of
+ per domain memory usage
+
+The amount of memory a domain can consume in Xenstore is limited by
+various quota today, but even with sane quota a domain can still
+consume rather large memory quantities.
+
+Add the infrastructure for keeping track of the amount of memory a
+domain is consuming in Xenstore. Note that this is only the memory a
+domain has direct control over, so any internal administration data
+needed by Xenstore only is not being accounted for.
+
+There are two quotas defined: a soft quota which will result in a
+warning issued via syslog() when it is exceeded, and a hard quota
+resulting in a stop of accepting further requests or watch events as
+long as the hard quota would be violated by accepting those.
+
+Setting any of those quotas to 0 will disable it.
+
+As default values use 2MB per domain for the soft limit (this basically
+covers the allowed case to create 1000 nodes needing 2kB each), and
+2.5MB for the hard limit.
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 0d4a8ec7a93faedbe54fd197db146de628459e77)
+---
+ tools/xenstore/xenstored_core.c | 30 ++++++++--
+ tools/xenstore/xenstored_core.h | 2 +
+ tools/xenstore/xenstored_domain.c | 93 +++++++++++++++++++++++++++++++
+ tools/xenstore/xenstored_domain.h | 20 +++++++
+ 4 files changed, 139 insertions(+), 6 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 51af74390cbe..eeb0d893e8c3 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -109,6 +109,8 @@ int quota_nb_perms_per_node = 5;
+ int quota_trans_nodes = 1024;
+ int quota_max_path_len = XENSTORE_REL_PATH_MAX;
+ int quota_req_outstanding = 20;
++int quota_memory_per_domain_soft = 2 * 1024 * 1024; /* 2 MB */
++int quota_memory_per_domain_hard = 2 * 1024 * 1024 + 512 * 1024; /* 2.5 MB */
+
+ unsigned int timeout_watch_event_msec = 20000;
+
+@@ -2304,7 +2306,14 @@ static void usage(void)
+ " quotas are:\n"
+ " transaction-nodes: number of accessed node per\n"
+ " transaction\n"
++" memory: total used memory per domain for nodes,\n"
++" transactions, watches and requests, above\n"
++" which Xenstore will stop talking to domain\n"
+ " outstanding: number of outstanding requests\n"
++" -q, --quota-soft <what>=<nb> set a soft quota <what> to the value <nb>,\n"
++" causing a warning to be issued via syslog() if the\n"
++" limit is violated, allowed quotas are:\n"
++" memory: see above\n"
+ " -w, --timeout <what>=<seconds> set the timeout in seconds for <what>,\n"
+ " allowed timeout candidates are:\n"
+ " watch-event: time a watch-event is kept pending\n"
+@@ -2331,6 +2340,7 @@ static struct option options[] = {
+ { "perm-nb", 1, NULL, 'A' },
+ { "path-max", 1, NULL, 'M' },
+ { "quota", 1, NULL, 'Q' },
++ { "quota-soft", 1, NULL, 'q' },
+ { "timeout", 1, NULL, 'w' },
+ { "no-recovery", 0, NULL, 'R' },
+ { "internal-db", 0, NULL, 'I' },
+@@ -2379,7 +2389,7 @@ static void set_timeout(const char *arg)
+ barf("unknown timeout \"%s\"\n", arg);
+ }
+
+-static void set_quota(const char *arg)
++static void set_quota(const char *arg, bool soft)
+ {
+ const char *eq = strchr(arg, '=');
+ int val;
+@@ -2387,11 +2397,16 @@ static void set_quota(const char *arg)
+ if (!eq)
+ barf("quotas must be specified via <what>=<nb>\n");
+ val = get_optval_int(eq + 1);
+- if (what_matches(arg, "outstanding"))
++ if (what_matches(arg, "outstanding") && !soft)
+ quota_req_outstanding = val;
+- else if (what_matches(arg, "transaction-nodes"))
++ else if (what_matches(arg, "transaction-nodes") && !soft)
+ quota_trans_nodes = val;
+- else
++ else if (what_matches(arg, "memory")) {
++ if (soft)
++ quota_memory_per_domain_soft = val;
++ else
++ quota_memory_per_domain_hard = val;
++ } else
+ barf("unknown quota \"%s\"\n", arg);
+ }
+
+@@ -2409,7 +2424,7 @@ int main(int argc, char *argv[])
+ orig_argc = argc;
+ orig_argv = argv;
+
+- while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:Q:T:RVW:w:U",
++ while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:Q:q:T:RVW:w:U",
+ options, NULL)) != -1) {
+ switch (opt) {
+ case 'D':
+@@ -2459,7 +2474,10 @@ int main(int argc, char *argv[])
+ quota_max_path_len);
+ break;
+ case 'Q':
+- set_quota(optarg);
++ set_quota(optarg, false);
++ break;
++ case 'q':
++ set_quota(optarg, true);
+ break;
+ case 'w':
+ set_timeout(optarg);
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 459698d8407a..2fb37dbfe847 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -263,6 +263,8 @@ extern int priv_domid;
+ extern int quota_nb_entry_per_domain;
+ extern int quota_req_outstanding;
+ extern int quota_trans_nodes;
++extern int quota_memory_per_domain_soft;
++extern int quota_memory_per_domain_hard;
+
+ extern unsigned int timeout_watch_event_msec;
+
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index 0dd75a6a2194..ec542df6a67e 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -76,6 +76,13 @@ struct domain
+ /* number of entry from this domain in the store */
+ int nbentry;
+
++ /* Amount of memory allocated for this domain. */
++ int memory;
++ bool soft_quota_reported;
++ bool hard_quota_reported;
++ time_t mem_last_msg;
++#define MEM_WARN_MINTIME_SEC 10
++
+ /* number of watch for this domain */
+ int nbwatch;
+
+@@ -296,6 +303,9 @@ bool domain_can_read(struct connection *conn)
+ return false;
+ if (conn->domain->nboutstanding >= quota_req_outstanding)
+ return false;
++ if (conn->domain->memory >= quota_memory_per_domain_hard &&
++ quota_memory_per_domain_hard)
++ return false;
+ }
+
+ if (conn->is_ignored)
+@@ -956,6 +966,89 @@ int domain_entry(struct connection *conn)
+ : 0;
+ }
+
++static bool domain_chk_quota(struct domain *domain, int mem)
++{
++ time_t now;
++
++ if (!domain || !domid_is_unprivileged(domain->domid) ||
++ (domain->conn && domain->conn->is_ignored))
++ return false;
++
++ now = time(NULL);
++
++ if (mem >= quota_memory_per_domain_hard &&
++ quota_memory_per_domain_hard) {
++ if (domain->hard_quota_reported)
++ return true;
++ syslog(LOG_ERR, "Domain %u exceeds hard memory quota, Xenstore interface to domain stalled\n",
++ domain->domid);
++ domain->mem_last_msg = now;
++ domain->hard_quota_reported = true;
++ return true;
++ }
++
++ if (now - domain->mem_last_msg >= MEM_WARN_MINTIME_SEC) {
++ if (domain->hard_quota_reported) {
++ domain->mem_last_msg = now;
++ domain->hard_quota_reported = false;
++ syslog(LOG_INFO, "Domain %u below hard memory quota again\n",
++ domain->domid);
++ }
++ if (mem >= quota_memory_per_domain_soft &&
++ quota_memory_per_domain_soft &&
++ !domain->soft_quota_reported) {
++ domain->mem_last_msg = now;
++ domain->soft_quota_reported = true;
++ syslog(LOG_WARNING, "Domain %u exceeds soft memory quota\n",
++ domain->domid);
++ }
++ if (mem < quota_memory_per_domain_soft &&
++ domain->soft_quota_reported) {
++ domain->mem_last_msg = now;
++ domain->soft_quota_reported = false;
++ syslog(LOG_INFO, "Domain %u below soft memory quota again\n",
++ domain->domid);
++ }
++
++ }
++
++ return false;
++}
++
++int domain_memory_add(unsigned int domid, int mem, bool no_quota_check)
++{
++ struct domain *domain;
++
++ domain = find_domain_struct(domid);
++ if (domain) {
++ /*
++ * domain_chk_quota() will print warning and also store whether
++ * the soft/hard quota has been hit. So check no_quota_check
++ * *after*.
++ */
++ if (domain_chk_quota(domain, domain->memory + mem) &&
++ !no_quota_check)
++ return ENOMEM;
++ domain->memory += mem;
++ } else {
++ /*
++ * The domain the memory is to be accounted for should always
++ * exist, as accounting is done either for a domain related to
++ * the current connection, or for the domain owning a node
++ * (which is always existing, as the owner of the node is
++ * tested to exist and replaced by domid 0 if not).
++ * So not finding the related domain MUST be an error in the
++ * data base.
++ */
++ errno = ENOENT;
++ corrupt(NULL, "Accounting called for non-existing domain %u\n",
++ domid);
++ return ENOENT;
++ }
++
++ return 0;
++}
++
+ void domain_watch_inc(struct connection *conn)
+ {
+ if (!conn || !conn->domain)
+diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
+index cce13d14f016..571aa46d158e 100644
+--- a/tools/xenstore/xenstored_domain.h
++++ b/tools/xenstore/xenstored_domain.h
+@@ -65,6 +65,26 @@ int domain_entry_inc(struct connection *conn, struct node *);
+ void domain_entry_dec(struct connection *conn, struct node *);
+ int domain_entry_fix(unsigned int domid, int num, bool update);
+ int domain_entry(struct connection *conn);
++int domain_memory_add(unsigned int domid, int mem, bool no_quota_check);
++
++/*
++ * domain_memory_add_chk(): to be used when memory quota should be checked.
++ * Not to be used when specifying a negative mem value, as lowering the used
++ * memory should always be allowed.
++ */
++static inline int domain_memory_add_chk(unsigned int domid, int mem)
++{
++ return domain_memory_add(domid, mem, false);
++}
++/*
++ * domain_memory_add_nochk(): to be used when memory quota should not be
++ * checked, e.g. when lowering memory usage, or in an error case for undoing
++ * a previous memory adjustment.
++ */
++static inline void domain_memory_add_nochk(unsigned int domid, int mem)
++{
++ domain_memory_add(domid, mem, true);
++}
+ void domain_watch_inc(struct connection *conn);
+ void domain_watch_dec(struct connection *conn);
+ int domain_watch(struct connection *conn);
+--
+2.37.4
+
diff --git a/0096-tools-xenstore-add-memory-accounting-for-responses.patch b/0096-tools-xenstore-add-memory-accounting-for-responses.patch
new file mode 100644
index 0000000..6174433
--- /dev/null
+++ b/0096-tools-xenstore-add-memory-accounting-for-responses.patch
@@ -0,0 +1,82 @@
+From 0113aacb3d791600668cd7703f6f12ed94fc6d03 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:09 +0200
+Subject: [PATCH 096/126] tools/xenstore: add memory accounting for responses
+
+Add the memory accounting for queued responses.
+
+In case adding a watch event for a guest is causing the hard memory
+quota of that guest to be violated, the event is dropped. This will
+ensure that it is impossible to drive another guest past its memory
+quota by generating insane amounts of events for that guest. This is
+especially important for protecting driver domains from that attack
+vector.
+
+This is part of XSA-326 / CVE-2022-42315.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit f6d00133643a524d2138c9e3f192bbde719050ba)
+---
+ tools/xenstore/xenstored_core.c | 22 +++++++++++++++++++---
+ 1 file changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index eeb0d893e8c3..2e02b577c912 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -260,6 +260,8 @@ static void free_buffered_data(struct buffered_data *out,
+ }
+ }
+
++ domain_memory_add_nochk(conn->id, -out->hdr.msg.len - sizeof(out->hdr));
++
+ if (out->hdr.msg.type == XS_WATCH_EVENT) {
+ req = out->pend.req;
+ if (req) {
+@@ -904,11 +906,14 @@ void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ bdata->timeout_msec = 0;
+ bdata->watch_event = false;
+
+- if (len <= DEFAULT_BUFFER_SIZE)
++ if (len <= DEFAULT_BUFFER_SIZE) {
+ bdata->buffer = bdata->default_buffer;
+- else {
++ /* Don't check quota, path might be used for returning error. */
++ domain_memory_add_nochk(conn->id, len + sizeof(bdata->hdr));
++ } else {
+ bdata->buffer = talloc_array(bdata, char, len);
+- if (!bdata->buffer) {
++ if (!bdata->buffer ||
++ domain_memory_add_chk(conn->id, len + sizeof(bdata->hdr))) {
+ send_error(conn, ENOMEM);
+ return;
+ }
+@@ -973,6 +978,11 @@ void send_event(struct buffered_data *req, struct connection *conn,
+ }
+ }
+
++ if (domain_memory_add_chk(conn->id, len + sizeof(bdata->hdr))) {
++ talloc_free(bdata);
++ return;
++ }
++
+ if (timeout_watch_event_msec && domain_is_unprivileged(conn)) {
+ bdata->timeout_msec = get_now_msec() + timeout_watch_event_msec;
+ if (!conn->timeout_msec)
+@@ -2940,6 +2950,12 @@ static void add_buffered_data(struct buffered_data *bdata,
+ */
+ if (bdata->hdr.msg.type != XS_WATCH_EVENT)
+ domain_outstanding_inc(conn);
++ /*
++ * We are restoring the state after Live-Update and the new quota may
++ * be smaller. So ignore it. The limit will be applied for any resource
++ * after the state has been fully restored.
++ */
++ domain_memory_add_nochk(conn->id, len + sizeof(bdata->hdr));
+ }
+
+ void read_state_buffered_data(const void *ctx, struct connection *conn,
+--
+2.37.4
+
diff --git a/0097-tools-xenstore-add-memory-accounting-for-watches.patch b/0097-tools-xenstore-add-memory-accounting-for-watches.patch
new file mode 100644
index 0000000..dd2ed61
--- /dev/null
+++ b/0097-tools-xenstore-add-memory-accounting-for-watches.patch
@@ -0,0 +1,96 @@
+From 9c2e71fe0611da9ed2ebbf2362a9bb05d42bf0c3 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:10 +0200
+Subject: [PATCH 097/126] tools/xenstore: add memory accounting for watches
+
+Add the memory accounting for registered watches.
+
+When a socket connection is destroyed, the associated watches are
+removed, too. In order to keep memory accounting correct the watches
+must be removed explicitly via a call of conn_delete_all_watches() from
+destroy_conn().
+
+This is part of XSA-326 / CVE-2022-42315.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 7f9978a2cc37aaffab2fb09593bc598c0712a69b)
+---
+ tools/xenstore/xenstored_core.c | 1 +
+ tools/xenstore/xenstored_watch.c | 13 ++++++++++---
+ 2 files changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 2e02b577c912..b1a4575929bd 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -457,6 +457,7 @@ static int destroy_conn(void *_conn)
+ }
+
+ conn_free_buffered_data(conn);
++ conn_delete_all_watches(conn);
+ list_for_each_entry(req, &conn->ref_list, list)
+ req->on_ref_list = false;
+
+diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
+index 1d664e3d6b72..0d5858df5bdd 100644
+--- a/tools/xenstore/xenstored_watch.c
++++ b/tools/xenstore/xenstored_watch.c
+@@ -211,7 +211,7 @@ static int check_watch_path(struct connection *conn, const void *ctx,
+ }
+
+ static struct watch *add_watch(struct connection *conn, char *path, char *token,
+- bool relative)
++ bool relative, bool no_quota_check)
+ {
+ struct watch *watch;
+
+@@ -222,6 +222,9 @@ static struct watch *add_watch(struct connection *conn, char *path, char *token,
+ watch->token = talloc_strdup(watch, token);
+ if (!watch->node || !watch->token)
+ goto nomem;
++ if (domain_memory_add(conn->id, strlen(path) + strlen(token),
++ no_quota_check))
++ goto nomem;
+
+ if (relative)
+ watch->relative_path = get_implicit_path(conn);
+@@ -265,7 +268,7 @@ int do_watch(struct connection *conn, struct buffered_data *in)
+ if (domain_watch(conn) > quota_nb_watch_per_domain)
+ return E2BIG;
+
+- watch = add_watch(conn, vec[0], vec[1], relative);
++ watch = add_watch(conn, vec[0], vec[1], relative, false);
+ if (!watch)
+ return errno;
+
+@@ -296,6 +299,8 @@ int do_unwatch(struct connection *conn, struct buffered_data *in)
+ list_for_each_entry(watch, &conn->watches, list) {
+ if (streq(watch->node, node) && streq(watch->token, vec[1])) {
+ list_del(&watch->list);
++ domain_memory_add_nochk(conn->id, -strlen(watch->node) -
++ strlen(watch->token));
+ talloc_free(watch);
+ domain_watch_dec(conn);
+ send_ack(conn, XS_UNWATCH);
+@@ -311,6 +316,8 @@ void conn_delete_all_watches(struct connection *conn)
+
+ while ((watch = list_top(&conn->watches, struct watch, list))) {
+ list_del(&watch->list);
++ domain_memory_add_nochk(conn->id, -strlen(watch->node) -
++ strlen(watch->token));
+ talloc_free(watch);
+ domain_watch_dec(conn);
+ }
+@@ -373,7 +380,7 @@ void read_state_watch(const void *ctx, const void *state)
+ if (!path)
+ barf("allocation error for read watch");
+
+- if (!add_watch(conn, path, token, relative))
++ if (!add_watch(conn, path, token, relative, true))
+ barf("error adding watch");
+ }
+
+--
+2.37.4
+
diff --git a/0098-tools-xenstore-add-memory-accounting-for-nodes.patch b/0098-tools-xenstore-add-memory-accounting-for-nodes.patch
new file mode 100644
index 0000000..f2f8e4f
--- /dev/null
+++ b/0098-tools-xenstore-add-memory-accounting-for-nodes.patch
@@ -0,0 +1,342 @@
+From 32efe29a00efab2896cc973e966a35ecad556495 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:10 +0200
+Subject: [PATCH 098/126] tools/xenstore: add memory accounting for nodes
+
+Add the memory accounting for Xenstore nodes. In order to make this
+not too complicated allow for some sloppiness when writing nodes. Any
+hard quota violation will result in no further requests to be accepted.
+
+This is part of XSA-326 / CVE-2022-42315.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 00e9e32d022be1afc144b75acdaeba8393e63315)
+---
+ tools/xenstore/xenstored_core.c | 140 ++++++++++++++++++++++---
+ tools/xenstore/xenstored_core.h | 12 +++
+ tools/xenstore/xenstored_transaction.c | 16 ++-
+ 3 files changed, 151 insertions(+), 17 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index b1a4575929bd..f27d5c0101bc 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -556,6 +556,117 @@ void set_tdb_key(const char *name, TDB_DATA *key)
+ key->dsize = strlen(name);
+ }
+
++static void get_acc_data(TDB_DATA *key, struct node_account_data *acc)
++{
++ TDB_DATA old_data;
++ struct xs_tdb_record_hdr *hdr;
++
++ if (acc->memory < 0) {
++ old_data = tdb_fetch(tdb_ctx, *key);
++ /* No check for error, as the node might not exist. */
++ if (old_data.dptr == NULL) {
++ acc->memory = 0;
++ } else {
++ hdr = (void *)old_data.dptr;
++ acc->memory = old_data.dsize;
++ acc->domid = hdr->perms[0].id;
++ }
++ talloc_free(old_data.dptr);
++ }
++}
++
++/*
++ * Per-transaction nodes need to be accounted for the transaction owner.
++ * Those nodes are stored in the data base with the transaction generation
++ * count prepended (e.g. 123/local/domain/...). So testing for the node's
++ * key not to start with "/" is sufficient.
++ */
++static unsigned int get_acc_domid(struct connection *conn, TDB_DATA *key,
++ unsigned int domid)
++{
++ return (!conn || key->dptr[0] == '/') ? domid : conn->id;
++}
++
++int do_tdb_write(struct connection *conn, TDB_DATA *key, TDB_DATA *data,
++ struct node_account_data *acc, bool no_quota_check)
++{
++ struct xs_tdb_record_hdr *hdr = (void *)data->dptr;
++ struct node_account_data old_acc = {};
++ unsigned int old_domid, new_domid;
++ int ret;
++
++ if (!acc)
++ old_acc.memory = -1;
++ else
++ old_acc = *acc;
++
++ get_acc_data(key, &old_acc);
++ old_domid = get_acc_domid(conn, key, old_acc.domid);
++ new_domid = get_acc_domid(conn, key, hdr->perms[0].id);
++
++ /*
++ * Don't check for ENOENT, as we want to be able to switch orphaned
++ * nodes to new owners.
++ */
++ if (old_acc.memory)
++ domain_memory_add_nochk(old_domid,
++ -old_acc.memory - key->dsize);
++ ret = domain_memory_add(new_domid, data->dsize + key->dsize,
++ no_quota_check);
++ if (ret) {
++ /* Error path, so no quota check. */
++ if (old_acc.memory)
++ domain_memory_add_nochk(old_domid,
++ old_acc.memory + key->dsize);
++ return ret;
++ }
++
++ /* TDB should set errno, but doesn't even set ecode AFAICT. */
++ if (tdb_store(tdb_ctx, *key, *data, TDB_REPLACE) != 0) {
++ domain_memory_add_nochk(new_domid, -data->dsize - key->dsize);
++ /* Error path, so no quota check. */
++ if (old_acc.memory)
++ domain_memory_add_nochk(old_domid,
++ old_acc.memory + key->dsize);
++ errno = EIO;
++ return errno;
++ }
++
++ if (acc) {
++ /* Don't use new_domid, as it might be a transaction node. */
++ acc->domid = hdr->perms[0].id;
++ acc->memory = data->dsize;
++ }
++
++ return 0;
++}
++
++int do_tdb_delete(struct connection *conn, TDB_DATA *key,
++ struct node_account_data *acc)
++{
++ struct node_account_data tmp_acc;
++ unsigned int domid;
++
++ if (!acc) {
++ acc = &tmp_acc;
++ acc->memory = -1;
++ }
++
++ get_acc_data(key, acc);
++
++ if (tdb_delete(tdb_ctx, *key)) {
++ errno = EIO;
++ return errno;
++ }
++
++ if (acc->memory) {
++ domid = get_acc_domid(conn, key, acc->domid);
++ domain_memory_add_nochk(domid, -acc->memory - key->dsize);
++ }
++
++ return 0;
++}
++
+ /*
+ * If it fails, returns NULL and sets errno.
+ * Temporary memory allocations will be done with ctx.
+@@ -609,9 +720,15 @@ struct node *read_node(struct connection *conn, const void *ctx,
+
+ /* Permissions are struct xs_permissions. */
+ node->perms.p = hdr->perms;
++ node->acc.domid = node->perms.p[0].id;
++ node->acc.memory = data.dsize;
+ if (domain_adjust_node_perms(conn, node))
+ goto error;
+
++ /* If owner is gone reset currently accounted memory size. */
++ if (node->acc.domid != node->perms.p[0].id)
++ node->acc.memory = 0;
++
+ /* Data is binary blob (usually ascii, no nul). */
+ node->data = node->perms.p + hdr->num_perms;
+ /* Children is strings, nul separated. */
+@@ -680,12 +797,9 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+ p += node->datalen;
+ memcpy(p, node->children, node->childlen);
+
+- /* TDB should set errno, but doesn't even set ecode AFAICT. */
+- if (tdb_store(tdb_ctx, *key, data, TDB_REPLACE) != 0) {
+- corrupt(conn, "Write of %s failed", key->dptr);
+- errno = EIO;
+- return errno;
+- }
++ if (do_tdb_write(conn, key, &data, &node->acc, no_quota_check))
++ return EIO;
++
+ return 0;
+ }
+
+@@ -1188,7 +1302,7 @@ static void delete_node_single(struct connection *conn, struct node *node)
+ if (access_node(conn, node, NODE_ACCESS_DELETE, &key))
+ return;
+
+- if (tdb_delete(tdb_ctx, key) != 0) {
++ if (do_tdb_delete(conn, &key, &node->acc) != 0) {
+ corrupt(conn, "Could not delete '%s'", node->name);
+ return;
+ }
+@@ -1261,6 +1375,7 @@ static struct node *construct_node(struct connection *conn, const void *ctx,
+ /* No children, no data */
+ node->children = node->data = NULL;
+ node->childlen = node->datalen = 0;
++ node->acc.memory = 0;
+ node->parent = parent;
+ return node;
+
+@@ -1269,17 +1384,17 @@ nomem:
+ return NULL;
+ }
+
+-static void destroy_node_rm(struct node *node)
++static void destroy_node_rm(struct connection *conn, struct node *node)
+ {
+ if (streq(node->name, "/"))
+ corrupt(NULL, "Destroying root node!");
+
+- tdb_delete(tdb_ctx, node->key);
++ do_tdb_delete(conn, &node->key, &node->acc);
+ }
+
+ static int destroy_node(struct connection *conn, struct node *node)
+ {
+- destroy_node_rm(node);
++ destroy_node_rm(conn, node);
+ domain_entry_dec(conn, node);
+
+ /*
+@@ -1331,7 +1446,7 @@ static struct node *create_node(struct connection *conn, const void *ctx,
+ /* Account for new node */
+ if (i->parent) {
+ if (domain_entry_inc(conn, i)) {
+- destroy_node_rm(i);
++ destroy_node_rm(conn, i);
+ return NULL;
+ }
+ }
+@@ -2192,7 +2307,7 @@ static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
+ if (!hashtable_search(reachable, name)) {
+ log("clean_store: '%s' is orphaned!", name);
+ if (recovery) {
+- tdb_delete(tdb, key);
++ do_tdb_delete(NULL, &key, NULL);
+ }
+ }
+
+@@ -3030,6 +3145,7 @@ void read_state_node(const void *ctx, const void *state)
+ if (!node)
+ barf("allocation error restoring node");
+
++ node->acc.memory = 0;
+ node->name = name;
+ node->generation = ++generation;
+ node->datalen = sn->data_len;
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 2fb37dbfe847..5c1b574bffe6 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -169,6 +169,11 @@ struct node_perms {
+ struct xs_permissions *p;
+ };
+
++struct node_account_data {
++ unsigned int domid;
++ int memory; /* -1 if unknown */
++};
++
+ struct node {
+ const char *name;
+ /* Key used to update TDB */
+@@ -191,6 +196,9 @@ struct node {
+ /* Children, each nul-terminated. */
+ unsigned int childlen;
+ char *children;
++
++ /* Allocation information for node currently in store. */
++ struct node_account_data acc;
+ };
+
+ /* Return the only argument in the input. */
+@@ -300,6 +308,10 @@ extern xengnttab_handle **xgt_handle;
+ int remember_string(struct hashtable *hash, const char *str);
+
+ void set_tdb_key(const char *name, TDB_DATA *key);
++int do_tdb_write(struct connection *conn, TDB_DATA *key, TDB_DATA *data,
++ struct node_account_data *acc, bool no_quota_check);
++int do_tdb_delete(struct connection *conn, TDB_DATA *key,
++ struct node_account_data *acc);
+
+ void conn_free_buffered_data(struct connection *conn);
+
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index 7bd41eb475e3..ace9a11d77bb 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -153,6 +153,9 @@ struct transaction
+ /* List of all transactions active on this connection. */
+ struct list_head list;
+
++ /* Connection this transaction is associated with. */
++ struct connection *conn;
++
+ /* Connection-local identifier for this transaction. */
+ uint32_t id;
+
+@@ -286,6 +289,8 @@ int access_node(struct connection *conn, struct node *node,
+
+ introduce = true;
+ i->ta_node = false;
++ /* acc.memory < 0 means "unknown, get size from TDB". */
++ node->acc.memory = -1;
+
+ /*
+ * Additional transaction-specific node for read type. We only
+@@ -410,11 +415,11 @@ static int finalize_transaction(struct connection *conn,
+ goto err;
+ hdr = (void *)data.dptr;
+ hdr->generation = ++generation;
+- ret = tdb_store(tdb_ctx, key, data,
+- TDB_REPLACE);
++ ret = do_tdb_write(conn, &key, &data, NULL,
++ true);
+ talloc_free(data.dptr);
+ } else {
+- ret = tdb_delete(tdb_ctx, key);
++ ret = do_tdb_delete(conn, &key, NULL);
+ }
+ if (ret)
+ goto err;
+@@ -425,7 +430,7 @@ static int finalize_transaction(struct connection *conn,
+ }
+ }
+
+- if (i->ta_node && tdb_delete(tdb_ctx, ta_key))
++ if (i->ta_node && do_tdb_delete(conn, &ta_key, NULL))
+ goto err;
+ list_del(&i->list);
+ talloc_free(i);
+@@ -453,7 +458,7 @@ static int destroy_transaction(void *_transaction)
+ i->node);
+ if (trans_name) {
+ set_tdb_key(trans_name, &key);
+- tdb_delete(tdb_ctx, key);
++ do_tdb_delete(trans->conn, &key, NULL);
+ }
+ }
+ list_del(&i->list);
+@@ -497,6 +502,7 @@ int do_transaction_start(struct connection *conn, struct buffered_data *in)
+
+ INIT_LIST_HEAD(&trans->accessed);
+ INIT_LIST_HEAD(&trans->changed_domains);
++ trans->conn = conn;
+ trans->fail = false;
+ trans->generation = ++generation;
+
+--
+2.37.4
+
diff --git a/0099-tools-xenstore-add-exports-for-quota-variables.patch b/0099-tools-xenstore-add-exports-for-quota-variables.patch
new file mode 100644
index 0000000..98f341f
--- /dev/null
+++ b/0099-tools-xenstore-add-exports-for-quota-variables.patch
@@ -0,0 +1,62 @@
+From 1fc3ecc9bfead0a50d8e05de983ed2a8f02fa03c Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:10 +0200
+Subject: [PATCH 099/126] tools/xenstore: add exports for quota variables
+
+Some quota variables are not exported via header files.
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 1da16d5990b5f7752657fca3e948f735177ea9ad)
+---
+ tools/xenstore/xenstored_core.h | 5 +++++
+ tools/xenstore/xenstored_transaction.c | 1 -
+ tools/xenstore/xenstored_watch.c | 2 --
+ 3 files changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 5c1b574bffe6..1eb3708f82dd 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -268,6 +268,11 @@ extern TDB_CONTEXT *tdb_ctx;
+ extern int dom0_domid;
+ extern int dom0_event;
+ extern int priv_domid;
++extern int quota_nb_watch_per_domain;
++extern int quota_max_transaction;
++extern int quota_max_entry_size;
++extern int quota_nb_perms_per_node;
++extern int quota_max_path_len;
+ extern int quota_nb_entry_per_domain;
+ extern int quota_req_outstanding;
+ extern int quota_trans_nodes;
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index ace9a11d77bb..28774813de83 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -175,7 +175,6 @@ struct transaction
+ bool fail;
+ };
+
+-extern int quota_max_transaction;
+ uint64_t generation;
+
+ static struct accessed_node *find_accessed_node(struct transaction *trans,
+diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
+index 0d5858df5bdd..4970e9f1a1b9 100644
+--- a/tools/xenstore/xenstored_watch.c
++++ b/tools/xenstore/xenstored_watch.c
+@@ -31,8 +31,6 @@
+ #include "xenstored_domain.h"
+ #include "xenstored_transaction.h"
+
+-extern int quota_nb_watch_per_domain;
+-
+ struct watch
+ {
+ /* Watches on this connection */
+--
+2.37.4
+
diff --git a/0100-tools-xenstore-add-control-command-for-setting-and-s.patch b/0100-tools-xenstore-add-control-command-for-setting-and-s.patch
new file mode 100644
index 0000000..e721645
--- /dev/null
+++ b/0100-tools-xenstore-add-control-command-for-setting-and-s.patch
@@ -0,0 +1,248 @@
+From 4d30175fdadb75c55acb8abb186727eda7cd5585 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:10 +0200
+Subject: [PATCH 100/126] tools/xenstore: add control command for setting and
+ showing quota
+
+Add a xenstore-control command "quota" to:
+- show current quota settings
+- change quota settings
+- show current quota related values of a domain
+
+Note that in the case the new quota is lower than existing one,
+Xenstored may continue to handle requests from a domain exceeding the
+new limit (depends on which one has been broken) and the amount of
+resource used will not change. However the domain will not be able to
+create more resource (associated to the quota) until it is back to below
+the limit.
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 9c484bef83496b683b0087e3bd2a560da4aa37af)
+---
+ docs/misc/xenstore.txt | 11 +++
+ tools/xenstore/xenstored_control.c | 111 +++++++++++++++++++++++++++++
+ tools/xenstore/xenstored_domain.c | 33 +++++++++
+ tools/xenstore/xenstored_domain.h | 2 +
+ 4 files changed, 157 insertions(+)
+
+diff --git a/docs/misc/xenstore.txt b/docs/misc/xenstore.txt
+index 334dc8b6fdf5..a7d006519ae8 100644
+--- a/docs/misc/xenstore.txt
++++ b/docs/misc/xenstore.txt
+@@ -366,6 +366,17 @@ CONTROL <command>|[<parameters>|]
+ print|<string>
+ print <string> to syslog (xenstore runs as daemon) or
+ to console (xenstore runs as stubdom)
++ quota|[set <name> <val>|<domid>]
++ without parameters: print the current quota settings
++ with "set <name> <val>": set the quota <name> to new value
++ <val> (The admin should make sure all the domain usage is
++ below the quota. If it is not, then Xenstored may continue to
++ handle requests from the domain as long as the resource
++ violating the new quota setting isn't increased further)
++ with "<domid>": print quota related accounting data for
++ the domain <domid>
++ quota-soft|[set <name> <val>]
++ like the "quota" command, but for soft-quota.
+ help <supported-commands>
+ return list of supported commands for CONTROL
+
+diff --git a/tools/xenstore/xenstored_control.c b/tools/xenstore/xenstored_control.c
+index 211fe1fd9b37..980279fa53ff 100644
+--- a/tools/xenstore/xenstored_control.c
++++ b/tools/xenstore/xenstored_control.c
+@@ -148,6 +148,115 @@ static int do_control_log(void *ctx, struct connection *conn,
+ return 0;
+ }
+
++struct quota {
++ const char *name;
++ int *quota;
++ const char *descr;
++};
++
++static const struct quota hard_quotas[] = {
++ { "nodes", &quota_nb_entry_per_domain, "Nodes per domain" },
++ { "watches", &quota_nb_watch_per_domain, "Watches per domain" },
++ { "transactions", &quota_max_transaction, "Transactions per domain" },
++ { "outstanding", &quota_req_outstanding,
++ "Outstanding requests per domain" },
++ { "transaction-nodes", &quota_trans_nodes,
++ "Max. number of accessed nodes per transaction" },
++ { "memory", &quota_memory_per_domain_hard,
++ "Total Xenstore memory per domain (error level)" },
++ { "node-size", &quota_max_entry_size, "Max. size of a node" },
++ { "path-max", &quota_max_path_len, "Max. length of a node path" },
++ { "permissions", &quota_nb_perms_per_node,
++ "Max. number of permissions per node" },
++ { NULL, NULL, NULL }
++};
++
++static const struct quota soft_quotas[] = {
++ { "memory", &quota_memory_per_domain_soft,
++ "Total Xenstore memory per domain (warning level)" },
++ { NULL, NULL, NULL }
++};
++
++static int quota_show_current(const void *ctx, struct connection *conn,
++ const struct quota *quotas)
++{
++ char *resp;
++ unsigned int i;
++
++ resp = talloc_strdup(ctx, "Quota settings:\n");
++ if (!resp)
++ return ENOMEM;
++
++ for (i = 0; quotas[i].quota; i++) {
++ resp = talloc_asprintf_append(resp, "%-17s: %8d %s\n",
++ quotas[i].name, *quotas[i].quota,
++ quotas[i].descr);
++ if (!resp)
++ return ENOMEM;
++ }
++
++ send_reply(conn, XS_CONTROL, resp, strlen(resp) + 1);
++
++ return 0;
++}
++
++static int quota_set(const void *ctx, struct connection *conn,
++ char **vec, int num, const struct quota *quotas)
++{
++ unsigned int i;
++ int val;
++
++ if (num != 2)
++ return EINVAL;
++
++ val = atoi(vec[1]);
++ if (val < 1)
++ return EINVAL;
++
++ for (i = 0; quotas[i].quota; i++) {
++ if (!strcmp(vec[0], quotas[i].name)) {
++ *quotas[i].quota = val;
++ send_ack(conn, XS_CONTROL);
++ return 0;
++ }
++ }
++
++ return EINVAL;
++}
++
++static int quota_get(const void *ctx, struct connection *conn,
++ char **vec, int num)
++{
++ if (num != 1)
++ return EINVAL;
++
++ return domain_get_quota(ctx, conn, atoi(vec[0]));
++}
++
++static int do_control_quota(void *ctx, struct connection *conn,
++ char **vec, int num)
++{
++ if (num == 0)
++ return quota_show_current(ctx, conn, hard_quotas);
++
++ if (!strcmp(vec[0], "set"))
++ return quota_set(ctx, conn, vec + 1, num - 1, hard_quotas);
++
++ return quota_get(ctx, conn, vec, num);
++}
++
++static int do_control_quota_s(void *ctx, struct connection *conn,
++ char **vec, int num)
++{
++ if (num == 0)
++ return quota_show_current(ctx, conn, soft_quotas);
++
++ if (!strcmp(vec[0], "set"))
++ return quota_set(ctx, conn, vec + 1, num - 1, soft_quotas);
++
++ return EINVAL;
++}
++
+ #ifdef __MINIOS__
+ static int do_control_memreport(void *ctx, struct connection *conn,
+ char **vec, int num)
+@@ -777,6 +886,8 @@ static struct cmd_s cmds[] = {
+ { "memreport", do_control_memreport, "[<file>]" },
+ #endif
+ { "print", do_control_print, "<string>" },
++ { "quota", do_control_quota, "[set <name> <val>|<domid>]" },
++ { "quota-soft", do_control_quota_s, "[set <name> <val>]" },
+ { "help", do_control_help, "" },
+ };
+
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index ec542df6a67e..3d5142581332 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -31,6 +31,7 @@
+ #include "xenstored_domain.h"
+ #include "xenstored_transaction.h"
+ #include "xenstored_watch.h"
++#include "xenstored_control.h"
+
+ #include <xenevtchn.h>
+ #include <xenctrl.h>
+@@ -351,6 +352,38 @@ static struct domain *find_domain_struct(unsigned int domid)
+ return NULL;
+ }
+
++int domain_get_quota(const void *ctx, struct connection *conn,
++ unsigned int domid)
++{
++ struct domain *d = find_domain_struct(domid);
++ char *resp;
++ int ta;
++
++ if (!d)
++ return ENOENT;
++
++ ta = d->conn ? d->conn->transaction_started : 0;
++ resp = talloc_asprintf(ctx, "Domain %u:\n", domid);
++ if (!resp)
++ return ENOMEM;
++
++#define ent(t, e) \
++ resp = talloc_asprintf_append(resp, "%-16s: %8d\n", #t, e); \
++ if (!resp) return ENOMEM
++
++ ent(nodes, d->nbentry);
++ ent(watches, d->nbwatch);
++ ent(transactions, ta);
++ ent(outstanding, d->nboutstanding);
++ ent(memory, d->memory);
++
++#undef ent
++
++ send_reply(conn, XS_CONTROL, resp, strlen(resp) + 1);
++
++ return 0;
++}
++
+ static struct domain *alloc_domain(const void *context, unsigned int domid)
+ {
+ struct domain *domain;
+diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
+index 571aa46d158e..0f883936f413 100644
+--- a/tools/xenstore/xenstored_domain.h
++++ b/tools/xenstore/xenstored_domain.h
+@@ -91,6 +91,8 @@ int domain_watch(struct connection *conn);
+ void domain_outstanding_inc(struct connection *conn);
+ void domain_outstanding_dec(struct connection *conn);
+ void domain_outstanding_domid_dec(unsigned int domid);
++int domain_get_quota(const void *ctx, struct connection *conn,
++ unsigned int domid);
+
+ /* Special node permission handling. */
+ int set_perms_special(struct connection *conn, const char *name,
+--
+2.37.4
+
diff --git a/0101-tools-ocaml-xenstored-Synchronise-defaults-with-oxen.patch b/0101-tools-ocaml-xenstored-Synchronise-defaults-with-oxen.patch
new file mode 100644
index 0000000..7df76b1
--- /dev/null
+++ b/0101-tools-ocaml-xenstored-Synchronise-defaults-with-oxen.patch
@@ -0,0 +1,63 @@
+From 8fabb963e662a544a397cb2afefb2b15af07ace9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Wed, 12 Oct 2022 19:13:01 +0100
+Subject: [PATCH 101/126] tools/ocaml/xenstored: Synchronise defaults with
+ oxenstore.conf.in
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We currently have 2 different sets of defaults in upstream Xen git tree:
+* defined in the source code, only used if there is no config file
+* defined in the oxenstored.conf.in upstream Xen
+
+An oxenstored.conf file is not mandatory, and if missing, maxrequests in
+particular has an unsafe default.
+
+Resync the defaults from oxenstored.conf.in into the source code.
+
+This is part of XSA-326 / CVE-2022-42316.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit 84734955d4bf629ba459a74773afcde50a52236f)
+---
+ tools/ocaml/xenstored/define.ml | 6 +++---
+ tools/ocaml/xenstored/quota.ml | 4 ++--
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml
+index ebe18b8e312c..6b06f808595b 100644
+--- a/tools/ocaml/xenstored/define.ml
++++ b/tools/ocaml/xenstored/define.ml
+@@ -21,9 +21,9 @@ let xs_daemon_socket = Paths.xen_run_stored ^ "/socket"
+
+ let default_config_dir = Paths.xen_config_dir
+
+-let maxwatch = ref (50)
+-let maxtransaction = ref (20)
+-let maxrequests = ref (-1) (* maximum requests per transaction *)
++let maxwatch = ref (100)
++let maxtransaction = ref (10)
++let maxrequests = ref (1024) (* maximum requests per transaction *)
+
+ let conflict_burst_limit = ref 5.0
+ let conflict_max_history_seconds = ref 0.05
+diff --git a/tools/ocaml/xenstored/quota.ml b/tools/ocaml/xenstored/quota.ml
+index abcac912805a..6e3d6401ae89 100644
+--- a/tools/ocaml/xenstored/quota.ml
++++ b/tools/ocaml/xenstored/quota.ml
+@@ -20,8 +20,8 @@ exception Transaction_opened
+
+ let warn fmt = Logging.warn "quota" fmt
+ let activate = ref true
+-let maxent = ref (10000)
+-let maxsize = ref (4096)
++let maxent = ref (1000)
++let maxsize = ref (2048)
+
+ type t = {
+ maxent: int; (* max entities per domU *)
+--
+2.37.4
+
diff --git a/0102-tools-ocaml-xenstored-Check-for-maxrequests-before-p.patch b/0102-tools-ocaml-xenstored-Check-for-maxrequests-before-p.patch
new file mode 100644
index 0000000..bc741ae
--- /dev/null
+++ b/0102-tools-ocaml-xenstored-Check-for-maxrequests-before-p.patch
@@ -0,0 +1,101 @@
+From 45816222bb3da04f4cd3388efc46d127d48b8906 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Thu, 28 Jul 2022 17:08:15 +0100
+Subject: [PATCH 102/126] tools/ocaml/xenstored: Check for maxrequests before
+ performing operations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Previously we'd perform the operation, record the updated tree in the
+transaction record, then try to insert a watchop path and the reply packet.
+
+If we exceeded max requests we would've returned EQUOTA, but still:
+* have performed the operation on the transaction's tree
+* have recorded the watchop, making this queue effectively unbounded
+
+It is better if we check whether we'd have room to store the operation before
+performing the transaction, and raise EQUOTA there. Then the transaction
+record won't grow.
+
+This is part of XSA-326 / CVE-2022-42317.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit 329f4d1a6535c6c5a34025ca0d03fc5c7228fcff)
+---
+ tools/ocaml/xenstored/process.ml | 4 +++-
+ tools/ocaml/xenstored/transaction.ml | 16 ++++++++++++----
+ 2 files changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
+index 27790d4a5c41..dd58e6979cf9 100644
+--- a/tools/ocaml/xenstored/process.ml
++++ b/tools/ocaml/xenstored/process.ml
+@@ -389,6 +389,7 @@ let input_handle_error ~cons ~doms ~fct ~con ~t ~req =
+ let reply_error e =
+ Packet.Error e in
+ try
++ Transaction.check_quota_exn ~perm:(Connection.get_perm con) t;
+ fct con t doms cons req.Packet.data
+ with
+ | Define.Invalid_path -> reply_error "EINVAL"
+@@ -681,9 +682,10 @@ let process_packet ~store ~cons ~doms ~con ~req =
+ in
+
+ let response = try
++ Transaction.check_quota_exn ~perm:(Connection.get_perm con) t;
+ if tid <> Transaction.none then
+ (* Remember the request and response for this operation in case we need to replay the transaction *)
+- Transaction.add_operation ~perm:(Connection.get_perm con) t req response;
++ Transaction.add_operation t req response;
+ response
+ with Quota.Limit_reached ->
+ Packet.Error "EQUOTA"
+diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml
+index 17b1bdf2eaf9..294143e2335b 100644
+--- a/tools/ocaml/xenstored/transaction.ml
++++ b/tools/ocaml/xenstored/transaction.ml
+@@ -85,6 +85,7 @@ type t = {
+ oldroot: Store.Node.t;
+ mutable paths: (Xenbus.Xb.Op.operation * Store.Path.t) list;
+ mutable operations: (Packet.request * Packet.response) list;
++ mutable quota_reached: bool;
+ mutable read_lowpath: Store.Path.t option;
+ mutable write_lowpath: Store.Path.t option;
+ }
+@@ -127,6 +128,7 @@ let make ?(internal=false) id store =
+ oldroot = Store.get_root store;
+ paths = [];
+ operations = [];
++ quota_reached = false;
+ read_lowpath = None;
+ write_lowpath = None;
+ } in
+@@ -143,13 +145,19 @@ let get_root t = Store.get_root t.store
+
+ let is_read_only t = t.paths = []
+ let add_wop t ty path = t.paths <- (ty, path) :: t.paths
+-let add_operation ~perm t request response =
++let get_operations t = List.rev t.operations
++
++let check_quota_exn ~perm t =
+ if !Define.maxrequests >= 0
+ && not (Perms.Connection.is_dom0 perm)
+- && List.length t.operations >= !Define.maxrequests
+- then raise Quota.Limit_reached;
++ && (t.quota_reached || List.length t.operations >= !Define.maxrequests)
++ then begin
++ t.quota_reached <- true;
++ raise Quota.Limit_reached;
++ end
++
++let add_operation t request response =
+ t.operations <- (request, response) :: t.operations
+-let get_operations t = List.rev t.operations
+ let set_read_lowpath t path = t.read_lowpath <- get_lowest path t.read_lowpath
+ let set_write_lowpath t path = t.write_lowpath <- get_lowest path t.write_lowpath
+
+--
+2.37.4
+
diff --git a/0103-tools-ocaml-GC-parameter-tuning.patch b/0103-tools-ocaml-GC-parameter-tuning.patch
new file mode 100644
index 0000000..d1473df
--- /dev/null
+++ b/0103-tools-ocaml-GC-parameter-tuning.patch
@@ -0,0 +1,126 @@
+From 9f89883fabd53cb7873cc31778887ba2a1228dd8 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Wed, 12 Oct 2022 19:13:07 +0100
+Subject: [PATCH 103/126] tools/ocaml: GC parameter tuning
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+By default the OCaml garbage collector would return memory to the OS only
+after unused memory is 5x live memory. Tweak this to 120% instead, which
+would match the major GC speed.
+
+This is part of XSA-326.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit 4a8bacff20b857ca0d628ef5525877ade11f2a42)
+---
+ tools/ocaml/xenstored/define.ml | 1 +
+ tools/ocaml/xenstored/xenstored.ml | 64 ++++++++++++++++++++++++++++++
+ 2 files changed, 65 insertions(+)
+
+diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml
+index 6b06f808595b..ba63a8147e09 100644
+--- a/tools/ocaml/xenstored/define.ml
++++ b/tools/ocaml/xenstored/define.ml
+@@ -25,6 +25,7 @@ let maxwatch = ref (100)
+ let maxtransaction = ref (10)
+ let maxrequests = ref (1024) (* maximum requests per transaction *)
+
++let gc_max_overhead = ref 120 (* 120% see comment in xenstored.ml *)
+ let conflict_burst_limit = ref 5.0
+ let conflict_max_history_seconds = ref 0.05
+ let conflict_rate_limit_is_aggregate = ref true
+diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
+index d44ae673c42a..3b57ad016dfb 100644
+--- a/tools/ocaml/xenstored/xenstored.ml
++++ b/tools/ocaml/xenstored/xenstored.ml
+@@ -104,6 +104,7 @@ let parse_config filename =
+ ("quota-maxsize", Config.Set_int Quota.maxsize);
+ ("quota-maxrequests", Config.Set_int Define.maxrequests);
+ ("quota-path-max", Config.Set_int Define.path_max);
++ ("gc-max-overhead", Config.Set_int Define.gc_max_overhead);
+ ("test-eagain", Config.Set_bool Transaction.test_eagain);
+ ("persistent", Config.Set_bool Disk.enable);
+ ("xenstored-log-file", Config.String Logging.set_xenstored_log_destination);
+@@ -265,6 +266,67 @@ let to_file store cons fds file =
+ (fun () -> close_out channel)
+ end
+
++(*
++ By default OCaml's GC only returns memory to the OS when it exceeds a
++ configurable 'max overhead' setting.
++ The default is 500%, that is 5/6th of the OCaml heap needs to be free
++ and only 1/6th live for a compaction to be triggered that would
++ release memory back to the OS.
++ If the limit is not hit then the OCaml process can reuse that memory
++ for its own purposes, but other processes won't be able to use it.
++
++ There is also a 'space overhead' setting that controls how much work
++ each major GC slice does, and by default aims at having no more than
++ 80% or 120% (depending on version) garbage values compared to live
++ values.
++ This doesn't have as much relevance to memory returned to the OS as
++ long as space_overhead <= max_overhead, because compaction is only
++ triggerred at the end of major GC cycles.
++
++ The defaults are too large once the program starts using ~100MiB of
++ memory, at which point ~500MiB would be unavailable to other processes
++ (which would be fine if this was the main process in this VM, but it is
++ not).
++
++ Max overhead can also be set to 0, however this is for testing purposes
++ only (setting it lower than 'space overhead' wouldn't help because the
++ major GC wouldn't run fast enough, and compaction does have a
++ performance cost: we can only compact contiguous regions, so memory has
++ to be moved around).
++
++ Max overhead controls how often the heap is compacted, which is useful
++ if there are bursts of activity followed by long periods of idle state,
++ or if a domain quits, etc. Compaction returns memory to the OS.
++
++ wasted = live * space_overhead / 100
++
++ For globally overriding the GC settings one can use OCAMLRUNPARAM,
++ however we provide a config file override to be consistent with other
++ oxenstored settings.
++
++ One might want to dynamically adjust the overhead setting based on used
++ memory, i.e. to use a fixed upper bound in bytes, not percentage. However
++ measurements show that such adjustments increase GC overhead massively,
++ while still not guaranteeing that memory is returned any more quickly
++ than with a percentage based setting.
++
++ The allocation policy could also be tweaked, e.g. first fit would reduce
++ fragmentation and thus memory usage, but the documentation warns that it
++ can be sensibly slower, and indeed one of our own testcases can trigger
++ such a corner case where it is multiple times slower, so it is best to keep
++ the default allocation policy (next-fit/best-fit depending on version).
++
++ There are other tweaks that can be attempted in the future, e.g. setting
++ 'ulimit -v' to 75% of RAM, however getting the kernel to actually return
++ NULL from allocations is difficult even with that setting, and without a
++ NULL the emergency GC won't be triggered.
++ Perhaps cgroup limits could help, but for now tweak the safest only.
++*)
++
++let tweak_gc () =
++ Gc.set { (Gc.get ()) with Gc.max_overhead = !Define.gc_max_overhead }
++
++
+ let _ =
+ let cf = do_argv in
+ let pidfile =
+@@ -274,6 +336,8 @@ let _ =
+ default_pidfile
+ in
+
++ tweak_gc ();
++
+ (try
+ Unixext.mkdir_rec (Filename.dirname pidfile) 0o755
+ with _ ->
+--
+2.37.4
+
diff --git a/0104-tools-ocaml-libs-xb-hide-type-of-Xb.t.patch b/0104-tools-ocaml-libs-xb-hide-type-of-Xb.t.patch
new file mode 100644
index 0000000..15f69b0
--- /dev/null
+++ b/0104-tools-ocaml-libs-xb-hide-type-of-Xb.t.patch
@@ -0,0 +1,92 @@
+From bbb4ceab25124646fa845855f3cb95ae15d0c3f2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Fri, 29 Jul 2022 18:53:29 +0100
+Subject: [PATCH 104/126] tools/ocaml/libs/xb: hide type of Xb.t
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Hiding the type will make it easier to change the implementation
+in the future without breaking code that relies on it.
+
+No functional change.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit 7ade30a1451734d041363c750a65d322e25b47ba)
+---
+ tools/ocaml/libs/xb/xb.ml | 3 +++
+ tools/ocaml/libs/xb/xb.mli | 9 ++-------
+ tools/ocaml/xenstored/connection.ml | 8 ++------
+ 3 files changed, 7 insertions(+), 13 deletions(-)
+
+diff --git a/tools/ocaml/libs/xb/xb.ml b/tools/ocaml/libs/xb/xb.ml
+index 104d319d7747..8404ddd8a682 100644
+--- a/tools/ocaml/libs/xb/xb.ml
++++ b/tools/ocaml/libs/xb/xb.ml
+@@ -196,6 +196,9 @@ let peek_output con = Queue.peek con.pkt_out
+ let input_len con = Queue.length con.pkt_in
+ let has_in_packet con = Queue.length con.pkt_in > 0
+ let get_in_packet con = Queue.pop con.pkt_in
++let has_partial_input con = match con.partial_in with
++ | HaveHdr _ -> true
++ | NoHdr (n, _) -> n < Partial.header_size ()
+ let has_more_input con =
+ match con.backend with
+ | Fd _ -> false
+diff --git a/tools/ocaml/libs/xb/xb.mli b/tools/ocaml/libs/xb/xb.mli
+index 3a00da6cddc1..794e35bb343e 100644
+--- a/tools/ocaml/libs/xb/xb.mli
++++ b/tools/ocaml/libs/xb/xb.mli
+@@ -66,13 +66,7 @@ type backend_mmap = {
+ type backend_fd = { fd : Unix.file_descr; }
+ type backend = Fd of backend_fd | Xenmmap of backend_mmap
+ type partial_buf = HaveHdr of Partial.pkt | NoHdr of int * bytes
+-type t = {
+- backend : backend;
+- pkt_in : Packet.t Queue.t;
+- pkt_out : Packet.t Queue.t;
+- mutable partial_in : partial_buf;
+- mutable partial_out : string;
+-}
++type t
+ val init_partial_in : unit -> partial_buf
+ val reconnect : t -> unit
+ val queue : t -> Packet.t -> unit
+@@ -97,6 +91,7 @@ val has_output : t -> bool
+ val peek_output : t -> Packet.t
+ val input_len : t -> int
+ val has_in_packet : t -> bool
++val has_partial_input : t -> bool
+ val get_in_packet : t -> Packet.t
+ val has_more_input : t -> bool
+ val is_selectable : t -> bool
+diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml
+index 65f99ea6f28a..38b47363a173 100644
+--- a/tools/ocaml/xenstored/connection.ml
++++ b/tools/ocaml/xenstored/connection.ml
+@@ -125,9 +125,7 @@ let get_perm con =
+ let set_target con target_domid =
+ con.perm <- Perms.Connection.set_target (get_perm con) ~perms:[Perms.READ; Perms.WRITE] target_domid
+
+-let is_backend_mmap con = match con.xb.Xenbus.Xb.backend with
+- | Xenbus.Xb.Xenmmap _ -> true
+- | _ -> false
++let is_backend_mmap con = Xenbus.Xb.is_mmap con.xb
+
+ let send_reply con tid rid ty data =
+ if (String.length data) > xenstore_payload_max && (is_backend_mmap con) then
+@@ -280,9 +278,7 @@ let get_transaction con tid =
+
+ let do_input con = Xenbus.Xb.input con.xb
+ let has_input con = Xenbus.Xb.has_in_packet con.xb
+-let has_partial_input con = match con.xb.Xenbus.Xb.partial_in with
+- | HaveHdr _ -> true
+- | NoHdr (n, _) -> n < Xenbus.Partial.header_size ()
++let has_partial_input con = Xenbus.Xb.has_partial_input con.xb
+ let pop_in con = Xenbus.Xb.get_in_packet con.xb
+ let has_more_input con = Xenbus.Xb.has_more_input con.xb
+
+--
+2.37.4
+
diff --git a/0105-tools-ocaml-Change-Xb.input-to-return-Packet.t-optio.patch b/0105-tools-ocaml-Change-Xb.input-to-return-Packet.t-optio.patch
new file mode 100644
index 0000000..2691ae4
--- /dev/null
+++ b/0105-tools-ocaml-Change-Xb.input-to-return-Packet.t-optio.patch
@@ -0,0 +1,225 @@
+From fccdca83a4425b0e30ec9e29e9a5909e1a55b80d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Wed, 12 Oct 2022 19:13:02 +0100
+Subject: [PATCH 105/126] tools/ocaml: Change Xb.input to return Packet.t
+ option
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The queue here would only ever hold at most one element. This will simplify
+follow-up patches.
+
+This is part of XSA-326.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit c0a86a462721008eca5ff733660de094d3c34bc7)
+---
+ tools/ocaml/libs/xb/xb.ml | 18 +++++-------------
+ tools/ocaml/libs/xb/xb.mli | 5 +----
+ tools/ocaml/libs/xs/xsraw.ml | 20 ++++++--------------
+ tools/ocaml/xenstored/connection.ml | 4 +---
+ tools/ocaml/xenstored/process.ml | 15 +++++++--------
+ 5 files changed, 20 insertions(+), 42 deletions(-)
+
+diff --git a/tools/ocaml/libs/xb/xb.ml b/tools/ocaml/libs/xb/xb.ml
+index 8404ddd8a682..165fd4a1edf4 100644
+--- a/tools/ocaml/libs/xb/xb.ml
++++ b/tools/ocaml/libs/xb/xb.ml
+@@ -45,7 +45,6 @@ type partial_buf = HaveHdr of Partial.pkt | NoHdr of int * bytes
+ type t =
+ {
+ backend: backend;
+- pkt_in: Packet.t Queue.t;
+ pkt_out: Packet.t Queue.t;
+ mutable partial_in: partial_buf;
+ mutable partial_out: string;
+@@ -62,7 +61,6 @@ let reconnect t = match t.backend with
+ Xs_ring.close backend.mmap;
+ backend.eventchn_notify ();
+ (* Clear our old connection state *)
+- Queue.clear t.pkt_in;
+ Queue.clear t.pkt_out;
+ t.partial_in <- init_partial_in ();
+ t.partial_out <- ""
+@@ -124,7 +122,6 @@ let output con =
+
+ (* NB: can throw Reconnect *)
+ let input con =
+- let newpacket = ref false in
+ let to_read =
+ match con.partial_in with
+ | HaveHdr partial_pkt -> Partial.to_complete partial_pkt
+@@ -143,21 +140,19 @@ let input con =
+ if Partial.to_complete partial_pkt = 0 then (
+ let pkt = Packet.of_partialpkt partial_pkt in
+ con.partial_in <- init_partial_in ();
+- Queue.push pkt con.pkt_in;
+- newpacket := true
+- )
++ Some pkt
++ ) else None
+ | NoHdr (i, buf) ->
+ (* we complete the partial header *)
+ if sz > 0 then
+ Bytes.blit b 0 buf (Partial.header_size () - i) sz;
+ con.partial_in <- if sz = i then
+- HaveHdr (Partial.of_string (Bytes.to_string buf)) else NoHdr (i - sz, buf)
+- );
+- !newpacket
++ HaveHdr (Partial.of_string (Bytes.to_string buf)) else NoHdr (i - sz, buf);
++ None
++ )
+
+ let newcon backend = {
+ backend = backend;
+- pkt_in = Queue.create ();
+ pkt_out = Queue.create ();
+ partial_in = init_partial_in ();
+ partial_out = "";
+@@ -193,9 +188,6 @@ let has_output con = has_new_output con || has_old_output con
+
+ let peek_output con = Queue.peek con.pkt_out
+
+-let input_len con = Queue.length con.pkt_in
+-let has_in_packet con = Queue.length con.pkt_in > 0
+-let get_in_packet con = Queue.pop con.pkt_in
+ let has_partial_input con = match con.partial_in with
+ | HaveHdr _ -> true
+ | NoHdr (n, _) -> n < Partial.header_size ()
+diff --git a/tools/ocaml/libs/xb/xb.mli b/tools/ocaml/libs/xb/xb.mli
+index 794e35bb343e..91c682162cea 100644
+--- a/tools/ocaml/libs/xb/xb.mli
++++ b/tools/ocaml/libs/xb/xb.mli
+@@ -77,7 +77,7 @@ val write_fd : backend_fd -> 'a -> string -> int -> int
+ val write_mmap : backend_mmap -> 'a -> string -> int -> int
+ val write : t -> string -> int -> int
+ val output : t -> bool
+-val input : t -> bool
++val input : t -> Packet.t option
+ val newcon : backend -> t
+ val open_fd : Unix.file_descr -> t
+ val open_mmap : Xenmmap.mmap_interface -> (unit -> unit) -> t
+@@ -89,10 +89,7 @@ val has_new_output : t -> bool
+ val has_old_output : t -> bool
+ val has_output : t -> bool
+ val peek_output : t -> Packet.t
+-val input_len : t -> int
+-val has_in_packet : t -> bool
+ val has_partial_input : t -> bool
+-val get_in_packet : t -> Packet.t
+ val has_more_input : t -> bool
+ val is_selectable : t -> bool
+ val get_fd : t -> Unix.file_descr
+diff --git a/tools/ocaml/libs/xs/xsraw.ml b/tools/ocaml/libs/xs/xsraw.ml
+index d982fb24dbb1..451f8b38dbcc 100644
+--- a/tools/ocaml/libs/xs/xsraw.ml
++++ b/tools/ocaml/libs/xs/xsraw.ml
+@@ -94,26 +94,18 @@ let pkt_send con =
+ done
+
+ (* receive one packet - can sleep *)
+-let pkt_recv con =
+- let workdone = ref false in
+- while not !workdone
+- do
+- workdone := Xb.input con.xb
+- done;
+- Xb.get_in_packet con.xb
++let rec pkt_recv con =
++ match Xb.input con.xb with
++ | Some packet -> packet
++ | None -> pkt_recv con
+
+ let pkt_recv_timeout con timeout =
+ let fd = Xb.get_fd con.xb in
+ let r, _, _ = Unix.select [ fd ] [] [] timeout in
+ if r = [] then
+ true, None
+- else (
+- let workdone = Xb.input con.xb in
+- if workdone then
+- false, (Some (Xb.get_in_packet con.xb))
+- else
+- false, None
+- )
++ else
++ false, Xb.input con.xb
+
+ let queue_watchevent con data =
+ let ls = split_string ~limit:2 '\000' data in
+diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml
+index 38b47363a173..cc20e047d2b9 100644
+--- a/tools/ocaml/xenstored/connection.ml
++++ b/tools/ocaml/xenstored/connection.ml
+@@ -277,9 +277,7 @@ let get_transaction con tid =
+ Hashtbl.find con.transactions tid
+
+ let do_input con = Xenbus.Xb.input con.xb
+-let has_input con = Xenbus.Xb.has_in_packet con.xb
+ let has_partial_input con = Xenbus.Xb.has_partial_input con.xb
+-let pop_in con = Xenbus.Xb.get_in_packet con.xb
+ let has_more_input con = Xenbus.Xb.has_more_input con.xb
+
+ let has_output con = Xenbus.Xb.has_output con.xb
+@@ -307,7 +305,7 @@ let is_bad con = match con.dom with None -> false | Some dom -> Domain.is_bad_do
+ Restrictions below can be relaxed once xenstored learns to dump more
+ of its live state in a safe way *)
+ let has_extra_connection_data con =
+- let has_in = has_input con || has_partial_input con in
++ let has_in = has_partial_input con in
+ let has_out = has_output con in
+ let has_socket = con.dom = None in
+ let has_nondefault_perms = make_perm con.dom <> con.perm in
+diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
+index dd58e6979cf9..cbf708213796 100644
+--- a/tools/ocaml/xenstored/process.ml
++++ b/tools/ocaml/xenstored/process.ml
+@@ -195,10 +195,9 @@ let parse_live_update args =
+ | _ when Unix.gettimeofday () < t.deadline -> false
+ | l ->
+ warn "timeout reached: have to wait, migrate or shutdown %d domains:" (List.length l);
+- let msgs = List.rev_map (fun con -> Printf.sprintf "%s: %d tx, in: %b, out: %b, perm: %s"
++ let msgs = List.rev_map (fun con -> Printf.sprintf "%s: %d tx, out: %b, perm: %s"
+ (Connection.get_domstr con)
+ (Connection.number_of_transactions con)
+- (Connection.has_input con)
+ (Connection.has_output con)
+ (Connection.get_perm con |> Perms.Connection.to_string)
+ ) l in
+@@ -706,16 +705,17 @@ let do_input store cons doms con =
+ info "%s requests a reconnect" (Connection.get_domstr con);
+ History.reconnect con;
+ info "%s reconnection complete" (Connection.get_domstr con);
+- false
++ None
+ | Failure exp ->
+ error "caught exception %s" exp;
+ error "got a bad client %s" (sprintf "%-8s" (Connection.get_domstr con));
+ Connection.mark_as_bad con;
+- false
++ None
+ in
+
+- if newpacket then (
+- let packet = Connection.pop_in con in
++ match newpacket with
++ | None -> ()
++ | Some packet ->
+ let tid, rid, ty, data = Xenbus.Xb.Packet.unpack packet in
+ let req = {Packet.tid=tid; Packet.rid=rid; Packet.ty=ty; Packet.data=data} in
+
+@@ -725,8 +725,7 @@ let do_input store cons doms con =
+ (Xenbus.Xb.Op.to_string ty) (sanitize_data data); *)
+ process_packet ~store ~cons ~doms ~con ~req;
+ write_access_log ~ty ~tid ~con:(Connection.get_domstr con) ~data;
+- Connection.incr_ops con;
+- )
++ Connection.incr_ops con
+
+ let do_output _store _cons _doms con =
+ if Connection.has_output con then (
+--
+2.37.4
+
diff --git a/0106-tools-ocaml-xb-Add-BoundedQueue.patch b/0106-tools-ocaml-xb-Add-BoundedQueue.patch
new file mode 100644
index 0000000..c1f0385
--- /dev/null
+++ b/0106-tools-ocaml-xb-Add-BoundedQueue.patch
@@ -0,0 +1,133 @@
+From 9e5290daf923e84ca56a6f3d9fc6a333175ef0f9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Wed, 12 Oct 2022 19:13:03 +0100
+Subject: [PATCH 106/126] tools/ocaml/xb: Add BoundedQueue
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Ensures we cannot store more than [capacity] elements in a [Queue]. Replacing
+all Queue with this module will then ensure at compile time that all Queues
+are correctly bound checked.
+
+Each element in the queue has a class with its own limits. This, in a
+subsequent change, will ensure that command responses can proceed during a
+flood of watch events.
+
+No functional change.
+
+This is part of XSA-326.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit 19171fb5d888b4467a7073e8febc5e05540956e9)
+---
+ tools/ocaml/libs/xb/xb.ml | 92 +++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 92 insertions(+)
+
+diff --git a/tools/ocaml/libs/xb/xb.ml b/tools/ocaml/libs/xb/xb.ml
+index 165fd4a1edf4..4197a3888a68 100644
+--- a/tools/ocaml/libs/xb/xb.ml
++++ b/tools/ocaml/libs/xb/xb.ml
+@@ -17,6 +17,98 @@
+ module Op = struct include Op end
+ module Packet = struct include Packet end
+
++module BoundedQueue : sig
++ type ('a, 'b) t
++
++ (** [create ~capacity ~classify ~limit] creates a queue with maximum [capacity] elements.
++ This is burst capacity, each element is further classified according to [classify],
++ and each class can have its own [limit].
++ [capacity] is enforced as an overall limit.
++ The [limit] can be dynamic, and can be smaller than the number of elements already queued of that class,
++ in which case those elements are considered to use "burst capacity".
++ *)
++ val create: capacity:int -> classify:('a -> 'b) -> limit:('b -> int) -> ('a, 'b) t
++
++ (** [clear q] discards all elements from [q] *)
++ val clear: ('a, 'b) t -> unit
++
++ (** [can_push q cls] when [length q < capacity] and class [cls] is below its [limit]. *)
++ val can_push: ('a, 'b) t -> 'b -> bool
++
++ (** [push e q] adds [e] at the end of queue [q] if [can_push q], or returns [None]. *)
++ val push: 'a -> ('a, 'b) t -> unit option
++
++ (** [pop q] removes and returns first element in [q], or raises [Queue.Empty]. *)
++ val pop: ('a, 'b) t -> 'a
++
++ (** [peek q] returns the first element in [q], or raises [Queue.Empty]. *)
++ val peek : ('a, 'b) t -> 'a
++
++ (** [length q] returns the current number of elements in [q] *)
++ val length: ('a, 'b) t -> int
++
++ (** [debug string_of_class q] prints queue usage statistics in an unspecified internal format. *)
++ val debug: ('b -> string) -> (_, 'b) t -> string
++end = struct
++ type ('a, 'b) t =
++ { q: 'a Queue.t
++ ; capacity: int
++ ; classify: 'a -> 'b
++ ; limit: 'b -> int
++ ; class_count: ('b, int) Hashtbl.t
++ }
++
++ let create ~capacity ~classify ~limit =
++ { capacity; q = Queue.create (); classify; limit; class_count = Hashtbl.create 3 }
++
++ let get_count t classification = try Hashtbl.find t.class_count classification with Not_found -> 0
++
++ let can_push_internal t classification class_count =
++ Queue.length t.q < t.capacity && class_count < t.limit classification
++
++ let ok = Some ()
++
++ let push e t =
++ let classification = t.classify e in
++ let class_count = get_count t classification in
++ if can_push_internal t classification class_count then begin
++ Queue.push e t.q;
++ Hashtbl.replace t.class_count classification (class_count + 1);
++ ok
++ end
++ else
++ None
++
++ let can_push t classification =
++ can_push_internal t classification @@ get_count t classification
++
++ let clear t =
++ Queue.clear t.q;
++ Hashtbl.reset t.class_count
++
++ let pop t =
++ let e = Queue.pop t.q in
++ let classification = t.classify e in
++ let () = match get_count t classification - 1 with
++ | 0 -> Hashtbl.remove t.class_count classification (* reduces memusage *)
++ | n -> Hashtbl.replace t.class_count classification n
++ in
++ e
++
++ let peek t = Queue.peek t.q
++ let length t = Queue.length t.q
++
++ let debug string_of_class t =
++ let b = Buffer.create 128 in
++ Printf.bprintf b "BoundedQueue capacity: %d, used: {" t.capacity;
++ Hashtbl.iter (fun packet_class count ->
++ Printf.bprintf b " %s: %d" (string_of_class packet_class) count
++ ) t.class_count;
++ Printf.bprintf b "}";
++ Buffer.contents b
++end
++
++
+ exception End_of_file
+ exception Eagain
+ exception Noent
+--
+2.37.4
+
diff --git a/0107-tools-ocaml-Limit-maximum-in-flight-requests-outstan.patch b/0107-tools-ocaml-Limit-maximum-in-flight-requests-outstan.patch
new file mode 100644
index 0000000..5f5c4b6
--- /dev/null
+++ b/0107-tools-ocaml-Limit-maximum-in-flight-requests-outstan.patch
@@ -0,0 +1,888 @@
+From 64048b4c218099b6adcf46cd7b4d1dc9c658009e Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Wed, 12 Oct 2022 19:13:04 +0100
+Subject: [PATCH 107/126] tools/ocaml: Limit maximum in-flight requests /
+ outstanding replies
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce a limit on the number of outstanding reply packets in the xenbus
+queue. This limits the number of in-flight requests: when the output queue is
+full we'll stop processing inputs until the output queue has room again.
+
+To avoid a busy loop on the Unix socket we only add it to the watched input
+file descriptor set if we'd be able to call `input` on it. Even though Dom0
+is trusted and exempt from quotas a flood of events might cause a backlog
+where events are produced faster than daemons in Dom0 can consume them, which
+could lead to an unbounded queue size and OOM.
+
+Therefore the xenbus queue limit must apply to all connections, Dom0 is not
+exempt from it, although if everything works correctly it will eventually
+catch up.
+
+This prevents a malicious guest from sending more commands while it has
+outstanding watch events or command replies in its input ring. However if it
+can cause the generation of watch events by other means (e.g. by Dom0, or
+another cooperative guest) and stop reading its own ring then watch events
+would've queued up without limit.
+
+The xenstore protocol doesn't have a back-pressure mechanism, and doesn't
+allow dropping watch events. In fact, dropping watch events is known to break
+some pieces of normal functionality. This leaves little choice to safely
+implement the xenstore protocol without exposing the xenstore daemon to
+out-of-memory attacks.
+
+Implement the fix as pipes with bounded buffers:
+* Use a bounded buffer for watch events
+* The watch structure will have a bounded receiving pipe of watch events
+* The source will have an "overflow" pipe of pending watch events it couldn't
+ deliver
+
+Items are queued up on one end and are sent as far along the pipe as possible:
+
+ source domain -> watch -> xenbus of target -> xenstore ring/socket of target
+
+If the pipe is "full" at any point then back-pressure is applied and we prevent
+more items from being queued up. For the source domain this means that we'll
+stop accepting new commands as long as its pipe buffer is not empty.
+
+Before we try to enqueue an item we first check whether it is possible to send
+it further down the pipe, by attempting to recursively flush the pipes. This
+ensures that we retain the order of events as much as possible.
+
+We might break causality of watch events if the target domain's queue is full
+and we need to start using the watch's queue. This is a breaking change in
+the xenstore protocol, but only for domains which are not processing their
+incoming ring as expected.
+
+When a watch is deleted its entire pending queue is dropped (no code is needed
+for that, because it is part of the 'watch' type).
+
+There is a cache of watches that have pending events that we attempt to flush
+at every cycle if possible.
+
+Introduce 3 limits here:
+* quota-maxwatchevents on watch event destination: when this is hit the
+ source will not be allowed to queue up more watch events.
+* quota-maxoutstanding which is the number of responses not read from the ring:
+ once exceeded, no more inputs are processed until all outstanding replies
+ are consumed by the client.
+* overflow queue on the watch event source: all watches that cannot be stored
+ on destination are queued up here, a single command can trigger multiple
+ watches (e.g. due to recursion).
+
+The overflow queue currently doesn't have an upper bound, it is difficult to
+accurately calculate one as it depends on whether you are Dom0 and how many
+watches each path has registered and how many watch events you can trigger
+with a single command (e.g. a commit). However these events were already
+using memory, this just moves them elsewhere, and as long as we correctly
+block a domain it shouldn't result in unbounded memory usage.
+
+Note that Dom0 is not excluded from these checks, it is important that Dom0 is
+especially not excluded when it is the source, since there are many ways in
+which a guest could trigger Dom0 to send it watch events.
+
+This should protect against malicious frontends as long as the backend follows
+the PV xenstore protocol and only exposes paths needed by the frontend, and
+changes those paths at most once as a reaction to guest events, or protocol
+state.
+
+The queue limits are per watch, and per domain-pair, so even if one
+communication channel would be "blocked", others would keep working, and the
+domain itself won't get blocked as long as it doesn't overflow the queue of
+watch events.
+
+Similarly a malicious backend could cause the frontend to get blocked, but
+this watch queue protects the frontend as well as long as it follows the PV
+protocol. (Although note that protection against malicious backends is only a
+best effort at the moment)
+
+This is part of XSA-326 / CVE-2022-42318.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit 9284ae0c40fb5b9606947eaaec23dc71d0540e96)
+---
+ tools/ocaml/libs/xb/xb.ml | 61 +++++++--
+ tools/ocaml/libs/xb/xb.mli | 11 +-
+ tools/ocaml/libs/xs/queueop.ml | 25 ++--
+ tools/ocaml/libs/xs/xsraw.ml | 4 +-
+ tools/ocaml/xenstored/connection.ml | 155 +++++++++++++++++++++--
+ tools/ocaml/xenstored/connections.ml | 57 +++++++--
+ tools/ocaml/xenstored/define.ml | 7 +
+ tools/ocaml/xenstored/oxenstored.conf.in | 2 +
+ tools/ocaml/xenstored/process.ml | 31 ++++-
+ tools/ocaml/xenstored/xenstored.ml | 2 +
+ 10 files changed, 296 insertions(+), 59 deletions(-)
+
+diff --git a/tools/ocaml/libs/xb/xb.ml b/tools/ocaml/libs/xb/xb.ml
+index 4197a3888a68..b292ed7a874d 100644
+--- a/tools/ocaml/libs/xb/xb.ml
++++ b/tools/ocaml/libs/xb/xb.ml
+@@ -134,14 +134,44 @@ type backend = Fd of backend_fd | Xenmmap of backend_mmap
+
+ type partial_buf = HaveHdr of Partial.pkt | NoHdr of int * bytes
+
++(*
++ separate capacity reservation for replies and watch events:
++ this allows a domain to keep working even when under a constant flood of
++ watch events
++*)
++type capacity = { maxoutstanding: int; maxwatchevents: int }
++
++module Queue = BoundedQueue
++
++type packet_class =
++ | CommandReply
++ | Watchevent
++
++let string_of_packet_class = function
++ | CommandReply -> "command_reply"
++ | Watchevent -> "watch_event"
++
+ type t =
+ {
+ backend: backend;
+- pkt_out: Packet.t Queue.t;
++ pkt_out: (Packet.t, packet_class) Queue.t;
+ mutable partial_in: partial_buf;
+ mutable partial_out: string;
++ capacity: capacity
+ }
+
++let to_read con =
++ match con.partial_in with
++ | HaveHdr partial_pkt -> Partial.to_complete partial_pkt
++ | NoHdr (i, _) -> i
++
++let debug t =
++ Printf.sprintf "XenBus state: partial_in: %d needed, partial_out: %d bytes, pkt_out: %d packets, %s"
++ (to_read t)
++ (String.length t.partial_out)
++ (Queue.length t.pkt_out)
++ (BoundedQueue.debug string_of_packet_class t.pkt_out)
++
+ let init_partial_in () = NoHdr
+ (Partial.header_size (), Bytes.make (Partial.header_size()) '\000')
+
+@@ -199,7 +229,8 @@ let output con =
+ let s = if String.length con.partial_out > 0 then
+ con.partial_out
+ else if Queue.length con.pkt_out > 0 then
+- Packet.to_string (Queue.pop con.pkt_out)
++ let pkt = Queue.pop con.pkt_out in
++ Packet.to_string pkt
+ else
+ "" in
+ (* send data from s, and save the unsent data to partial_out *)
+@@ -212,12 +243,15 @@ let output con =
+ (* after sending one packet, partial is empty *)
+ con.partial_out = ""
+
++(* we can only process an input packet if we're guaranteed to have room
++ to store the response packet *)
++let can_input con = Queue.can_push con.pkt_out CommandReply
++
+ (* NB: can throw Reconnect *)
+ let input con =
+- let to_read =
+- match con.partial_in with
+- | HaveHdr partial_pkt -> Partial.to_complete partial_pkt
+- | NoHdr (i, _) -> i in
++ if not (can_input con) then None
++ else
++ let to_read = to_read con in
+
+ (* try to get more data from input stream *)
+ let b = Bytes.make to_read '\000' in
+@@ -243,11 +277,22 @@ let input con =
+ None
+ )
+
+-let newcon backend = {
++let classify t =
++ match t.Packet.ty with
++ | Op.Watchevent -> Watchevent
++ | _ -> CommandReply
++
++let newcon ~capacity backend =
++ let limit = function
++ | CommandReply -> capacity.maxoutstanding
++ | Watchevent -> capacity.maxwatchevents
++ in
++ {
+ backend = backend;
+- pkt_out = Queue.create ();
++ pkt_out = Queue.create ~capacity:(capacity.maxoutstanding + capacity.maxwatchevents) ~classify ~limit;
+ partial_in = init_partial_in ();
+ partial_out = "";
++ capacity = capacity;
+ }
+
+ let open_fd fd = newcon (Fd { fd = fd; })
+diff --git a/tools/ocaml/libs/xb/xb.mli b/tools/ocaml/libs/xb/xb.mli
+index 91c682162cea..71b2754ca788 100644
+--- a/tools/ocaml/libs/xb/xb.mli
++++ b/tools/ocaml/libs/xb/xb.mli
+@@ -66,10 +66,11 @@ type backend_mmap = {
+ type backend_fd = { fd : Unix.file_descr; }
+ type backend = Fd of backend_fd | Xenmmap of backend_mmap
+ type partial_buf = HaveHdr of Partial.pkt | NoHdr of int * bytes
++type capacity = { maxoutstanding: int; maxwatchevents: int }
+ type t
+ val init_partial_in : unit -> partial_buf
+ val reconnect : t -> unit
+-val queue : t -> Packet.t -> unit
++val queue : t -> Packet.t -> unit option
+ val read_fd : backend_fd -> 'a -> bytes -> int -> int
+ val read_mmap : backend_mmap -> 'a -> bytes -> int -> int
+ val read : t -> bytes -> int -> int
+@@ -78,13 +79,14 @@ val write_mmap : backend_mmap -> 'a -> string -> int -> int
+ val write : t -> string -> int -> int
+ val output : t -> bool
+ val input : t -> Packet.t option
+-val newcon : backend -> t
+-val open_fd : Unix.file_descr -> t
+-val open_mmap : Xenmmap.mmap_interface -> (unit -> unit) -> t
++val newcon : capacity:capacity -> backend -> t
++val open_fd : Unix.file_descr -> capacity:capacity -> t
++val open_mmap : Xenmmap.mmap_interface -> (unit -> unit) -> capacity:capacity -> t
+ val close : t -> unit
+ val is_fd : t -> bool
+ val is_mmap : t -> bool
+ val output_len : t -> int
++val can_input: t -> bool
+ val has_new_output : t -> bool
+ val has_old_output : t -> bool
+ val has_output : t -> bool
+@@ -93,3 +95,4 @@ val has_partial_input : t -> bool
+ val has_more_input : t -> bool
+ val is_selectable : t -> bool
+ val get_fd : t -> Unix.file_descr
++val debug: t -> string
+diff --git a/tools/ocaml/libs/xs/queueop.ml b/tools/ocaml/libs/xs/queueop.ml
+index 9ff5bbd529ce..4e532cdaeacb 100644
+--- a/tools/ocaml/libs/xs/queueop.ml
++++ b/tools/ocaml/libs/xs/queueop.ml
+@@ -16,9 +16,10 @@
+ open Xenbus
+
+ let data_concat ls = (String.concat "\000" ls) ^ "\000"
++let queue con pkt = let r = Xb.queue con pkt in assert (r <> None)
+ let queue_path ty (tid: int) (path: string) con =
+ let data = data_concat [ path; ] in
+- Xb.queue con (Xb.Packet.create tid 0 ty data)
++ queue con (Xb.Packet.create tid 0 ty data)
+
+ (* operations *)
+ let directory tid path con = queue_path Xb.Op.Directory tid path con
+@@ -27,48 +28,48 @@ let read tid path con = queue_path Xb.Op.Read tid path con
+ let getperms tid path con = queue_path Xb.Op.Getperms tid path con
+
+ let debug commands con =
+- Xb.queue con (Xb.Packet.create 0 0 Xb.Op.Debug (data_concat commands))
++ queue con (Xb.Packet.create 0 0 Xb.Op.Debug (data_concat commands))
+
+ let watch path data con =
+ let data = data_concat [ path; data; ] in
+- Xb.queue con (Xb.Packet.create 0 0 Xb.Op.Watch data)
++ queue con (Xb.Packet.create 0 0 Xb.Op.Watch data)
+
+ let unwatch path data con =
+ let data = data_concat [ path; data; ] in
+- Xb.queue con (Xb.Packet.create 0 0 Xb.Op.Unwatch data)
++ queue con (Xb.Packet.create 0 0 Xb.Op.Unwatch data)
+
+ let transaction_start con =
+- Xb.queue con (Xb.Packet.create 0 0 Xb.Op.Transaction_start (data_concat []))
++ queue con (Xb.Packet.create 0 0 Xb.Op.Transaction_start (data_concat []))
+
+ let transaction_end tid commit con =
+ let data = data_concat [ (if commit then "T" else "F"); ] in
+- Xb.queue con (Xb.Packet.create tid 0 Xb.Op.Transaction_end data)
++ queue con (Xb.Packet.create tid 0 Xb.Op.Transaction_end data)
+
+ let introduce domid mfn port con =
+ let data = data_concat [ Printf.sprintf "%u" domid;
+ Printf.sprintf "%nu" mfn;
+ string_of_int port; ] in
+- Xb.queue con (Xb.Packet.create 0 0 Xb.Op.Introduce data)
++ queue con (Xb.Packet.create 0 0 Xb.Op.Introduce data)
+
+ let release domid con =
+ let data = data_concat [ Printf.sprintf "%u" domid; ] in
+- Xb.queue con (Xb.Packet.create 0 0 Xb.Op.Release data)
++ queue con (Xb.Packet.create 0 0 Xb.Op.Release data)
+
+ let resume domid con =
+ let data = data_concat [ Printf.sprintf "%u" domid; ] in
+- Xb.queue con (Xb.Packet.create 0 0 Xb.Op.Resume data)
++ queue con (Xb.Packet.create 0 0 Xb.Op.Resume data)
+
+ let getdomainpath domid con =
+ let data = data_concat [ Printf.sprintf "%u" domid; ] in
+- Xb.queue con (Xb.Packet.create 0 0 Xb.Op.Getdomainpath data)
++ queue con (Xb.Packet.create 0 0 Xb.Op.Getdomainpath data)
+
+ let write tid path value con =
+ let data = path ^ "\000" ^ value (* no NULL at the end *) in
+- Xb.queue con (Xb.Packet.create tid 0 Xb.Op.Write data)
++ queue con (Xb.Packet.create tid 0 Xb.Op.Write data)
+
+ let mkdir tid path con = queue_path Xb.Op.Mkdir tid path con
+ let rm tid path con = queue_path Xb.Op.Rm tid path con
+
+ let setperms tid path perms con =
+ let data = data_concat [ path; perms ] in
+- Xb.queue con (Xb.Packet.create tid 0 Xb.Op.Setperms data)
++ queue con (Xb.Packet.create tid 0 Xb.Op.Setperms data)
+diff --git a/tools/ocaml/libs/xs/xsraw.ml b/tools/ocaml/libs/xs/xsraw.ml
+index 451f8b38dbcc..cbd17280600c 100644
+--- a/tools/ocaml/libs/xs/xsraw.ml
++++ b/tools/ocaml/libs/xs/xsraw.ml
+@@ -36,8 +36,10 @@ type con = {
+ let close con =
+ Xb.close con.xb
+
++let capacity = { Xb.maxoutstanding = 1; maxwatchevents = 0; }
++
+ let open_fd fd = {
+- xb = Xb.open_fd fd;
++ xb = Xb.open_fd ~capacity fd;
+ watchevents = Queue.create ();
+ }
+
+diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml
+index cc20e047d2b9..9624a5f9da2c 100644
+--- a/tools/ocaml/xenstored/connection.ml
++++ b/tools/ocaml/xenstored/connection.ml
+@@ -20,12 +20,84 @@ open Stdext
+
+ let xenstore_payload_max = 4096 (* xen/include/public/io/xs_wire.h *)
+
++type 'a bounded_sender = 'a -> unit option
++(** a bounded sender accepts an ['a] item and returns:
++ None - if there is no room to accept the item
++ Some () - if it has successfully accepted/sent the item
++ *)
++
++module BoundedPipe : sig
++ type 'a t
++
++ (** [create ~capacity ~destination] creates a bounded pipe with a
++ local buffer holding at most [capacity] items. Once the buffer is
++ full it will not accept further items. items from the pipe are
++ flushed into [destination] as long as it accepts items. The
++ destination could be another pipe.
++ *)
++ val create: capacity:int -> destination:'a bounded_sender -> 'a t
++
++ (** [is_empty t] returns whether the local buffer of [t] is empty. *)
++ val is_empty : _ t -> bool
++
++ (** [length t] the number of items in the internal buffer *)
++ val length: _ t -> int
++
++ (** [flush_pipe t] sends as many items from the local buffer as possible,
++ which could be none. *)
++ val flush_pipe: _ t -> unit
++
++ (** [push t item] tries to [flush_pipe] and then push [item]
++ into the pipe if its [capacity] allows.
++ Returns [None] if there is no more room
++ *)
++ val push : 'a t -> 'a bounded_sender
++end = struct
++ (* items are enqueued in [q], and then flushed to [destination] *)
++ type 'a t =
++ { q: 'a Queue.t
++ ; destination: 'a bounded_sender
++ ; capacity: int
++ }
++
++ let create ~capacity ~destination =
++ { q = Queue.create (); capacity; destination }
++
++ let rec flush_pipe t =
++ if not Queue.(is_empty t.q) then
++ let item = Queue.peek t.q in
++ match t.destination item with
++ | None -> () (* no room *)
++ | Some () ->
++ (* successfully sent item to next stage *)
++ let _ = Queue.pop t.q in
++ (* continue trying to send more items *)
++ flush_pipe t
++
++ let push t item =
++ (* first try to flush as many items from this pipe as possible to make room,
++ it is important to do this first to preserve the order of the items
++ *)
++ flush_pipe t;
++ if Queue.length t.q < t.capacity then begin
++ (* enqueue, instead of sending directly.
++ this ensures that [out] sees the items in the same order as we receive them
++ *)
++ Queue.push item t.q;
++ Some (flush_pipe t)
++ end else None
++
++ let is_empty t = Queue.is_empty t.q
++ let length t = Queue.length t.q
++end
++
+ type watch = {
+ con: t;
+ token: string;
+ path: string;
+ base: string;
+ is_relative: bool;
++ pending_watchevents: Xenbus.Xb.Packet.t BoundedPipe.t;
+ }
+
+ and t = {
+@@ -38,8 +110,36 @@ and t = {
+ anonid: int;
+ mutable stat_nb_ops: int;
+ mutable perm: Perms.Connection.t;
++ pending_source_watchevents: (watch * Xenbus.Xb.Packet.t) BoundedPipe.t
+ }
+
++module Watch = struct
++ module T = struct
++ type t = watch
++
++ let compare w1 w2 =
++ (* cannot compare watches from different connections *)
++ assert (w1.con == w2.con);
++ match String.compare w1.token w2.token with
++ | 0 -> String.compare w1.path w2.path
++ | n -> n
++ end
++ module Set = Set.Make(T)
++
++ let flush_events t =
++ BoundedPipe.flush_pipe t.pending_watchevents;
++ not (BoundedPipe.is_empty t.pending_watchevents)
++
++ let pending_watchevents t =
++ BoundedPipe.length t.pending_watchevents
++end
++
++let source_flush_watchevents t =
++ BoundedPipe.flush_pipe t.pending_source_watchevents
++
++let source_pending_watchevents t =
++ BoundedPipe.length t.pending_source_watchevents
++
+ let mark_as_bad con =
+ match con.dom with
+ |None -> ()
+@@ -67,7 +167,8 @@ let watch_create ~con ~path ~token = {
+ token = token;
+ path = path;
+ base = get_path con;
+- is_relative = path.[0] <> '/' && path.[0] <> '@'
++ is_relative = path.[0] <> '/' && path.[0] <> '@';
++ pending_watchevents = BoundedPipe.create ~capacity:!Define.maxwatchevents ~destination:(Xenbus.Xb.queue con.xb)
+ }
+
+ let get_con w = w.con
+@@ -93,6 +194,9 @@ let make_perm dom =
+ Perms.Connection.create ~perms:[Perms.READ; Perms.WRITE] domid
+
+ let create xbcon dom =
++ let destination (watch, pkt) =
++ BoundedPipe.push watch.pending_watchevents pkt
++ in
+ let id =
+ match dom with
+ | None -> let old = !anon_id_next in incr anon_id_next; old
+@@ -109,6 +213,16 @@ let create xbcon dom =
+ anonid = id;
+ stat_nb_ops = 0;
+ perm = make_perm dom;
++
++ (* the actual capacity will be lower, this is used as an overflow
++ buffer: anything that doesn't fit elsewhere gets put here, only
++ limited by the amount of watches that you can generate with a
++ single xenstore command (which is finite, although possibly very
++ large in theory for Dom0). Once the pipe here has any contents the
++ domain is blocked from sending more commands until it is empty
++ again though.
++ *)
++ pending_source_watchevents = BoundedPipe.create ~capacity:Sys.max_array_length ~destination
+ }
+ in
+ Logging.new_connection ~tid:Transaction.none ~con:(get_domstr con);
+@@ -127,11 +241,17 @@ let set_target con target_domid =
+
+ let is_backend_mmap con = Xenbus.Xb.is_mmap con.xb
+
+-let send_reply con tid rid ty data =
++let packet_of con tid rid ty data =
+ if (String.length data) > xenstore_payload_max && (is_backend_mmap con) then
+- Xenbus.Xb.queue con.xb (Xenbus.Xb.Packet.create tid rid Xenbus.Xb.Op.Error "E2BIG\000")
++ Xenbus.Xb.Packet.create tid rid Xenbus.Xb.Op.Error "E2BIG\000"
+ else
+- Xenbus.Xb.queue con.xb (Xenbus.Xb.Packet.create tid rid ty data)
++ Xenbus.Xb.Packet.create tid rid ty data
++
++let send_reply con tid rid ty data =
++ let result = Xenbus.Xb.queue con.xb (packet_of con tid rid ty data) in
++ (* should never happen: we only process an input packet when there is room for an output packet *)
++ (* and the limit for replies is different from the limit for watch events *)
++ assert (result <> None)
+
+ let send_error con tid rid err = send_reply con tid rid Xenbus.Xb.Op.Error (err ^ "\000")
+ let send_ack con tid rid ty = send_reply con tid rid ty "OK\000"
+@@ -181,11 +301,11 @@ let del_watch con path token =
+ apath, w
+
+ let del_watches con =
+- Hashtbl.clear con.watches;
++ Hashtbl.reset con.watches;
+ con.nb_watches <- 0
+
+ let del_transactions con =
+- Hashtbl.clear con.transactions
++ Hashtbl.reset con.transactions
+
+ let list_watches con =
+ let ll = Hashtbl.fold
+@@ -208,21 +328,29 @@ let lookup_watch_perm path = function
+ let lookup_watch_perms oldroot root path =
+ lookup_watch_perm path oldroot @ lookup_watch_perm path (Some root)
+
+-let fire_single_watch_unchecked watch =
++let fire_single_watch_unchecked source watch =
+ let data = Utils.join_by_null [watch.path; watch.token; ""] in
+- send_reply watch.con Transaction.none 0 Xenbus.Xb.Op.Watchevent data
++ let pkt = packet_of watch.con Transaction.none 0 Xenbus.Xb.Op.Watchevent data in
+
+-let fire_single_watch (oldroot, root) watch =
++ match BoundedPipe.push source.pending_source_watchevents (watch, pkt) with
++ | Some () -> () (* packet queued *)
++ | None ->
++ (* a well behaved Dom0 shouldn't be able to trigger this,
++ if it happens it is likely a Dom0 bug causing runaway memory usage
++ *)
++ failwith "watch event overflow, cannot happen"
++
++let fire_single_watch source (oldroot, root) watch =
+ let abspath = get_watch_path watch.con watch.path |> Store.Path.of_string in
+ let perms = lookup_watch_perms oldroot root abspath in
+ if Perms.can_fire_watch watch.con.perm perms then
+- fire_single_watch_unchecked watch
++ fire_single_watch_unchecked source watch
+ else
+ let perms = perms |> List.map (Perms.Node.to_string ~sep:" ") |> String.concat ", " in
+ let con = get_domstr watch.con in
+ Logging.watch_not_fired ~con perms (Store.Path.to_string abspath)
+
+-let fire_watch roots watch path =
++let fire_watch source roots watch path =
+ let new_path =
+ if watch.is_relative && path.[0] = '/'
+ then begin
+@@ -232,7 +360,7 @@ let fire_watch roots watch path =
+ end else
+ path
+ in
+- fire_single_watch roots { watch with path = new_path }
++ fire_single_watch source roots { watch with path = new_path }
+
+ (* Search for a valid unused transaction id. *)
+ let rec valid_transaction_id con proposed_id =
+@@ -280,6 +408,7 @@ let do_input con = Xenbus.Xb.input con.xb
+ let has_partial_input con = Xenbus.Xb.has_partial_input con.xb
+ let has_more_input con = Xenbus.Xb.has_more_input con.xb
+
++let can_input con = Xenbus.Xb.can_input con.xb && BoundedPipe.is_empty con.pending_source_watchevents
+ let has_output con = Xenbus.Xb.has_output con.xb
+ let has_old_output con = Xenbus.Xb.has_old_output con.xb
+ let has_new_output con = Xenbus.Xb.has_new_output con.xb
+@@ -323,7 +452,7 @@ let prevents_live_update con = not (is_bad con)
+ && (has_extra_connection_data con || has_transaction_data con)
+
+ let has_more_work con =
+- has_more_input con || not (has_old_output con) && has_new_output con
++ (has_more_input con && can_input con) || not (has_old_output con) && has_new_output con
+
+ let incr_ops con = con.stat_nb_ops <- con.stat_nb_ops + 1
+
+diff --git a/tools/ocaml/xenstored/connections.ml b/tools/ocaml/xenstored/connections.ml
+index 3c7429fe7f61..7d68c583b43a 100644
+--- a/tools/ocaml/xenstored/connections.ml
++++ b/tools/ocaml/xenstored/connections.ml
+@@ -22,22 +22,30 @@ type t = {
+ domains: (int, Connection.t) Hashtbl.t;
+ ports: (Xeneventchn.t, Connection.t) Hashtbl.t;
+ mutable watches: Connection.watch list Trie.t;
++ mutable has_pending_watchevents: Connection.Watch.Set.t
+ }
+
+ let create () = {
+ anonymous = Hashtbl.create 37;
+ domains = Hashtbl.create 37;
+ ports = Hashtbl.create 37;
+- watches = Trie.create ()
++ watches = Trie.create ();
++ has_pending_watchevents = Connection.Watch.Set.empty;
+ }
+
++let get_capacity () =
++ (* not multiplied by maxwatch on purpose: 2nd queue in watch itself! *)
++ { Xenbus.Xb.maxoutstanding = !Define.maxoutstanding; maxwatchevents = !Define.maxwatchevents }
++
+ let add_anonymous cons fd =
+- let xbcon = Xenbus.Xb.open_fd fd in
++ let capacity = get_capacity () in
++ let xbcon = Xenbus.Xb.open_fd fd ~capacity in
+ let con = Connection.create xbcon None in
+ Hashtbl.add cons.anonymous (Xenbus.Xb.get_fd xbcon) con
+
+ let add_domain cons dom =
+- let xbcon = Xenbus.Xb.open_mmap (Domain.get_interface dom) (fun () -> Domain.notify dom) in
++ let capacity = get_capacity () in
++ let xbcon = Xenbus.Xb.open_mmap ~capacity (Domain.get_interface dom) (fun () -> Domain.notify dom) in
+ let con = Connection.create xbcon (Some dom) in
+ Hashtbl.add cons.domains (Domain.get_id dom) con;
+ match Domain.get_port dom with
+@@ -48,7 +56,9 @@ let select ?(only_if = (fun _ -> true)) cons =
+ Hashtbl.fold (fun _ con (ins, outs) ->
+ if (only_if con) then (
+ let fd = Connection.get_fd con in
+- (fd :: ins, if Connection.has_output con then fd :: outs else outs)
++ let in_fds = if Connection.can_input con then fd :: ins else ins in
++ let out_fds = if Connection.has_output con then fd :: outs else outs in
++ in_fds, out_fds
+ ) else (ins, outs)
+ )
+ cons.anonymous ([], [])
+@@ -67,10 +77,17 @@ let del_watches_of_con con watches =
+ | [] -> None
+ | ws -> Some ws
+
++let del_watches cons con =
++ Connection.del_watches con;
++ cons.watches <- Trie.map (del_watches_of_con con) cons.watches;
++ cons.has_pending_watchevents <-
++ cons.has_pending_watchevents |> Connection.Watch.Set.filter @@ fun w ->
++ Connection.get_con w != con
++
+ let del_anonymous cons con =
+ try
+ Hashtbl.remove cons.anonymous (Connection.get_fd con);
+- cons.watches <- Trie.map (del_watches_of_con con) cons.watches;
++ del_watches cons con;
+ Connection.close con
+ with exn ->
+ debug "del anonymous %s" (Printexc.to_string exn)
+@@ -85,7 +102,7 @@ let del_domain cons id =
+ | Some p -> Hashtbl.remove cons.ports p
+ | None -> ())
+ | None -> ());
+- cons.watches <- Trie.map (del_watches_of_con con) cons.watches;
++ del_watches cons con;
+ Connection.close con
+ with exn ->
+ debug "del domain %u: %s" id (Printexc.to_string exn)
+@@ -136,31 +153,33 @@ let del_watch cons con path token =
+ cons.watches <- Trie.set cons.watches key watches;
+ watch
+
+-let del_watches cons con =
+- Connection.del_watches con;
+- cons.watches <- Trie.map (del_watches_of_con con) cons.watches
+-
+ (* path is absolute *)
+-let fire_watches ?oldroot root cons path recurse =
++let fire_watches ?oldroot source root cons path recurse =
+ let key = key_of_path path in
+ let path = Store.Path.to_string path in
+ let roots = oldroot, root in
+ let fire_watch _ = function
+ | None -> ()
+- | Some watches -> List.iter (fun w -> Connection.fire_watch roots w path) watches
++ | Some watches -> List.iter (fun w -> Connection.fire_watch source roots w path) watches
+ in
+ let fire_rec _x = function
+ | None -> ()
+ | Some watches ->
+- List.iter (Connection.fire_single_watch roots) watches
++ List.iter (Connection.fire_single_watch source roots) watches
+ in
+ Trie.iter_path fire_watch cons.watches key;
+ if recurse then
+ Trie.iter fire_rec (Trie.sub cons.watches key)
+
++let send_watchevents cons con =
++ cons.has_pending_watchevents <-
++ cons.has_pending_watchevents |> Connection.Watch.Set.filter Connection.Watch.flush_events;
++ Connection.source_flush_watchevents con
++
+ let fire_spec_watches root cons specpath =
++ let source = find_domain cons 0 in
+ iter cons (fun con ->
+- List.iter (Connection.fire_single_watch (None, root)) (Connection.get_watches con specpath))
++ List.iter (Connection.fire_single_watch source (None, root)) (Connection.get_watches con specpath))
+
+ let set_target cons domain target_domain =
+ let con = find_domain cons domain in
+@@ -197,6 +216,16 @@ let debug cons =
+ let domains = Hashtbl.fold (fun _ con accu -> Connection.debug con :: accu) cons.domains [] in
+ String.concat "" (domains @ anonymous)
+
++let debug_watchevents cons con =
++ (* == (physical equality)
++ has to be used here because w.con.xb.backend might contain a [unit->unit] value causing regular
++ comparison to fail due to having a 'functional value' which cannot be compared.
++ *)
++ let s = cons.has_pending_watchevents |> Connection.Watch.Set.filter (fun w -> w.con == con) in
++ let pending = s |> Connection.Watch.Set.elements
++ |> List.map (fun w -> Connection.Watch.pending_watchevents w) |> List.fold_left (+) 0 in
++ Printf.sprintf "Watches with pending events: %d, pending events total: %d" (Connection.Watch.Set.cardinal s) pending
++
+ let filter ~f cons =
+ let fold _ v acc = if f v then v :: acc else acc in
+ []
+diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml
+index ba63a8147e09..327b6d795ec7 100644
+--- a/tools/ocaml/xenstored/define.ml
++++ b/tools/ocaml/xenstored/define.ml
+@@ -24,6 +24,13 @@ let default_config_dir = Paths.xen_config_dir
+ let maxwatch = ref (100)
+ let maxtransaction = ref (10)
+ let maxrequests = ref (1024) (* maximum requests per transaction *)
++let maxoutstanding = ref (1024) (* maximum outstanding requests, i.e. in-flight requests / domain *)
++let maxwatchevents = ref (1024)
++(*
++ maximum outstanding watch events per watch,
++ recommended >= maxoutstanding to avoid blocking backend transactions due to
++ malicious frontends
++ *)
+
+ let gc_max_overhead = ref 120 (* 120% see comment in xenstored.ml *)
+ let conflict_burst_limit = ref 5.0
+diff --git a/tools/ocaml/xenstored/oxenstored.conf.in b/tools/ocaml/xenstored/oxenstored.conf.in
+index 4ae48e42d47d..9d034e744b4b 100644
+--- a/tools/ocaml/xenstored/oxenstored.conf.in
++++ b/tools/ocaml/xenstored/oxenstored.conf.in
+@@ -62,6 +62,8 @@ quota-maxwatch = 100
+ quota-transaction = 10
+ quota-maxrequests = 1024
+ quota-path-max = 1024
++quota-maxoutstanding = 1024
++quota-maxwatchevents = 1024
+
+ # Activate filed base backend
+ persistent = false
+diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
+index cbf708213796..ce39ce28b5f3 100644
+--- a/tools/ocaml/xenstored/process.ml
++++ b/tools/ocaml/xenstored/process.ml
+@@ -57,7 +57,7 @@ let split_one_path data con =
+ | path :: "" :: [] -> Store.Path.create path (Connection.get_path con)
+ | _ -> raise Invalid_Cmd_Args
+
+-let process_watch t cons =
++let process_watch source t cons =
+ let oldroot = t.Transaction.oldroot in
+ let newroot = Store.get_root t.store in
+ let ops = Transaction.get_paths t |> List.rev in
+@@ -67,8 +67,9 @@ let process_watch t cons =
+ | Xenbus.Xb.Op.Rm -> true, None, oldroot
+ | Xenbus.Xb.Op.Setperms -> false, Some oldroot, newroot
+ | _ -> raise (Failure "huh ?") in
+- Connections.fire_watches ?oldroot root cons (snd op) recurse in
+- List.iter (fun op -> do_op_watch op cons) ops
++ Connections.fire_watches ?oldroot source root cons (snd op) recurse in
++ List.iter (fun op -> do_op_watch op cons) ops;
++ Connections.send_watchevents cons source
+
+ let create_implicit_path t perm path =
+ let dirname = Store.Path.get_parent path in
+@@ -234,6 +235,20 @@ let do_debug con t _domains cons data =
+ | "watches" :: _ ->
+ let watches = Connections.debug cons in
+ Some (watches ^ "\000")
++ | "xenbus" :: domid :: _ ->
++ let domid = int_of_string domid in
++ let con = Connections.find_domain cons domid in
++ let s = Printf.sprintf "xenbus: %s; overflow queue length: %d, can_input: %b, has_more_input: %b, has_old_output: %b, has_new_output: %b, has_more_work: %b. pending: %s"
++ (Xenbus.Xb.debug con.xb)
++ (Connection.source_pending_watchevents con)
++ (Connection.can_input con)
++ (Connection.has_more_input con)
++ (Connection.has_old_output con)
++ (Connection.has_new_output con)
++ (Connection.has_more_work con)
++ (Connections.debug_watchevents cons con)
++ in
++ Some s
+ | "mfn" :: domid :: _ ->
+ let domid = int_of_string domid in
+ let con = Connections.find_domain cons domid in
+@@ -342,7 +357,7 @@ let reply_ack fct con t doms cons data =
+ fct con t doms cons data;
+ Packet.Ack (fun () ->
+ if Transaction.get_id t = Transaction.none then
+- process_watch t cons
++ process_watch con t cons
+ )
+
+ let reply_data fct con t doms cons data =
+@@ -501,7 +516,7 @@ let do_watch con t _domains cons data =
+ Packet.Ack (fun () ->
+ (* xenstore.txt says this watch is fired immediately,
+ implying even if path doesn't exist or is unreadable *)
+- Connection.fire_single_watch_unchecked watch)
++ Connection.fire_single_watch_unchecked con watch)
+
+ let do_unwatch con _t _domains cons data =
+ let (node, token) =
+@@ -532,7 +547,7 @@ let do_transaction_end con t domains cons data =
+ if not success then
+ raise Transaction_again;
+ if commit then begin
+- process_watch t cons;
++ process_watch con t cons;
+ match t.Transaction.ty with
+ | Transaction.No ->
+ () (* no need to record anything *)
+@@ -700,7 +715,8 @@ let process_packet ~store ~cons ~doms ~con ~req =
+ let do_input store cons doms con =
+ let newpacket =
+ try
+- Connection.do_input con
++ if Connection.can_input con then Connection.do_input con
++ else None
+ with Xenbus.Xb.Reconnect ->
+ info "%s requests a reconnect" (Connection.get_domstr con);
+ History.reconnect con;
+@@ -728,6 +744,7 @@ let do_input store cons doms con =
+ Connection.incr_ops con
+
+ let do_output _store _cons _doms con =
++ Connection.source_flush_watchevents con;
+ if Connection.has_output con then (
+ if Connection.has_new_output con then (
+ let packet = Connection.peek_output con in
+diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
+index 3b57ad016dfb..c799e20f1145 100644
+--- a/tools/ocaml/xenstored/xenstored.ml
++++ b/tools/ocaml/xenstored/xenstored.ml
+@@ -103,6 +103,8 @@ let parse_config filename =
+ ("quota-maxentity", Config.Set_int Quota.maxent);
+ ("quota-maxsize", Config.Set_int Quota.maxsize);
+ ("quota-maxrequests", Config.Set_int Define.maxrequests);
++ ("quota-maxoutstanding", Config.Set_int Define.maxoutstanding);
++ ("quota-maxwatchevents", Config.Set_int Define.maxwatchevents);
+ ("quota-path-max", Config.Set_int Define.path_max);
+ ("gc-max-overhead", Config.Set_int Define.gc_max_overhead);
+ ("test-eagain", Config.Set_bool Transaction.test_eagain);
+--
+2.37.4
+
diff --git a/0108-SUPPORT.md-clarify-support-of-untrusted-driver-domai.patch b/0108-SUPPORT.md-clarify-support-of-untrusted-driver-domai.patch
new file mode 100644
index 0000000..82773df
--- /dev/null
+++ b/0108-SUPPORT.md-clarify-support-of-untrusted-driver-domai.patch
@@ -0,0 +1,55 @@
+From 26faa6b55881445c25e7e83613c2354090fdff18 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 29 Sep 2022 13:07:35 +0200
+Subject: [PATCH 108/126] SUPPORT.md: clarify support of untrusted driver
+ domains with oxenstored
+
+Add a support statement for the scope of support regarding different
+Xenstore variants. In particular, oxenstored does not (yet) have security
+support for untrusted driver domains, as those might drive oxenstored
+out of memory by creating lots of watch events for the guests they are
+servicing.
+
+Add a statement regarding Live Update support of oxenstored.
+
+This is part of XSA-326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: George Dunlap <george.dunlap@citrix.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit c7bc20d8d123851a468402bbfc9e3330efff21ec)
+---
+ SUPPORT.md | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/SUPPORT.md b/SUPPORT.md
+index 0fb262f81f40..48fb462221cf 100644
+--- a/SUPPORT.md
++++ b/SUPPORT.md
+@@ -179,13 +179,18 @@ Support for running qemu-xen device model in a linux stubdomain.
+
+ Status: Tech Preview
+
+-## Liveupdate of C xenstored daemon
++## Xenstore
+
+- Status: Tech Preview
++### C xenstored daemon
+
+-## Liveupdate of OCaml xenstored daemon
++ Status: Supported
++ Status, Liveupdate: Tech Preview
+
+- Status: Tech Preview
++### OCaml xenstored daemon
++
++ Status: Supported
++ Status, untrusted driver domains: Supported, not security supported
++ Status, Liveupdate: Not functional
+
+ ## Toolstack/3rd party
+
+--
+2.37.4
+
diff --git a/0109-tools-xenstore-don-t-use-conn-in-as-context-for-temp.patch b/0109-tools-xenstore-don-t-use-conn-in-as-context-for-temp.patch
new file mode 100644
index 0000000..c9a2e6e
--- /dev/null
+++ b/0109-tools-xenstore-don-t-use-conn-in-as-context-for-temp.patch
@@ -0,0 +1,716 @@
+From 607e186fe094f8d1c78572cd3b1f7a43730203c1 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:10 +0200
+Subject: [PATCH 109/126] tools/xenstore: don't use conn->in as context for
+ temporary allocations
+
+Using the struct buffered_data pointer of the currently processed request
+for temporary data allocations has a major drawback: the used area (and
+with that the temporary data) is freed only after the response to the
+request has been written to the ring page or has been read via the
+socket. This can happen much later in case a guest isn't reading its
+responses fast enough.
+
+As the temporary data can be safely freed after creating the response,
+add a temporary context for that purpose and use that for allocating
+the temporary memory, as it was already the case before commit
+cc0612464896 ("xenstore: add small default data buffer to internal
+struct").
+
+Some sub-functions need to gain the "const" attribute for the talloc
+context.
+
+This is XSA-416 / CVE-2022-42319.
+
+Fixes: cc0612464896 ("xenstore: add small default data buffer to internal struct")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 2a587de219cc0765330fbf9fac6827bfaf29e29b)
+---
+ tools/xenstore/xenstored_control.c | 31 ++++++-----
+ tools/xenstore/xenstored_control.h | 3 +-
+ tools/xenstore/xenstored_core.c | 76 ++++++++++++++++----------
+ tools/xenstore/xenstored_domain.c | 29 ++++++----
+ tools/xenstore/xenstored_domain.h | 21 ++++---
+ tools/xenstore/xenstored_transaction.c | 14 +++--
+ tools/xenstore/xenstored_transaction.h | 6 +-
+ tools/xenstore/xenstored_watch.c | 9 +--
+ tools/xenstore/xenstored_watch.h | 6 +-
+ 9 files changed, 118 insertions(+), 77 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_control.c b/tools/xenstore/xenstored_control.c
+index 980279fa53ff..95a60bf57858 100644
+--- a/tools/xenstore/xenstored_control.c
++++ b/tools/xenstore/xenstored_control.c
+@@ -107,7 +107,7 @@ static const char *lu_begin(struct connection *conn)
+
+ struct cmd_s {
+ char *cmd;
+- int (*func)(void *, struct connection *, char **, int);
++ int (*func)(const void *, struct connection *, char **, int);
+ char *pars;
+ /*
+ * max_pars can be used to limit the size of the parameter vector,
+@@ -119,7 +119,7 @@ struct cmd_s {
+ unsigned int max_pars;
+ };
+
+-static int do_control_check(void *ctx, struct connection *conn,
++static int do_control_check(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ if (num)
+@@ -131,7 +131,7 @@ static int do_control_check(void *ctx, struct connection *conn,
+ return 0;
+ }
+
+-static int do_control_log(void *ctx, struct connection *conn,
++static int do_control_log(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ if (num != 1)
+@@ -233,7 +233,7 @@ static int quota_get(const void *ctx, struct connection *conn,
+ return domain_get_quota(ctx, conn, atoi(vec[0]));
+ }
+
+-static int do_control_quota(void *ctx, struct connection *conn,
++static int do_control_quota(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ if (num == 0)
+@@ -245,7 +245,7 @@ static int do_control_quota(void *ctx, struct connection *conn,
+ return quota_get(ctx, conn, vec, num);
+ }
+
+-static int do_control_quota_s(void *ctx, struct connection *conn,
++static int do_control_quota_s(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ if (num == 0)
+@@ -258,7 +258,7 @@ static int do_control_quota_s(void *ctx, struct connection *conn,
+ }
+
+ #ifdef __MINIOS__
+-static int do_control_memreport(void *ctx, struct connection *conn,
++static int do_control_memreport(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ if (num)
+@@ -270,7 +270,7 @@ static int do_control_memreport(void *ctx, struct connection *conn,
+ return 0;
+ }
+ #else
+-static int do_control_logfile(void *ctx, struct connection *conn,
++static int do_control_logfile(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ if (num != 1)
+@@ -285,7 +285,7 @@ static int do_control_logfile(void *ctx, struct connection *conn,
+ return 0;
+ }
+
+-static int do_control_memreport(void *ctx, struct connection *conn,
++static int do_control_memreport(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ FILE *fp;
+@@ -325,7 +325,7 @@ static int do_control_memreport(void *ctx, struct connection *conn,
+ }
+ #endif
+
+-static int do_control_print(void *ctx, struct connection *conn,
++static int do_control_print(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ if (num != 1)
+@@ -802,7 +802,7 @@ static const char *lu_start(const void *ctx, struct connection *conn,
+ return NULL;
+ }
+
+-static int do_control_lu(void *ctx, struct connection *conn,
++static int do_control_lu(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ const char *ret = NULL;
+@@ -852,7 +852,7 @@ static int do_control_lu(void *ctx, struct connection *conn,
+ }
+ #endif
+
+-static int do_control_help(void *, struct connection *, char **, int);
++static int do_control_help(const void *, struct connection *, char **, int);
+
+ static struct cmd_s cmds[] = {
+ { "check", do_control_check, "" },
+@@ -891,7 +891,7 @@ static struct cmd_s cmds[] = {
+ { "help", do_control_help, "" },
+ };
+
+-static int do_control_help(void *ctx, struct connection *conn,
++static int do_control_help(const void *ctx, struct connection *conn,
+ char **vec, int num)
+ {
+ int cmd, len = 0;
+@@ -927,7 +927,8 @@ static int do_control_help(void *ctx, struct connection *conn,
+ return 0;
+ }
+
+-int do_control(struct connection *conn, struct buffered_data *in)
++int do_control(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ unsigned int cmd, num, off;
+ char **vec = NULL;
+@@ -947,11 +948,11 @@ int do_control(struct connection *conn, struct buffered_data *in)
+ num = xs_count_strings(in->buffer, in->used);
+ if (cmds[cmd].max_pars)
+ num = min(num, cmds[cmd].max_pars);
+- vec = talloc_array(in, char *, num);
++ vec = talloc_array(ctx, char *, num);
+ if (!vec)
+ return ENOMEM;
+ if (get_strings(in, vec, num) < num)
+ return EIO;
+
+- return cmds[cmd].func(in, conn, vec + 1, num - 1);
++ return cmds[cmd].func(ctx, conn, vec + 1, num - 1);
+ }
+diff --git a/tools/xenstore/xenstored_control.h b/tools/xenstore/xenstored_control.h
+index aac61f05908f..6430c3769361 100644
+--- a/tools/xenstore/xenstored_control.h
++++ b/tools/xenstore/xenstored_control.h
+@@ -16,5 +16,6 @@
+ along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+-int do_control(struct connection *conn, struct buffered_data *in);
++int do_control(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+ void lu_read_state(void);
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index f27d5c0101bc..806f24bbab8b 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -1214,11 +1214,13 @@ static struct node *get_node_canonicalized(struct connection *conn,
+ return get_node(conn, ctx, *canonical_name, perm);
+ }
+
+-static int send_directory(struct connection *conn, struct buffered_data *in)
++static int send_directory(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct node *node;
+
+- node = get_node_canonicalized(conn, in, onearg(in), NULL, XS_PERM_READ);
++ node = get_node_canonicalized(conn, ctx, onearg(in), NULL,
++ XS_PERM_READ);
+ if (!node)
+ return errno;
+
+@@ -1227,7 +1229,7 @@ static int send_directory(struct connection *conn, struct buffered_data *in)
+ return 0;
+ }
+
+-static int send_directory_part(struct connection *conn,
++static int send_directory_part(const void *ctx, struct connection *conn,
+ struct buffered_data *in)
+ {
+ unsigned int off, len, maxlen, genlen;
+@@ -1239,7 +1241,8 @@ static int send_directory_part(struct connection *conn,
+ return EINVAL;
+
+ /* First arg is node name. */
+- node = get_node_canonicalized(conn, in, in->buffer, NULL, XS_PERM_READ);
++ node = get_node_canonicalized(conn, ctx, in->buffer, NULL,
++ XS_PERM_READ);
+ if (!node)
+ return errno;
+
+@@ -1266,7 +1269,7 @@ static int send_directory_part(struct connection *conn,
+ break;
+ }
+
+- data = talloc_array(in, char, genlen + len + 1);
++ data = talloc_array(ctx, char, genlen + len + 1);
+ if (!data)
+ return ENOMEM;
+
+@@ -1282,11 +1285,13 @@ static int send_directory_part(struct connection *conn,
+ return 0;
+ }
+
+-static int do_read(struct connection *conn, struct buffered_data *in)
++static int do_read(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct node *node;
+
+- node = get_node_canonicalized(conn, in, onearg(in), NULL, XS_PERM_READ);
++ node = get_node_canonicalized(conn, ctx, onearg(in), NULL,
++ XS_PERM_READ);
+ if (!node)
+ return errno;
+
+@@ -1476,7 +1481,8 @@ err:
+ }
+
+ /* path, data... */
+-static int do_write(struct connection *conn, struct buffered_data *in)
++static int do_write(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ unsigned int offset, datalen;
+ struct node *node;
+@@ -1490,12 +1496,12 @@ static int do_write(struct connection *conn, struct buffered_data *in)
+ offset = strlen(vec[0]) + 1;
+ datalen = in->used - offset;
+
+- node = get_node_canonicalized(conn, in, vec[0], &name, XS_PERM_WRITE);
++ node = get_node_canonicalized(conn, ctx, vec[0], &name, XS_PERM_WRITE);
+ if (!node) {
+ /* No permissions, invalid input? */
+ if (errno != ENOENT)
+ return errno;
+- node = create_node(conn, in, name, in->buffer + offset,
++ node = create_node(conn, ctx, name, in->buffer + offset,
+ datalen);
+ if (!node)
+ return errno;
+@@ -1506,18 +1512,19 @@ static int do_write(struct connection *conn, struct buffered_data *in)
+ return errno;
+ }
+
+- fire_watches(conn, in, name, node, false, NULL);
++ fire_watches(conn, ctx, name, node, false, NULL);
+ send_ack(conn, XS_WRITE);
+
+ return 0;
+ }
+
+-static int do_mkdir(struct connection *conn, struct buffered_data *in)
++static int do_mkdir(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct node *node;
+ char *name;
+
+- node = get_node_canonicalized(conn, in, onearg(in), &name,
++ node = get_node_canonicalized(conn, ctx, onearg(in), &name,
+ XS_PERM_WRITE);
+
+ /* If it already exists, fine. */
+@@ -1527,10 +1534,10 @@ static int do_mkdir(struct connection *conn, struct buffered_data *in)
+ return errno;
+ if (!name)
+ return ENOMEM;
+- node = create_node(conn, in, name, NULL, 0);
++ node = create_node(conn, ctx, name, NULL, 0);
+ if (!node)
+ return errno;
+- fire_watches(conn, in, name, node, false, NULL);
++ fire_watches(conn, ctx, name, node, false, NULL);
+ }
+ send_ack(conn, XS_MKDIR);
+
+@@ -1628,24 +1635,25 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node,
+ }
+
+
+-static int do_rm(struct connection *conn, struct buffered_data *in)
++static int do_rm(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct node *node;
+ int ret;
+ char *name;
+ char *parentname;
+
+- node = get_node_canonicalized(conn, in, onearg(in), &name,
++ node = get_node_canonicalized(conn, ctx, onearg(in), &name,
+ XS_PERM_WRITE);
+ if (!node) {
+ /* Didn't exist already? Fine, if parent exists. */
+ if (errno == ENOENT) {
+ if (!name)
+ return ENOMEM;
+- parentname = get_parent(in, name);
++ parentname = get_parent(ctx, name);
+ if (!parentname)
+ return errno;
+- node = read_node(conn, in, parentname);
++ node = read_node(conn, ctx, parentname);
+ if (node) {
+ send_ack(conn, XS_RM);
+ return 0;
+@@ -1660,7 +1668,7 @@ static int do_rm(struct connection *conn, struct buffered_data *in)
+ if (streq(name, "/"))
+ return EINVAL;
+
+- ret = _rm(conn, in, node, name);
++ ret = _rm(conn, ctx, node, name);
+ if (ret)
+ return ret;
+
+@@ -1670,13 +1678,15 @@ static int do_rm(struct connection *conn, struct buffered_data *in)
+ }
+
+
+-static int do_get_perms(struct connection *conn, struct buffered_data *in)
++static int do_get_perms(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct node *node;
+ char *strings;
+ unsigned int len;
+
+- node = get_node_canonicalized(conn, in, onearg(in), NULL, XS_PERM_READ);
++ node = get_node_canonicalized(conn, ctx, onearg(in), NULL,
++ XS_PERM_READ);
+ if (!node)
+ return errno;
+
+@@ -1689,7 +1699,8 @@ static int do_get_perms(struct connection *conn, struct buffered_data *in)
+ return 0;
+ }
+
+-static int do_set_perms(struct connection *conn, struct buffered_data *in)
++static int do_set_perms(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct node_perms perms, old_perms;
+ char *name, *permstr;
+@@ -1706,7 +1717,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
+
+ permstr = in->buffer + strlen(in->buffer) + 1;
+
+- perms.p = talloc_array(in, struct xs_permissions, perms.num);
++ perms.p = talloc_array(ctx, struct xs_permissions, perms.num);
+ if (!perms.p)
+ return ENOMEM;
+ if (!xs_strings_to_perms(perms.p, perms.num, permstr))
+@@ -1721,7 +1732,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
+ }
+
+ /* We must own node to do this (tools can do this too). */
+- node = get_node_canonicalized(conn, in, in->buffer, &name,
++ node = get_node_canonicalized(conn, ctx, in->buffer, &name,
+ XS_PERM_WRITE | XS_PERM_OWNER);
+ if (!node)
+ return errno;
+@@ -1756,7 +1767,7 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
+ return errno;
+ }
+
+- fire_watches(conn, in, name, node, false, &old_perms);
++ fire_watches(conn, ctx, name, node, false, &old_perms);
+ send_ack(conn, XS_SET_PERMS);
+
+ return 0;
+@@ -1764,7 +1775,8 @@ static int do_set_perms(struct connection *conn, struct buffered_data *in)
+
+ static struct {
+ const char *str;
+- int (*func)(struct connection *conn, struct buffered_data *in);
++ int (*func)(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+ unsigned int flags;
+ #define XS_FLAG_NOTID (1U << 0) /* Ignore transaction id. */
+ #define XS_FLAG_PRIV (1U << 1) /* Privileged domain only. */
+@@ -1840,6 +1852,7 @@ static void process_message(struct connection *conn, struct buffered_data *in)
+ struct transaction *trans;
+ enum xsd_sockmsg_type type = in->hdr.msg.type;
+ int ret;
++ void *ctx;
+
+ if ((unsigned int)type >= XS_TYPE_COUNT || !wire_funcs[type].func) {
+ eprintf("Client unknown operation %i", type);
+@@ -1860,10 +1873,17 @@ static void process_message(struct connection *conn, struct buffered_data *in)
+ return;
+ }
+
++ ctx = talloc_new(NULL);
++ if (!ctx) {
++ send_error(conn, ENOMEM);
++ return;
++ }
++
+ assert(conn->transaction == NULL);
+ conn->transaction = trans;
+
+- ret = wire_funcs[type].func(conn, in);
++ ret = wire_funcs[type].func(ctx, conn, in);
++ talloc_free(ctx);
+ if (ret)
+ send_error(conn, ret);
+
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index 3d5142581332..d262f4e9dbdf 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -336,7 +336,7 @@ bool domain_can_write(struct connection *conn)
+ return ((intf->rsp_prod - intf->rsp_cons) != XENSTORE_RING_SIZE);
+ }
+
+-static char *talloc_domain_path(void *context, unsigned int domid)
++static char *talloc_domain_path(const void *context, unsigned int domid)
+ {
+ return talloc_asprintf(context, "/local/domain/%u", domid);
+ }
+@@ -540,7 +540,8 @@ static struct domain *introduce_domain(const void *ctx,
+ }
+
+ /* domid, gfn, evtchn, path */
+-int do_introduce(struct connection *conn, struct buffered_data *in)
++int do_introduce(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct domain *domain;
+ char *vec[3];
+@@ -558,7 +559,7 @@ int do_introduce(struct connection *conn, struct buffered_data *in)
+ if (port <= 0)
+ return EINVAL;
+
+- domain = introduce_domain(in, domid, port, false);
++ domain = introduce_domain(ctx, domid, port, false);
+ if (!domain)
+ return errno;
+
+@@ -581,7 +582,8 @@ static struct domain *find_connected_domain(unsigned int domid)
+ return domain;
+ }
+
+-int do_set_target(struct connection *conn, struct buffered_data *in)
++int do_set_target(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ char *vec[2];
+ unsigned int domid, tdomid;
+@@ -625,7 +627,8 @@ static struct domain *onearg_domain(struct connection *conn,
+ }
+
+ /* domid */
+-int do_release(struct connection *conn, struct buffered_data *in)
++int do_release(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct domain *domain;
+
+@@ -640,7 +643,8 @@ int do_release(struct connection *conn, struct buffered_data *in)
+ return 0;
+ }
+
+-int do_resume(struct connection *conn, struct buffered_data *in)
++int do_resume(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct domain *domain;
+
+@@ -655,7 +659,8 @@ int do_resume(struct connection *conn, struct buffered_data *in)
+ return 0;
+ }
+
+-int do_get_domain_path(struct connection *conn, struct buffered_data *in)
++int do_get_domain_path(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ char *path;
+ const char *domid_str = onearg(in);
+@@ -663,18 +668,17 @@ int do_get_domain_path(struct connection *conn, struct buffered_data *in)
+ if (!domid_str)
+ return EINVAL;
+
+- path = talloc_domain_path(conn, atoi(domid_str));
++ path = talloc_domain_path(ctx, atoi(domid_str));
+ if (!path)
+ return errno;
+
+ send_reply(conn, XS_GET_DOMAIN_PATH, path, strlen(path) + 1);
+
+- talloc_free(path);
+-
+ return 0;
+ }
+
+-int do_is_domain_introduced(struct connection *conn, struct buffered_data *in)
++int do_is_domain_introduced(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ int result;
+ unsigned int domid;
+@@ -695,7 +699,8 @@ int do_is_domain_introduced(struct connection *conn, struct buffered_data *in)
+ }
+
+ /* Allow guest to reset all watches */
+-int do_reset_watches(struct connection *conn, struct buffered_data *in)
++int do_reset_watches(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ conn_delete_all_watches(conn);
+ conn_delete_all_transactions(conn);
+diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
+index 0f883936f413..da513443cd46 100644
+--- a/tools/xenstore/xenstored_domain.h
++++ b/tools/xenstore/xenstored_domain.h
+@@ -24,25 +24,32 @@ void handle_event(void);
+ void check_domains(bool restore);
+
+ /* domid, mfn, eventchn, path */
+-int do_introduce(struct connection *conn, struct buffered_data *in);
++int do_introduce(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+
+ /* domid */
+-int do_is_domain_introduced(struct connection *conn, struct buffered_data *in);
++int do_is_domain_introduced(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+
+ /* domid */
+-int do_release(struct connection *conn, struct buffered_data *in);
++int do_release(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+
+ /* domid */
+-int do_resume(struct connection *conn, struct buffered_data *in);
++int do_resume(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+
+ /* domid, target */
+-int do_set_target(struct connection *conn, struct buffered_data *in);
++int do_set_target(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+
+ /* domid */
+-int do_get_domain_path(struct connection *conn, struct buffered_data *in);
++int do_get_domain_path(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+
+ /* Allow guest to reset all watches */
+-int do_reset_watches(struct connection *conn, struct buffered_data *in);
++int do_reset_watches(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+
+ void domain_init(int evtfd);
+ void dom0_init(void);
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index 28774813de83..3e3eb47326cc 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -481,7 +481,8 @@ struct transaction *transaction_lookup(struct connection *conn, uint32_t id)
+ return ERR_PTR(-ENOENT);
+ }
+
+-int do_transaction_start(struct connection *conn, struct buffered_data *in)
++int do_transaction_start(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct transaction *trans, *exists;
+ char id_str[20];
+@@ -494,8 +495,8 @@ int do_transaction_start(struct connection *conn, struct buffered_data *in)
+ conn->transaction_started > quota_max_transaction)
+ return ENOSPC;
+
+- /* Attach transaction to input for autofree until it's complete */
+- trans = talloc_zero(in, struct transaction);
++ /* Attach transaction to ctx for autofree until it's complete */
++ trans = talloc_zero(ctx, struct transaction);
+ if (!trans)
+ return ENOMEM;
+
+@@ -544,7 +545,8 @@ static int transaction_fix_domains(struct transaction *trans, bool update)
+ return 0;
+ }
+
+-int do_transaction_end(struct connection *conn, struct buffered_data *in)
++int do_transaction_end(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ const char *arg = onearg(in);
+ struct transaction *trans;
+@@ -562,8 +564,8 @@ int do_transaction_end(struct connection *conn, struct buffered_data *in)
+ if (!conn->transaction_started)
+ conn->ta_start_time = 0;
+
+- /* Attach transaction to in for auto-cleanup */
+- talloc_steal(in, trans);
++ /* Attach transaction to ctx for auto-cleanup */
++ talloc_steal(ctx, trans);
+
+ if (streq(arg, "T")) {
+ if (trans->fail)
+diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h
+index e3cbd6b23095..39d7f81c5127 100644
+--- a/tools/xenstore/xenstored_transaction.h
++++ b/tools/xenstore/xenstored_transaction.h
+@@ -29,8 +29,10 @@ struct transaction;
+
+ extern uint64_t generation;
+
+-int do_transaction_start(struct connection *conn, struct buffered_data *node);
+-int do_transaction_end(struct connection *conn, struct buffered_data *in);
++int do_transaction_start(const void *ctx, struct connection *conn,
++ struct buffered_data *node);
++int do_transaction_end(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+
+ struct transaction *transaction_lookup(struct connection *conn, uint32_t id);
+
+diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
+index 4970e9f1a1b9..854bbcad6e45 100644
+--- a/tools/xenstore/xenstored_watch.c
++++ b/tools/xenstore/xenstored_watch.c
+@@ -243,7 +243,7 @@ static struct watch *add_watch(struct connection *conn, char *path, char *token,
+ return NULL;
+ }
+
+-int do_watch(struct connection *conn, struct buffered_data *in)
++int do_watch(const void *ctx, struct connection *conn, struct buffered_data *in)
+ {
+ struct watch *watch;
+ char *vec[2];
+@@ -252,7 +252,7 @@ int do_watch(struct connection *conn, struct buffered_data *in)
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec))
+ return EINVAL;
+
+- errno = check_watch_path(conn, in, &(vec[0]), &relative);
++ errno = check_watch_path(conn, ctx, &(vec[0]), &relative);
+ if (errno)
+ return errno;
+
+@@ -283,7 +283,8 @@ int do_watch(struct connection *conn, struct buffered_data *in)
+ return 0;
+ }
+
+-int do_unwatch(struct connection *conn, struct buffered_data *in)
++int do_unwatch(const void *ctx, struct connection *conn,
++ struct buffered_data *in)
+ {
+ struct watch *watch;
+ char *node, *vec[2];
+@@ -291,7 +292,7 @@ int do_unwatch(struct connection *conn, struct buffered_data *in)
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec))
+ return EINVAL;
+
+- node = canonicalize(conn, in, vec[0]);
++ node = canonicalize(conn, ctx, vec[0]);
+ if (!node)
+ return ENOMEM;
+ list_for_each_entry(watch, &conn->watches, list) {
+diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h
+index 0e693f0839cd..091890edca96 100644
+--- a/tools/xenstore/xenstored_watch.h
++++ b/tools/xenstore/xenstored_watch.h
+@@ -21,8 +21,10 @@
+
+ #include "xenstored_core.h"
+
+-int do_watch(struct connection *conn, struct buffered_data *in);
+-int do_unwatch(struct connection *conn, struct buffered_data *in);
++int do_watch(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
++int do_unwatch(const void *ctx, struct connection *conn,
++ struct buffered_data *in);
+
+ /* Fire all watches: !exact means all the children are affected (ie. rm). */
+ void fire_watches(struct connection *conn, const void *tmp, const char *name,
+--
+2.37.4
+
diff --git a/0110-tools-xenstore-fix-checking-node-permissions.patch b/0110-tools-xenstore-fix-checking-node-permissions.patch
new file mode 100644
index 0000000..77345f7
--- /dev/null
+++ b/0110-tools-xenstore-fix-checking-node-permissions.patch
@@ -0,0 +1,143 @@
+From 8012324cb9e676bd342a5adfda1700525f195e2e Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:10 +0200
+Subject: [PATCH 110/126] tools/xenstore: fix checking node permissions
+
+Today chk_domain_generation() is being used to check whether a node
+permission entry is still valid or whether it is referring to a domain
+no longer existing. This is done by comparing the node's and the
+domain's generation count.
+
+In case no struct domain is existing for a checked domain, but the
+domain itself is valid, chk_domain_generation() assumes it is being
+called due to the first node created for a new domain and it will
+return success.
+
+This might be wrong in case the checked permission is related to an
+old domain, which has just been replaced with a new domain using the
+same domid.
+
+Fix that by letting chk_domain_generation() fail in case a struct
+domain isn't found. In order to cover the case of the first node for
+a new domain try to allocate the needed struct domain explicitly when
+processing the related SET_PERMS command. In case a referenced domain
+isn't existing, flag the related permission to be ignored right away.
+
+This is XSA-417 / CVE-2022-42320.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit ab128218225d3542596ca3a02aee80d55494bef8)
+---
+ tools/xenstore/xenstored_core.c | 5 +++++
+ tools/xenstore/xenstored_domain.c | 37 +++++++++++++++++++++----------
+ tools/xenstore/xenstored_domain.h | 1 +
+ 3 files changed, 31 insertions(+), 12 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 806f24bbab8b..8aecd425f274 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -1723,6 +1723,11 @@ static int do_set_perms(const void *ctx, struct connection *conn,
+ if (!xs_strings_to_perms(perms.p, perms.num, permstr))
+ return errno;
+
++ if (domain_alloc_permrefs(&perms) < 0)
++ return ENOMEM;
++ if (perms.p[0].perms & XS_PERM_IGNORE)
++ return ENOENT;
++
+ /* First arg is node name. */
+ if (strstarts(in->buffer, "@")) {
+ if (set_perms_special(conn, in->buffer, &perms))
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index d262f4e9dbdf..8b503c2dfe07 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -881,7 +881,6 @@ int domain_entry_inc(struct connection *conn, struct node *node)
+ * count (used for testing whether a node permission is older than a domain).
+ *
+ * Return values:
+- * -1: error
+ * 0: domain has higher generation count (it is younger than a node with the
+ * given count), or domain isn't existing any longer
+ * 1: domain is older than the node
+@@ -889,20 +888,38 @@ int domain_entry_inc(struct connection *conn, struct node *node)
+ static int chk_domain_generation(unsigned int domid, uint64_t gen)
+ {
+ struct domain *d;
+- xc_dominfo_t dominfo;
+
+ if (!xc_handle && domid == 0)
+ return 1;
+
+ d = find_domain_struct(domid);
+- if (d)
+- return (d->generation <= gen) ? 1 : 0;
+
+- if (!get_domain_info(domid, &dominfo))
+- return 0;
++ return (d && d->generation <= gen) ? 1 : 0;
++}
+
+- d = alloc_domain(NULL, domid);
+- return d ? 1 : -1;
++/*
++ * Allocate all missing struct domain referenced by a permission set, marking
++ * permission entries for not existing domains to be ignored.
++ * Returns 0 on success, or -1 if alloc_domain() fails for a referenced domain.
++ */
++int domain_alloc_permrefs(struct node_perms *perms)
++{
++ unsigned int i, domid;
++ struct domain *d;
++ xc_dominfo_t dominfo;
++
++ for (i = 0; i < perms->num; i++) {
++ domid = perms->p[i].id;
++ d = find_domain_struct(domid);
++ if (!d) {
++ if (!get_domain_info(domid, &dominfo))
++ perms->p[i].perms |= XS_PERM_IGNORE;
++ else if (!alloc_domain(NULL, domid))
++ return -1;
++ }
++ }
++
++ return 0;
+ }
+
+ /*
+@@ -915,8 +932,6 @@ int domain_adjust_node_perms(struct connection *conn, struct node *node)
+ int ret;
+
+ ret = chk_domain_generation(node->perms.p[0].id, node->generation);
+- if (ret < 0)
+- return errno;
+
+ /* If the owner doesn't exist any longer give it to priv domain. */
+ if (!ret) {
+@@ -933,8 +948,6 @@ int domain_adjust_node_perms(struct connection *conn, struct node *node)
+ continue;
+ ret = chk_domain_generation(node->perms.p[i].id,
+ node->generation);
+- if (ret < 0)
+- return errno;
+ if (!ret)
+ node->perms.p[i].perms |= XS_PERM_IGNORE;
+ }
+diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
+index da513443cd46..0b4f56b8146c 100644
+--- a/tools/xenstore/xenstored_domain.h
++++ b/tools/xenstore/xenstored_domain.h
+@@ -66,6 +66,7 @@ bool domain_is_unprivileged(struct connection *conn);
+
+ /* Remove node permissions for no longer existing domains. */
+ int domain_adjust_node_perms(struct connection *conn, struct node *node);
++int domain_alloc_permrefs(struct node_perms *perms);
+
+ /* Quota manipulation */
+ int domain_entry_inc(struct connection *conn, struct node *);
+--
+2.37.4
+
diff --git a/0111-tools-xenstore-remove-recursion-from-construct_node.patch b/0111-tools-xenstore-remove-recursion-from-construct_node.patch
new file mode 100644
index 0000000..aa63d32
--- /dev/null
+++ b/0111-tools-xenstore-remove-recursion-from-construct_node.patch
@@ -0,0 +1,126 @@
+From 62755d0a90344e704062e7b6943a3fa2dc5e02e6 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:11 +0200
+Subject: [PATCH 111/126] tools/xenstore: remove recursion from
+ construct_node()
+
+In order to reduce stack usage due to recursion, switch
+construct_node() to use a loop instead.
+
+This is part of XSA-418 / CVE-2022-42321.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit da8ee25d02a5447ba39a9800ee2a710ae1f54222)
+---
+ tools/xenstore/xenstored_core.c | 86 +++++++++++++++++++++------------
+ 1 file changed, 55 insertions(+), 31 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 8aecd425f274..46a37e5257e5 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -1343,45 +1343,69 @@ static int add_child(const void *ctx, struct node *parent, const char *name)
+ static struct node *construct_node(struct connection *conn, const void *ctx,
+ const char *name)
+ {
+- struct node *parent, *node;
+- char *parentname = get_parent(ctx, name);
++ const char **names = NULL;
++ unsigned int levels = 0;
++ struct node *node = NULL;
++ struct node *parent = NULL;
++ const char *parentname = talloc_strdup(ctx, name);
+
+ if (!parentname)
+ return NULL;
+
+- /* If parent doesn't exist, create it. */
+- parent = read_node(conn, parentname, parentname);
+- if (!parent && errno == ENOENT)
+- parent = construct_node(conn, ctx, parentname);
+- if (!parent)
+- return NULL;
++ /* Walk the path up until an existing node is found. */
++ while (!parent) {
++ names = talloc_realloc(ctx, names, const char *, levels + 1);
++ if (!names)
++ goto nomem;
+
+- /* Add child to parent. */
+- if (add_child(ctx, parent, name))
+- goto nomem;
++ /*
++ * names[0] is the name of the node to construct initially,
++ * names[1] is its parent, and so on.
++ */
++ names[levels] = parentname;
++ parentname = get_parent(ctx, parentname);
++ if (!parentname)
++ return NULL;
+
+- /* Allocate node */
+- node = talloc(ctx, struct node);
+- if (!node)
+- goto nomem;
+- node->name = talloc_strdup(node, name);
+- if (!node->name)
+- goto nomem;
++ /* Try to read parent node until we found an existing one. */
++ parent = read_node(conn, ctx, parentname);
++ if (!parent && (errno != ENOENT || !strcmp(parentname, "/")))
++ return NULL;
+
+- /* Inherit permissions, except unprivileged domains own what they create */
+- node->perms.num = parent->perms.num;
+- node->perms.p = talloc_memdup(node, parent->perms.p,
+- node->perms.num * sizeof(*node->perms.p));
+- if (!node->perms.p)
+- goto nomem;
+- if (domain_is_unprivileged(conn))
+- node->perms.p[0].id = conn->id;
++ levels++;
++ }
++
++ /* Walk the path down again constructing the missing nodes. */
++ for (; levels > 0; levels--) {
++ /* Add child to parent. */
++ if (add_child(ctx, parent, names[levels - 1]))
++ goto nomem;
++
++ /* Allocate node */
++ node = talloc(ctx, struct node);
++ if (!node)
++ goto nomem;
++ node->name = talloc_steal(node, names[levels - 1]);
++
++ /* Inherit permissions, unpriv domains own what they create. */
++ node->perms.num = parent->perms.num;
++ node->perms.p = talloc_memdup(node, parent->perms.p,
++ node->perms.num *
++ sizeof(*node->perms.p));
++ if (!node->perms.p)
++ goto nomem;
++ if (domain_is_unprivileged(conn))
++ node->perms.p[0].id = conn->id;
++
++ /* No children, no data */
++ node->children = node->data = NULL;
++ node->childlen = node->datalen = 0;
++ node->acc.memory = 0;
++ node->parent = parent;
++
++ parent = node;
++ }
+
+- /* No children, no data */
+- node->children = node->data = NULL;
+- node->childlen = node->datalen = 0;
+- node->acc.memory = 0;
+- node->parent = parent;
+ return node;
+
+ nomem:
+--
+2.37.4
+
diff --git a/0112-tools-xenstore-don-t-let-remove_child_entry-call-cor.patch b/0112-tools-xenstore-don-t-let-remove_child_entry-call-cor.patch
new file mode 100644
index 0000000..8250ff0
--- /dev/null
+++ b/0112-tools-xenstore-don-t-let-remove_child_entry-call-cor.patch
@@ -0,0 +1,110 @@
+From b9a005b0b4520261c6c362fca55500782837f119 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:11 +0200
+Subject: [PATCH 112/126] tools/xenstore: don't let remove_child_entry() call
+ corrupt()
+
+In case of write_node() returning an error, remove_child_entry() will
+call corrupt() today. This could result in an endless recursion, as
+remove_child_entry() is called by corrupt(), too:
+
+corrupt()
+ check_store()
+ check_store_()
+ remove_child_entry()
+
+Fix that by letting remove_child_entry() return an error instead and
+let the caller decide what to do.
+
+This is part of XSA-418 / CVE-2022-42321.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 0c00c51f3bc8206c7f9cf87d014650157bee2bf4)
+---
+ tools/xenstore/xenstored_core.c | 36 ++++++++++++++++++---------------
+ 1 file changed, 20 insertions(+), 16 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 46a37e5257e5..4c3897721bdd 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -1574,15 +1574,15 @@ static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
+ memmove(mem + off, mem + off + len, total - off - len);
+ }
+
+-static void remove_child_entry(struct connection *conn, struct node *node,
+- size_t offset)
++static int remove_child_entry(struct connection *conn, struct node *node,
++ size_t offset)
+ {
+ size_t childlen = strlen(node->children + offset);
+
+ memdel(node->children, offset, childlen + 1, node->childlen);
+ node->childlen -= childlen + 1;
+- if (write_node(conn, node, true))
+- corrupt(conn, "Can't update parent node '%s'", node->name);
++
++ return write_node(conn, node, true);
+ }
+
+ static void delete_child(struct connection *conn,
+@@ -1592,7 +1592,9 @@ static void delete_child(struct connection *conn,
+
+ for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
+ if (streq(node->children+i, childname)) {
+- remove_child_entry(conn, node, i);
++ if (remove_child_entry(conn, node, i))
++ corrupt(conn, "Can't update parent node '%s'",
++ node->name);
+ return;
+ }
+ }
+@@ -2226,6 +2228,17 @@ int remember_string(struct hashtable *hash, const char *str)
+ return hashtable_insert(hash, k, (void *)1);
+ }
+
++static int rm_child_entry(struct node *node, size_t off, size_t len)
++{
++ if (!recovery)
++ return off;
++
++ if (remove_child_entry(NULL, node, off))
++ log("check_store: child entry could not be removed from '%s'",
++ node->name);
++
++ return off - len - 1;
++}
+
+ /**
+ * A node has a children field that names the children of the node, separated
+@@ -2278,12 +2291,7 @@ static int check_store_(const char *name, struct hashtable *reachable)
+ if (hashtable_search(children, childname)) {
+ log("check_store: '%s' is duplicated!",
+ childname);
+-
+- if (recovery) {
+- remove_child_entry(NULL, node,
+- i);
+- i -= childlen + 1;
+- }
++ i = rm_child_entry(node, i, childlen);
+ }
+ else {
+ if (!remember_string(children,
+@@ -2300,11 +2308,7 @@ static int check_store_(const char *name, struct hashtable *reachable)
+ } else if (errno != ENOMEM) {
+ log("check_store: No child '%s' found!\n",
+ childname);
+-
+- if (recovery) {
+- remove_child_entry(NULL, node, i);
+- i -= childlen + 1;
+- }
++ i = rm_child_entry(node, i, childlen);
+ } else {
+ log("check_store: ENOMEM");
+ ret = ENOMEM;
+--
+2.37.4
+
diff --git a/0113-tools-xenstore-add-generic-treewalk-function.patch b/0113-tools-xenstore-add-generic-treewalk-function.patch
new file mode 100644
index 0000000..b80c574
--- /dev/null
+++ b/0113-tools-xenstore-add-generic-treewalk-function.patch
@@ -0,0 +1,250 @@
+From 83b6c511a5989a83c50daae83c5b5a683d6dc096 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:11 +0200
+Subject: [PATCH 113/126] tools/xenstore: add generic treewalk function
+
+Add a generic function to walk the complete node tree. It will start
+at "/" and descend recursively into each child, calling a function
+specified by the caller. Depending on the return value of the user
+specified function the walk will be aborted, continued, or the current
+child will be skipped by not descending into its children.
+
+This is part of XSA-418 / CVE-2022-42321.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 0d7c5d19bc27492360196e7dad2b227908564fff)
+---
+ tools/xenstore/xenstored_core.c | 143 +++++++++++++++++++++++++++++---
+ tools/xenstore/xenstored_core.h | 40 +++++++++
+ 2 files changed, 170 insertions(+), 13 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 4c3897721bdd..7463d0a002d7 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -1804,6 +1804,135 @@ static int do_set_perms(const void *ctx, struct connection *conn,
+ return 0;
+ }
+
++static char *child_name(const void *ctx, const char *s1, const char *s2)
++{
++ if (strcmp(s1, "/"))
++ return talloc_asprintf(ctx, "%s/%s", s1, s2);
++ return talloc_asprintf(ctx, "/%s", s2);
++}
++
++static int rm_from_parent(struct connection *conn, struct node *parent,
++ const char *name)
++{
++ size_t off;
++
++ if (!parent)
++ return WALK_TREE_ERROR_STOP;
++
++ for (off = parent->childoff - 1; off && parent->children[off - 1];
++ off--);
++ if (remove_child_entry(conn, parent, off)) {
++ log("treewalk: child entry could not be removed from '%s'",
++ parent->name);
++ return WALK_TREE_ERROR_STOP;
++ }
++ parent->childoff = off;
++
++ return WALK_TREE_OK;
++}
++
++static int walk_call_func(const void *ctx, struct connection *conn,
++ struct node *node, struct node *parent, void *arg,
++ int (*func)(const void *ctx, struct connection *conn,
++ struct node *node, void *arg))
++{
++ int ret;
++
++ if (!func)
++ return WALK_TREE_OK;
++
++ ret = func(ctx, conn, node, arg);
++ if (ret == WALK_TREE_RM_CHILDENTRY && parent)
++ ret = rm_from_parent(conn, parent, node->name);
++
++ return ret;
++}
++
++int walk_node_tree(const void *ctx, struct connection *conn, const char *root,
++ struct walk_funcs *funcs, void *arg)
++{
++ int ret = 0;
++ void *tmpctx;
++ char *name;
++ struct node *node = NULL;
++ struct node *parent = NULL;
++
++ tmpctx = talloc_new(ctx);
++ if (!tmpctx) {
++ errno = ENOMEM;
++ return WALK_TREE_ERROR_STOP;
++ }
++ name = talloc_strdup(tmpctx, root);
++ if (!name) {
++ errno = ENOMEM;
++ talloc_free(tmpctx);
++ return WALK_TREE_ERROR_STOP;
++ }
++
++ /* Continue the walk until an error is returned. */
++ while (ret >= 0) {
++ /* node == NULL possible only for the initial loop iteration. */
++ if (node) {
++ /* Go one step up if ret or if last child finished. */
++ if (ret || node->childoff >= node->childlen) {
++ parent = node->parent;
++ /* Call function AFTER processing a node. */
++ ret = walk_call_func(ctx, conn, node, parent,
++ arg, funcs->exit);
++ /* Last node, so exit loop. */
++ if (!parent)
++ break;
++ talloc_free(node);
++ /* Continue with parent. */
++ node = parent;
++ continue;
++ }
++ /* Get next child of current node. */
++ name = child_name(tmpctx, node->name,
++ node->children + node->childoff);
++ if (!name) {
++ ret = WALK_TREE_ERROR_STOP;
++ break;
++ }
++ /* Point to next child. */
++ node->childoff += strlen(node->children +
++ node->childoff) + 1;
++ /* Descent into children. */
++ parent = node;
++ }
++ /* Read next node (root node or next child). */
++ node = read_node(conn, tmpctx, name);
++ if (!node) {
++ /* Child not found - should not happen! */
++ /* ENOENT case can be handled by supplied function. */
++ if (errno == ENOENT && funcs->enoent)
++ ret = funcs->enoent(ctx, conn, parent, name,
++ arg);
++ else
++ ret = WALK_TREE_ERROR_STOP;
++ if (!parent)
++ break;
++ if (ret == WALK_TREE_RM_CHILDENTRY)
++ ret = rm_from_parent(conn, parent, name);
++ if (ret < 0)
++ break;
++ talloc_free(name);
++ node = parent;
++ continue;
++ }
++ talloc_free(name);
++ node->parent = parent;
++ node->childoff = 0;
++ /* Call function BEFORE processing a node. */
++ ret = walk_call_func(ctx, conn, node, parent, arg,
++ funcs->enter);
++ }
++
++ talloc_free(tmpctx);
++
++ return ret < 0 ? ret : WALK_TREE_OK;
++}
++
+ static struct {
+ const char *str;
+ int (*func)(const void *ctx, struct connection *conn,
+@@ -2206,18 +2335,6 @@ static int keys_equal_fn(void *key1, void *key2)
+ return 0 == strcmp((char *)key1, (char *)key2);
+ }
+
+-
+-static char *child_name(const char *s1, const char *s2)
+-{
+- if (strcmp(s1, "/")) {
+- return talloc_asprintf(NULL, "%s/%s", s1, s2);
+- }
+- else {
+- return talloc_asprintf(NULL, "/%s", s2);
+- }
+-}
+-
+-
+ int remember_string(struct hashtable *hash, const char *str)
+ {
+ char *k = malloc(strlen(str) + 1);
+@@ -2277,7 +2394,7 @@ static int check_store_(const char *name, struct hashtable *reachable)
+ while (i < node->childlen && !ret) {
+ struct node *childnode;
+ size_t childlen = strlen(node->children + i);
+- char * childname = child_name(node->name,
++ char * childname = child_name(NULL, node->name,
+ node->children + i);
+
+ if (!childname) {
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 1eb3708f82dd..f0fd8c352857 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -195,6 +195,7 @@ struct node {
+
+ /* Children, each nul-terminated. */
+ unsigned int childlen;
++ unsigned int childoff; /* Used by walk_node_tree() internally. */
+ char *children;
+
+ /* Allocation information for node currently in store. */
+@@ -334,6 +335,45 @@ void read_state_buffered_data(const void *ctx, struct connection *conn,
+ const struct xs_state_connection *sc);
+ void read_state_node(const void *ctx, const void *state);
+
++/*
++ * Walk the node tree below root calling funcs->enter() and funcs->exit() for
++ * each node. funcs->enter() is being called when entering a node, so before
++ * any of the children of the node is processed. funcs->exit() is being
++ * called when leaving the node, so after all children have been processed.
++ * funcs->enoent() is being called when a node isn't existing.
++ * funcs->*() return values:
++ * < 0: tree walk is stopped, walk_node_tree() returns funcs->*() return value
++ * in case WALK_TREE_ERROR_STOP is returned, errno should be set
++ * WALK_TREE_OK: tree walk is continuing
++ * WALK_TREE_SKIP_CHILDREN: tree walk won't descend below current node, but
++ * walk continues
++ * WALK_TREE_RM_CHILDENTRY: Remove the child entry from its parent and write
++ * the modified parent node back to the data base, implies to not descend
++ * below the current node, but to continue the walk
++ * funcs->*() is allowed to modify the node it is called for in the data base.
++ * In case funcs->enter() is deleting the node, it must not return WALK_TREE_OK
++ * in order to avoid descending into no longer existing children.
++ */
++/* Return values for funcs->*() and walk_node_tree(). */
++#define WALK_TREE_SUCCESS_STOP -100 /* Stop walk early, no error. */
++#define WALK_TREE_ERROR_STOP -1 /* Stop walk due to error. */
++#define WALK_TREE_OK 0 /* No error. */
++/* Return value for funcs->*() only. */
++#define WALK_TREE_SKIP_CHILDREN 1 /* Don't recurse below current node. */
++#define WALK_TREE_RM_CHILDENTRY 2 /* Remove child entry from parent. */
++
++struct walk_funcs {
++ int (*enter)(const void *ctx, struct connection *conn,
++ struct node *node, void *arg);
++ int (*exit)(const void *ctx, struct connection *conn,
++ struct node *node, void *arg);
++ int (*enoent)(const void *ctx, struct connection *conn,
++ struct node *parent, char *name, void *arg);
++};
++
++int walk_node_tree(const void *ctx, struct connection *conn, const char *root,
++ struct walk_funcs *funcs, void *arg);
++
+ #endif /* _XENSTORED_CORE_H */
+
+ /*
+--
+2.37.4
+
diff --git a/0114-tools-xenstore-simplify-check_store.patch b/0114-tools-xenstore-simplify-check_store.patch
new file mode 100644
index 0000000..6247114
--- /dev/null
+++ b/0114-tools-xenstore-simplify-check_store.patch
@@ -0,0 +1,114 @@
+From 4096512a70fd0bb65e40ed4269a1ca74dbb16220 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:12 +0200
+Subject: [PATCH 114/126] tools/xenstore: simplify check_store()
+
+check_store() is using a hash table for storing all node names it has
+found via walking the tree. Additionally it is using another hash table
+for all children of a node to detect duplicate child names.
+
+Simplify that by dropping the second hash table as the first one is
+already holding all the needed information.
+
+This is part of XSA-418 / CVE-2022-42321.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 70f719f52a220bc5bc987e4dd28e14a7039a176b)
+---
+ tools/xenstore/xenstored_core.c | 47 +++++++++++----------------------
+ 1 file changed, 15 insertions(+), 32 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 7463d0a002d7..a48255c64cad 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -2378,50 +2378,34 @@ static int check_store_(const char *name, struct hashtable *reachable)
+ if (node) {
+ size_t i = 0;
+
+- struct hashtable * children =
+- create_hashtable(16, hash_from_key_fn, keys_equal_fn);
+- if (!children) {
+- log("check_store create table: ENOMEM");
+- return ENOMEM;
+- }
+-
+ if (!remember_string(reachable, name)) {
+- hashtable_destroy(children, 0);
+ log("check_store: ENOMEM");
+ return ENOMEM;
+ }
+
+ while (i < node->childlen && !ret) {
+- struct node *childnode;
++ struct node *childnode = NULL;
+ size_t childlen = strlen(node->children + i);
+- char * childname = child_name(NULL, node->name,
+- node->children + i);
++ char *childname = child_name(NULL, node->name,
++ node->children + i);
+
+ if (!childname) {
+ log("check_store: ENOMEM");
+ ret = ENOMEM;
+ break;
+ }
++
++ if (hashtable_search(reachable, childname)) {
++ log("check_store: '%s' is duplicated!",
++ childname);
++ i = rm_child_entry(node, i, childlen);
++ goto next;
++ }
++
+ childnode = read_node(NULL, childname, childname);
+-
++
+ if (childnode) {
+- if (hashtable_search(children, childname)) {
+- log("check_store: '%s' is duplicated!",
+- childname);
+- i = rm_child_entry(node, i, childlen);
+- }
+- else {
+- if (!remember_string(children,
+- childname)) {
+- log("check_store: ENOMEM");
+- talloc_free(childnode);
+- talloc_free(childname);
+- ret = ENOMEM;
+- break;
+- }
+- ret = check_store_(childname,
+- reachable);
+- }
++ ret = check_store_(childname, reachable);
+ } else if (errno != ENOMEM) {
+ log("check_store: No child '%s' found!\n",
+ childname);
+@@ -2431,19 +2415,18 @@ static int check_store_(const char *name, struct hashtable *reachable)
+ ret = ENOMEM;
+ }
+
++ next:
+ talloc_free(childnode);
+ talloc_free(childname);
+ i += childlen + 1;
+ }
+
+- hashtable_destroy(children, 0 /* Don't free values (they are
+- all (void *)1) */);
+ talloc_free(node);
+ } else if (errno != ENOMEM) {
+ /* Impossible, because no database should ever be without the
+ root, and otherwise, we've just checked in our caller
+ (which made a recursive call to get here). */
+-
++
+ log("check_store: No child '%s' found: impossible!", name);
+ } else {
+ log("check_store: ENOMEM");
+--
+2.37.4
+
diff --git a/0115-tools-xenstore-use-treewalk-for-check_store.patch b/0115-tools-xenstore-use-treewalk-for-check_store.patch
new file mode 100644
index 0000000..74d58f4
--- /dev/null
+++ b/0115-tools-xenstore-use-treewalk-for-check_store.patch
@@ -0,0 +1,172 @@
+From a95277ee36e1db2f67e8091f4ea401975d341659 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:12 +0200
+Subject: [PATCH 115/126] tools/xenstore: use treewalk for check_store()
+
+Instead of doing an open tree walk using call recursion, use
+walk_node_tree() when checking the store for inconsistencies.
+
+This will reduce code size and avoid many nesting levels of function
+calls which could potentially exhaust the stack.
+
+This is part of XSA-418 / CVE-2022-42321.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit a07cc0ec60612f414bedf2bafb26ec38d2602e95)
+---
+ tools/xenstore/xenstored_core.c | 109 +++++++++-----------------------
+ 1 file changed, 30 insertions(+), 79 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index a48255c64cad..ed8bc9b02ed2 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -2345,18 +2345,6 @@ int remember_string(struct hashtable *hash, const char *str)
+ return hashtable_insert(hash, k, (void *)1);
+ }
+
+-static int rm_child_entry(struct node *node, size_t off, size_t len)
+-{
+- if (!recovery)
+- return off;
+-
+- if (remove_child_entry(NULL, node, off))
+- log("check_store: child entry could not be removed from '%s'",
+- node->name);
+-
+- return off - len - 1;
+-}
+-
+ /**
+ * A node has a children field that names the children of the node, separated
+ * by NULs. We check whether there are entries in there that are duplicated
+@@ -2370,70 +2358,29 @@ static int rm_child_entry(struct node *node, size_t off, size_t len)
+ * As we go, we record each node in the given reachable hashtable. These
+ * entries will be used later in clean_store.
+ */
+-static int check_store_(const char *name, struct hashtable *reachable)
++static int check_store_step(const void *ctx, struct connection *conn,
++ struct node *node, void *arg)
+ {
+- struct node *node = read_node(NULL, name, name);
+- int ret = 0;
++ struct hashtable *reachable = arg;
+
+- if (node) {
+- size_t i = 0;
+-
+- if (!remember_string(reachable, name)) {
+- log("check_store: ENOMEM");
+- return ENOMEM;
+- }
+-
+- while (i < node->childlen && !ret) {
+- struct node *childnode = NULL;
+- size_t childlen = strlen(node->children + i);
+- char *childname = child_name(NULL, node->name,
+- node->children + i);
+-
+- if (!childname) {
+- log("check_store: ENOMEM");
+- ret = ENOMEM;
+- break;
+- }
+-
+- if (hashtable_search(reachable, childname)) {
+- log("check_store: '%s' is duplicated!",
+- childname);
+- i = rm_child_entry(node, i, childlen);
+- goto next;
+- }
+-
+- childnode = read_node(NULL, childname, childname);
+-
+- if (childnode) {
+- ret = check_store_(childname, reachable);
+- } else if (errno != ENOMEM) {
+- log("check_store: No child '%s' found!\n",
+- childname);
+- i = rm_child_entry(node, i, childlen);
+- } else {
+- log("check_store: ENOMEM");
+- ret = ENOMEM;
+- }
+-
+- next:
+- talloc_free(childnode);
+- talloc_free(childname);
+- i += childlen + 1;
+- }
+-
+- talloc_free(node);
+- } else if (errno != ENOMEM) {
+- /* Impossible, because no database should ever be without the
+- root, and otherwise, we've just checked in our caller
+- (which made a recursive call to get here). */
+-
+- log("check_store: No child '%s' found: impossible!", name);
+- } else {
+- log("check_store: ENOMEM");
+- ret = ENOMEM;
++ if (hashtable_search(reachable, (void *)node->name)) {
++ log("check_store: '%s' is duplicated!", node->name);
++ return recovery ? WALK_TREE_RM_CHILDENTRY
++ : WALK_TREE_SKIP_CHILDREN;
+ }
+
+- return ret;
++ if (!remember_string(reachable, node->name))
++ return WALK_TREE_ERROR_STOP;
++
++ return WALK_TREE_OK;
++}
++
++static int check_store_enoent(const void *ctx, struct connection *conn,
++ struct node *parent, char *name, void *arg)
++{
++ log("check_store: node '%s' not found", name);
++
++ return recovery ? WALK_TREE_RM_CHILDENTRY : WALK_TREE_OK;
+ }
+
+
+@@ -2482,24 +2429,28 @@ static void clean_store(struct hashtable *reachable)
+
+ void check_store(void)
+ {
+- char * root = talloc_strdup(NULL, "/");
+- struct hashtable * reachable =
+- create_hashtable(16, hash_from_key_fn, keys_equal_fn);
+-
++ struct hashtable *reachable;
++ struct walk_funcs walkfuncs = {
++ .enter = check_store_step,
++ .enoent = check_store_enoent,
++ };
++
++ reachable = create_hashtable(16, hash_from_key_fn, keys_equal_fn);
+ if (!reachable) {
+ log("check_store: ENOMEM");
+ return;
+ }
+
+ log("Checking store ...");
+- if (!check_store_(root, reachable) &&
+- !check_transactions(reachable))
++ if (walk_node_tree(NULL, NULL, "/", &walkfuncs, reachable)) {
++ if (errno == ENOMEM)
++ log("check_store: ENOMEM");
++ } else if (!check_transactions(reachable))
+ clean_store(reachable);
+ log("Checking store complete.");
+
+ hashtable_destroy(reachable, 0 /* Don't free values (they are all
+ (void *)1) */);
+- talloc_free(root);
+ }
+
+
+--
+2.37.4
+
diff --git a/0116-tools-xenstore-use-treewalk-for-deleting-nodes.patch b/0116-tools-xenstore-use-treewalk-for-deleting-nodes.patch
new file mode 100644
index 0000000..2dcf32e
--- /dev/null
+++ b/0116-tools-xenstore-use-treewalk-for-deleting-nodes.patch
@@ -0,0 +1,180 @@
+From 9ead5845034c04a5c6e04d9b069d9c13141f4f33 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:12 +0200
+Subject: [PATCH 116/126] tools/xenstore: use treewalk for deleting nodes
+
+Instead of doing an open tree walk using call recursion, use
+walk_node_tree() when deleting a sub-tree of nodes.
+
+This will reduce code size and avoid many nesting levels of function
+calls which could potentially exhaust the stack.
+
+This is part of XSA-418 / CVE-2022-42321.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit ea16962053a6849a6e7cada549ba7f8c586d85c6)
+---
+ tools/xenstore/xenstored_core.c | 99 ++++++++++++++-------------------
+ 1 file changed, 43 insertions(+), 56 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index ed8bc9b02ed2..9576411757fa 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -1300,21 +1300,6 @@ static int do_read(const void *ctx, struct connection *conn,
+ return 0;
+ }
+
+-static void delete_node_single(struct connection *conn, struct node *node)
+-{
+- TDB_DATA key;
+-
+- if (access_node(conn, node, NODE_ACCESS_DELETE, &key))
+- return;
+-
+- if (do_tdb_delete(conn, &key, &node->acc) != 0) {
+- corrupt(conn, "Could not delete '%s'", node->name);
+- return;
+- }
+-
+- domain_entry_dec(conn, node);
+-}
+-
+ /* Must not be / */
+ static char *basename(const char *name)
+ {
+@@ -1585,69 +1570,59 @@ static int remove_child_entry(struct connection *conn, struct node *node,
+ return write_node(conn, node, true);
+ }
+
+-static void delete_child(struct connection *conn,
+- struct node *node, const char *childname)
++static int delete_child(struct connection *conn,
++ struct node *node, const char *childname)
+ {
+ unsigned int i;
+
+ for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
+ if (streq(node->children+i, childname)) {
+- if (remove_child_entry(conn, node, i))
+- corrupt(conn, "Can't update parent node '%s'",
+- node->name);
+- return;
++ errno = remove_child_entry(conn, node, i) ? EIO : 0;
++ return errno;
+ }
+ }
+ corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
++
++ errno = EIO;
++ return errno;
+ }
+
+-static int delete_node(struct connection *conn, const void *ctx,
+- struct node *parent, struct node *node, bool watch_exact)
++static int delnode_sub(const void *ctx, struct connection *conn,
++ struct node *node, void *arg)
+ {
+- char *name;
++ const char *root = arg;
++ bool watch_exact;
++ int ret;
++ TDB_DATA key;
+
+- /* Delete children. */
+- while (node->childlen) {
+- struct node *child;
++ /* Any error here will probably be repeated for all following calls. */
++ ret = access_node(conn, node, NODE_ACCESS_DELETE, &key);
++ if (ret > 0)
++ return WALK_TREE_SUCCESS_STOP;
+
+- name = talloc_asprintf(node, "%s/%s", node->name,
+- node->children);
+- child = name ? read_node(conn, node, name) : NULL;
+- if (child) {
+- if (delete_node(conn, ctx, node, child, true))
+- return errno;
+- } else {
+- trace("delete_node: Error deleting child '%s/%s'!\n",
+- node->name, node->children);
+- /* Quit deleting. */
+- errno = ENOMEM;
+- return errno;
+- }
+- talloc_free(name);
+- }
++ /* In case of error stop the walk. */
++ if (!ret && do_tdb_delete(conn, &key, &node->acc))
++ return WALK_TREE_SUCCESS_STOP;
+
+ /*
+ * Fire the watches now, when we can still see the node permissions.
+ * This fine as we are single threaded and the next possible read will
+ * be handled only after the node has been really removed.
+- */
++ */
++ watch_exact = strcmp(root, node->name);
+ fire_watches(conn, ctx, node->name, node, watch_exact, NULL);
+- delete_node_single(conn, node);
+- delete_child(conn, parent, basename(node->name));
+- talloc_free(node);
+
+- return 0;
++ domain_entry_dec(conn, node);
++
++ return WALK_TREE_RM_CHILDENTRY;
+ }
+
+-static int _rm(struct connection *conn, const void *ctx, struct node *node,
+- const char *name)
++static int _rm(struct connection *conn, const void *ctx, const char *name)
+ {
+- /*
+- * Deleting node by node, so the result is always consistent even in
+- * case of a failure.
+- */
+ struct node *parent;
+ char *parentname = get_parent(ctx, name);
++ struct walk_funcs walkfuncs = { .exit = delnode_sub };
++ int ret;
+
+ if (!parentname)
+ return errno;
+@@ -1655,9 +1630,21 @@ static int _rm(struct connection *conn, const void *ctx, struct node *node,
+ parent = read_node(conn, ctx, parentname);
+ if (!parent)
+ return read_node_can_propagate_errno() ? errno : EINVAL;
+- node->parent = parent;
+
+- return delete_node(conn, ctx, parent, node, false);
++ ret = walk_node_tree(ctx, conn, name, &walkfuncs, (void *)name);
++ if (ret < 0) {
++ if (ret == WALK_TREE_ERROR_STOP) {
++ corrupt(conn, "error when deleting sub-nodes of %s\n",
++ name);
++ errno = EIO;
++ }
++ return errno;
++ }
++
++ if (delete_child(conn, parent, basename(name)))
++ return errno;
++
++ return 0;
+ }
+
+
+@@ -1694,7 +1681,7 @@ static int do_rm(const void *ctx, struct connection *conn,
+ if (streq(name, "/"))
+ return EINVAL;
+
+- ret = _rm(conn, ctx, node, name);
++ ret = _rm(conn, ctx, name);
+ if (ret)
+ return ret;
+
+--
+2.37.4
+
diff --git a/0117-tools-xenstore-use-treewalk-for-creating-node-record.patch b/0117-tools-xenstore-use-treewalk-for-creating-node-record.patch
new file mode 100644
index 0000000..6271169
--- /dev/null
+++ b/0117-tools-xenstore-use-treewalk-for-creating-node-record.patch
@@ -0,0 +1,242 @@
+From 84674f206778e9b3d8d67c6c76aa8094a262d5ec Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:12 +0200
+Subject: [PATCH 117/126] tools/xenstore: use treewalk for creating node
+ records
+
+Instead of doing an open tree walk using call recursion, use
+walk_node_tree() when creating the node records during a live update.
+
+This will reduce code size and avoid many nesting levels of function
+calls which could potentially exhaust the stack.
+
+This is part of XSA-418 / CVE-2022-42321.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 297ac246a5d8ed656b349641288f3402dcc0251e)
+---
+ tools/xenstore/xenstored_core.c | 127 ++++++++++++------------------
+ tools/xenstore/xenstored_core.h | 3 +-
+ tools/xenstore/xenstored_domain.c | 2 +-
+ 3 files changed, 54 insertions(+), 78 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 9576411757fa..e8cdfeef50c7 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -2990,132 +2990,109 @@ const char *dump_state_buffered_data(FILE *fp, const struct connection *c,
+ return NULL;
+ }
+
+-const char *dump_state_node_perms(FILE *fp, struct xs_state_node *sn,
+- const struct xs_permissions *perms,
++const char *dump_state_node_perms(FILE *fp, const struct xs_permissions *perms,
+ unsigned int n_perms)
+ {
+ unsigned int p;
+
+ for (p = 0; p < n_perms; p++) {
++ struct xs_state_node_perm sp;
++
+ switch ((int)perms[p].perms & ~XS_PERM_IGNORE) {
+ case XS_PERM_READ:
+- sn->perms[p].access = XS_STATE_NODE_PERM_READ;
++ sp.access = XS_STATE_NODE_PERM_READ;
+ break;
+ case XS_PERM_WRITE:
+- sn->perms[p].access = XS_STATE_NODE_PERM_WRITE;
++ sp.access = XS_STATE_NODE_PERM_WRITE;
+ break;
+ case XS_PERM_READ | XS_PERM_WRITE:
+- sn->perms[p].access = XS_STATE_NODE_PERM_BOTH;
++ sp.access = XS_STATE_NODE_PERM_BOTH;
+ break;
+ default:
+- sn->perms[p].access = XS_STATE_NODE_PERM_NONE;
++ sp.access = XS_STATE_NODE_PERM_NONE;
+ break;
+ }
+- sn->perms[p].flags = (perms[p].perms & XS_PERM_IGNORE)
++ sp.flags = (perms[p].perms & XS_PERM_IGNORE)
+ ? XS_STATE_NODE_PERM_IGNORE : 0;
+- sn->perms[p].domid = perms[p].id;
+- }
++ sp.domid = perms[p].id;
+
+- if (fwrite(sn->perms, sizeof(*sn->perms), n_perms, fp) != n_perms)
+- return "Dump node permissions error";
++ if (fwrite(&sp, sizeof(sp), 1, fp) != 1)
++ return "Dump node permissions error";
++ }
+
+ return NULL;
+ }
+
+-static const char *dump_state_node_tree(FILE *fp, char *path)
++struct dump_node_data {
++ FILE *fp;
++ const char *err;
++};
++
++static int dump_state_node_err(struct dump_node_data *data, const char *err)
+ {
+- unsigned int pathlen, childlen, p = 0;
++ data->err = err;
++ return WALK_TREE_ERROR_STOP;
++}
++
++static int dump_state_node(const void *ctx, struct connection *conn,
++ struct node *node, void *arg)
++{
++ struct dump_node_data *data = arg;
++ FILE *fp = data->fp;
++ unsigned int pathlen;
+ struct xs_state_record_header head;
+ struct xs_state_node sn;
+- TDB_DATA key, data;
+- const struct xs_tdb_record_hdr *hdr;
+- const char *child;
+ const char *ret;
+
+- pathlen = strlen(path) + 1;
+-
+- set_tdb_key(path, &key);
+- data = tdb_fetch(tdb_ctx, key);
+- if (data.dptr == NULL)
+- return "Error reading node";
+-
+- /* Clean up in case of failure. */
+- talloc_steal(path, data.dptr);
+-
+- hdr = (void *)data.dptr;
++ pathlen = strlen(node->name) + 1;
+
+ head.type = XS_STATE_TYPE_NODE;
+ head.length = sizeof(sn);
+ sn.conn_id = 0;
+ sn.ta_id = 0;
+ sn.ta_access = 0;
+- sn.perm_n = hdr->num_perms;
++ sn.perm_n = node->perms.num;
+ sn.path_len = pathlen;
+- sn.data_len = hdr->datalen;
+- head.length += hdr->num_perms * sizeof(*sn.perms);
++ sn.data_len = node->datalen;
++ head.length += node->perms.num * sizeof(*sn.perms);
+ head.length += pathlen;
+- head.length += hdr->datalen;
++ head.length += node->datalen;
+ head.length = ROUNDUP(head.length, 3);
+
+ if (fwrite(&head, sizeof(head), 1, fp) != 1)
+- return "Dump node state error";
++ return dump_state_node_err(data, "Dump node head error");
+ if (fwrite(&sn, sizeof(sn), 1, fp) != 1)
+- return "Dump node state error";
++ return dump_state_node_err(data, "Dump node state error");
+
+- ret = dump_state_node_perms(fp, &sn, hdr->perms, hdr->num_perms);
++ ret = dump_state_node_perms(fp, node->perms.p, node->perms.num);
+ if (ret)
+- return ret;
++ return dump_state_node_err(data, ret);
+
+- if (fwrite(path, pathlen, 1, fp) != 1)
+- return "Dump node path error";
+- if (hdr->datalen &&
+- fwrite(hdr->perms + hdr->num_perms, hdr->datalen, 1, fp) != 1)
+- return "Dump node data error";
++ if (fwrite(node->name, pathlen, 1, fp) != 1)
++ return dump_state_node_err(data, "Dump node path error");
++
++ if (node->datalen && fwrite(node->data, node->datalen, 1, fp) != 1)
++ return dump_state_node_err(data, "Dump node data error");
+
+ ret = dump_state_align(fp);
+ if (ret)
+- return ret;
++ return dump_state_node_err(data, ret);
+
+- child = (char *)(hdr->perms + hdr->num_perms) + hdr->datalen;
+-
+- /*
+- * Use path for constructing children paths.
+- * As we don't write out nodes without having written their parent
+- * already we will never clobber a part of the path we'll need later.
+- */
+- pathlen--;
+- if (path[pathlen - 1] != '/') {
+- path[pathlen] = '/';
+- pathlen++;
+- }
+- while (p < hdr->childlen) {
+- childlen = strlen(child) + 1;
+- if (pathlen + childlen > XENSTORE_ABS_PATH_MAX)
+- return "Dump node path length error";
+- strcpy(path + pathlen, child);
+- ret = dump_state_node_tree(fp, path);
+- if (ret)
+- return ret;
+- p += childlen;
+- child += childlen;
+- }
+-
+- talloc_free(data.dptr);
+-
+- return NULL;
++ return WALK_TREE_OK;
+ }
+
+ const char *dump_state_nodes(FILE *fp, const void *ctx)
+ {
+- char *path;
++ struct dump_node_data data = {
++ .fp = fp,
++ .err = "Dump node walk error"
++ };
++ struct walk_funcs walkfuncs = { .enter = dump_state_node };
+
+- path = talloc_size(ctx, XENSTORE_ABS_PATH_MAX);
+- if (!path)
+- return "Path buffer allocation error";
++ if (walk_node_tree(ctx, NULL, "/", &walkfuncs, &data))
++ return data.err;
+
+- strcpy(path, "/");
+-
+- return dump_state_node_tree(fp, path);
++ return NULL;
+ }
+
+ void read_state_global(const void *ctx, const void *state)
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index f0fd8c352857..3190494bbeb5 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -326,8 +326,7 @@ const char *dump_state_buffered_data(FILE *fp, const struct connection *c,
+ const struct connection *conn,
+ struct xs_state_connection *sc);
+ const char *dump_state_nodes(FILE *fp, const void *ctx);
+-const char *dump_state_node_perms(FILE *fp, struct xs_state_node *sn,
+- const struct xs_permissions *perms,
++const char *dump_state_node_perms(FILE *fp, const struct xs_permissions *perms,
+ unsigned int n_perms);
+
+ void read_state_global(const void *ctx, const void *state);
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index 8b503c2dfe07..a91cc75ab59b 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -1449,7 +1449,7 @@ static const char *dump_state_special_node(FILE *fp, const char *name,
+ if (fwrite(&sn, sizeof(sn), 1, fp) != 1)
+ return "Dump special node error";
+
+- ret = dump_state_node_perms(fp, &sn, perms->p, perms->num);
++ ret = dump_state_node_perms(fp, perms->p, perms->num);
+ if (ret)
+ return ret;
+
+--
+2.37.4
+
diff --git a/0118-tools-xenstore-remove-nodes-owned-by-destroyed-domai.patch b/0118-tools-xenstore-remove-nodes-owned-by-destroyed-domai.patch
new file mode 100644
index 0000000..a95a48e
--- /dev/null
+++ b/0118-tools-xenstore-remove-nodes-owned-by-destroyed-domai.patch
@@ -0,0 +1,299 @@
+From da87661d058c4a6cf2ea6439771b9834f1c06223 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:12 +0200
+Subject: [PATCH 118/126] tools/xenstore: remove nodes owned by destroyed
+ domain
+
+In case a domain is removed from Xenstore, remove all nodes owned by
+it per default.
+
+This tackles the problem that nodes might be created by a domain
+outside its home path in Xenstore, leading to Xenstore hogging more
+and more memory. Domain quotas don't work in this case if the guest
+reboots in between.
+
+Since XSA-322 ownership of such stale nodes is transferred to dom0,
+which is helping against unintended access, but not against OOM of
+Xenstore.
+
+As a fallback for weird cases add a Xenstore start parameter for
+keeping today's way to handle stale nodes, adding the risk of Xenstore
+hitting an OOM situation.
+
+This is part of XSA-419 / CVE-2022-42322.
+
+Fixes: 496306324d8d ("tools/xenstore: revoke access rights for removed domains")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 755d3f9debf8879448211fffb018f556136f6a79)
+---
+ tools/xenstore/xenstored_core.c | 17 +++++--
+ tools/xenstore/xenstored_core.h | 4 ++
+ tools/xenstore/xenstored_domain.c | 84 +++++++++++++++++++++++--------
+ tools/xenstore/xenstored_domain.h | 2 +-
+ 4 files changed, 80 insertions(+), 27 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index e8cdfeef50c7..d5b2e59b0db6 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -80,6 +80,7 @@ static bool verbose = false;
+ LIST_HEAD(connections);
+ int tracefd = -1;
+ static bool recovery = true;
++bool keep_orphans = false;
+ static int reopen_log_pipe[2];
+ static int reopen_log_pipe0_pollfd_idx = -1;
+ char *tracefile = NULL;
+@@ -722,7 +723,7 @@ struct node *read_node(struct connection *conn, const void *ctx,
+ node->perms.p = hdr->perms;
+ node->acc.domid = node->perms.p[0].id;
+ node->acc.memory = data.dsize;
+- if (domain_adjust_node_perms(conn, node))
++ if (domain_adjust_node_perms(node))
+ goto error;
+
+ /* If owner is gone reset currently accounted memory size. */
+@@ -765,7 +766,7 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+ void *p;
+ struct xs_tdb_record_hdr *hdr;
+
+- if (domain_adjust_node_perms(conn, node))
++ if (domain_adjust_node_perms(node))
+ return errno;
+
+ data.dsize = sizeof(*hdr)
+@@ -1617,7 +1618,7 @@ static int delnode_sub(const void *ctx, struct connection *conn,
+ return WALK_TREE_RM_CHILDENTRY;
+ }
+
+-static int _rm(struct connection *conn, const void *ctx, const char *name)
++int rm_node(struct connection *conn, const void *ctx, const char *name)
+ {
+ struct node *parent;
+ char *parentname = get_parent(ctx, name);
+@@ -1681,7 +1682,7 @@ static int do_rm(const void *ctx, struct connection *conn,
+ if (streq(name, "/"))
+ return EINVAL;
+
+- ret = _rm(conn, ctx, name);
++ ret = rm_node(conn, ctx, name);
+ if (ret)
+ return ret;
+
+@@ -2537,6 +2538,8 @@ static void usage(void)
+ " -R, --no-recovery to request that no recovery should be attempted when\n"
+ " the store is corrupted (debug only),\n"
+ " -I, --internal-db store database in memory, not on disk\n"
++" -K, --keep-orphans don't delete nodes owned by a domain when the\n"
++" domain is deleted (this is a security risk!)\n"
+ " -V, --verbose to request verbose execution.\n");
+ }
+
+@@ -2561,6 +2564,7 @@ static struct option options[] = {
+ { "timeout", 1, NULL, 'w' },
+ { "no-recovery", 0, NULL, 'R' },
+ { "internal-db", 0, NULL, 'I' },
++ { "keep-orphans", 0, NULL, 'K' },
+ { "verbose", 0, NULL, 'V' },
+ { "watch-nb", 1, NULL, 'W' },
+ #ifndef NO_LIVE_UPDATE
+@@ -2641,7 +2645,7 @@ int main(int argc, char *argv[])
+ orig_argc = argc;
+ orig_argv = argv;
+
+- while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:M:Q:q:T:RVW:w:U",
++ while ((opt = getopt_long(argc, argv, "DE:F:HKNPS:t:A:M:Q:q:T:RVW:w:U",
+ options, NULL)) != -1) {
+ switch (opt) {
+ case 'D':
+@@ -2677,6 +2681,9 @@ int main(int argc, char *argv[])
+ case 'I':
+ tdb_flags = TDB_INTERNAL|TDB_NOLOCK;
+ break;
++ case 'K':
++ keep_orphans = true;
++ break;
+ case 'V':
+ verbose = true;
+ break;
+diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
+index 3190494bbeb5..9a9dbb2c3c86 100644
+--- a/tools/xenstore/xenstored_core.h
++++ b/tools/xenstore/xenstored_core.h
+@@ -233,6 +233,9 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+ struct node *read_node(struct connection *conn, const void *ctx,
+ const char *name);
+
++/* Remove a node and its children. */
++int rm_node(struct connection *conn, const void *ctx, const char *name);
++
+ void setup_structure(bool live_update);
+ struct connection *new_connection(connwritefn_t *write, connreadfn_t *read);
+ struct connection *get_connection_by_id(unsigned int conn_id);
+@@ -279,6 +282,7 @@ extern int quota_req_outstanding;
+ extern int quota_trans_nodes;
+ extern int quota_memory_per_domain_soft;
+ extern int quota_memory_per_domain_hard;
++extern bool keep_orphans;
+
+ extern unsigned int timeout_watch_event_msec;
+
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index a91cc75ab59b..ee4b19387db8 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -196,10 +196,64 @@ static void unmap_interface(void *interface)
+ xengnttab_unmap(*xgt_handle, interface, 1);
+ }
+
++static int domain_tree_remove_sub(const void *ctx, struct connection *conn,
++ struct node *node, void *arg)
++{
++ struct domain *domain = arg;
++ TDB_DATA key;
++ int ret = WALK_TREE_OK;
++
++ if (node->perms.p[0].id != domain->domid)
++ return WALK_TREE_OK;
++
++ if (keep_orphans) {
++ set_tdb_key(node->name, &key);
++ domain->nbentry--;
++ node->perms.p[0].id = priv_domid;
++ node->acc.memory = 0;
++ domain_entry_inc(NULL, node);
++ if (write_node_raw(NULL, &key, node, true)) {
++ /* That's unfortunate. We only can try to continue. */
++ syslog(LOG_ERR,
++ "error when moving orphaned node %s to dom0\n",
++ node->name);
++ } else
++ trace("orphaned node %s moved to dom0\n", node->name);
++ } else {
++ if (rm_node(NULL, ctx, node->name)) {
++ /* That's unfortunate. We only can try to continue. */
++ syslog(LOG_ERR,
++ "error when deleting orphaned node %s\n",
++ node->name);
++ } else
++ trace("orphaned node %s deleted\n", node->name);
++
++ /* Skip children in all cases in order to avoid more errors. */
++ ret = WALK_TREE_SKIP_CHILDREN;
++ }
++
++ return domain->nbentry > 0 ? ret : WALK_TREE_SUCCESS_STOP;
++}
++
++static void domain_tree_remove(struct domain *domain)
++{
++ int ret;
++ struct walk_funcs walkfuncs = { .enter = domain_tree_remove_sub };
++
++ if (domain->nbentry > 0) {
++ ret = walk_node_tree(domain, NULL, "/", &walkfuncs, domain);
++ if (ret == WALK_TREE_ERROR_STOP)
++ syslog(LOG_ERR,
++ "error when looking for orphaned nodes\n");
++ }
++}
++
+ static int destroy_domain(void *_domain)
+ {
+ struct domain *domain = _domain;
+
++ domain_tree_remove(domain);
++
+ list_del(&domain->list);
+
+ if (!domain->introduced)
+@@ -857,15 +911,15 @@ int domain_entry_inc(struct connection *conn, struct node *node)
+ struct domain *d;
+ unsigned int domid;
+
+- if (!conn)
++ if (!node->perms.p)
+ return 0;
+
+- domid = node->perms.p ? node->perms.p[0].id : conn->id;
++ domid = node->perms.p[0].id;
+
+- if (conn->transaction) {
++ if (conn && conn->transaction) {
+ transaction_entry_inc(conn->transaction, domid);
+ } else {
+- d = (domid == conn->id && conn->domain) ? conn->domain
++ d = (conn && domid == conn->id && conn->domain) ? conn->domain
+ : find_or_alloc_existing_domain(domid);
+ if (d)
+ d->nbentry++;
+@@ -926,23 +980,11 @@ int domain_alloc_permrefs(struct node_perms *perms)
+ * Remove permissions for no longer existing domains in order to avoid a new
+ * domain with the same domid inheriting the permissions.
+ */
+-int domain_adjust_node_perms(struct connection *conn, struct node *node)
++int domain_adjust_node_perms(struct node *node)
+ {
+ unsigned int i;
+ int ret;
+
+- ret = chk_domain_generation(node->perms.p[0].id, node->generation);
+-
+- /* If the owner doesn't exist any longer give it to priv domain. */
+- if (!ret) {
+- /*
+- * In theory we'd need to update the number of dom0 nodes here,
+- * but we could be called for a read of the node. So better
+- * avoid the risk to overflow the node count of dom0.
+- */
+- node->perms.p[0].id = priv_domid;
+- }
+-
+ for (i = 1; i < node->perms.num; i++) {
+ if (node->perms.p[i].perms & XS_PERM_IGNORE)
+ continue;
+@@ -960,15 +1002,15 @@ void domain_entry_dec(struct connection *conn, struct node *node)
+ struct domain *d;
+ unsigned int domid;
+
+- if (!conn)
++ if (!node->perms.p)
+ return;
+
+ domid = node->perms.p ? node->perms.p[0].id : conn->id;
+
+- if (conn->transaction) {
++ if (conn && conn->transaction) {
+ transaction_entry_dec(conn->transaction, domid);
+ } else {
+- d = (domid == conn->id && conn->domain) ? conn->domain
++ d = (conn && domid == conn->id && conn->domain) ? conn->domain
+ : find_domain_struct(domid);
+ if (d) {
+ d->nbentry--;
+@@ -1087,7 +1129,7 @@ int domain_memory_add(unsigned int domid, int mem, bool no_quota_check)
+ * exist, as accounting is done either for a domain related to
+ * the current connection, or for the domain owning a node
+ * (which is always existing, as the owner of the node is
+- * tested to exist and replaced by domid 0 if not).
++ * tested to exist and deleted or replaced by domid 0 if not).
+ * So not finding the related domain MUST be an error in the
+ * data base.
+ */
+diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
+index 0b4f56b8146c..491d7a325bd3 100644
+--- a/tools/xenstore/xenstored_domain.h
++++ b/tools/xenstore/xenstored_domain.h
+@@ -65,7 +65,7 @@ bool domain_can_write(struct connection *conn);
+ bool domain_is_unprivileged(struct connection *conn);
+
+ /* Remove node permissions for no longer existing domains. */
+-int domain_adjust_node_perms(struct connection *conn, struct node *node);
++int domain_adjust_node_perms(struct node *node);
+ int domain_alloc_permrefs(struct node_perms *perms);
+
+ /* Quota manipulation */
+--
+2.37.4
+
diff --git a/0119-tools-xenstore-make-the-internal-memory-data-base-th.patch b/0119-tools-xenstore-make-the-internal-memory-data-base-th.patch
new file mode 100644
index 0000000..8c1611b
--- /dev/null
+++ b/0119-tools-xenstore-make-the-internal-memory-data-base-th.patch
@@ -0,0 +1,101 @@
+From 4269999ecedf79452a3fbbfab842f045d1ece16e Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:13 +0200
+Subject: [PATCH 119/126] tools/xenstore: make the internal memory data base
+ the default
+
+The only advantages of a file backed data base are the ability to dump
+its contents while Xenstore is running, and potentially using less
+swap space in case the data base can't be kept in memory.
+
+It has the major disadvantage of a huge performance overhead: switching
+to keep the data base in memory only speeds up live update of xenstored
+with 120000 nodes from 20 minutes to 11 seconds. A complete tree walk
+of this configuration will be reduced from 7 seconds to 280 msecs
+(measured by "xenstore-control check").
+
+So make the internal memory data base the default and enhance the
+"--internal-db" command line parameter to take an optional argument
+that allows switching the data base back to the file based one.
+
+This is part of XSA-419.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit d174fefa90487ddd25ebc618028f67b2e8a1f795)
+---
+ tools/helpers/init-xenstore-domain.c | 4 ++--
+ tools/xenstore/xenstored_core.c | 13 ++++++++-----
+ 2 files changed, 10 insertions(+), 7 deletions(-)
+
+diff --git a/tools/helpers/init-xenstore-domain.c b/tools/helpers/init-xenstore-domain.c
+index 32689abd7479..d080dae5d3b8 100644
+--- a/tools/helpers/init-xenstore-domain.c
++++ b/tools/helpers/init-xenstore-domain.c
+@@ -214,9 +214,9 @@ static int build(xc_interface *xch)
+ }
+
+ if ( param )
+- snprintf(cmdline, 512, "--event %d --internal-db %s", rv, param);
++ snprintf(cmdline, 512, "--event %d %s", rv, param);
+ else
+- snprintf(cmdline, 512, "--event %d --internal-db", rv);
++ snprintf(cmdline, 512, "--event %d", rv);
+
+ dom->cmdline = xc_dom_strdup(dom, cmdline);
+ dom->xenstore_domid = domid;
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index d5b2e59b0db6..9ddbd934f794 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -2230,7 +2230,7 @@ static void accept_connection(int sock)
+ }
+ #endif
+
+-static int tdb_flags;
++static int tdb_flags = TDB_INTERNAL | TDB_NOLOCK;
+
+ /* We create initial nodes manually. */
+ static void manual_node(const char *name, const char *child)
+@@ -2537,7 +2537,8 @@ static void usage(void)
+ " watch-event: time a watch-event is kept pending\n"
+ " -R, --no-recovery to request that no recovery should be attempted when\n"
+ " the store is corrupted (debug only),\n"
+-" -I, --internal-db store database in memory, not on disk\n"
++" -I, --internal-db [on|off] store database in memory, not on disk, default is\n"
++" memory, with \"--internal-db off\" it is on disk\n"
+ " -K, --keep-orphans don't delete nodes owned by a domain when the\n"
+ " domain is deleted (this is a security risk!)\n"
+ " -V, --verbose to request verbose execution.\n");
+@@ -2563,7 +2564,7 @@ static struct option options[] = {
+ { "quota-soft", 1, NULL, 'q' },
+ { "timeout", 1, NULL, 'w' },
+ { "no-recovery", 0, NULL, 'R' },
+- { "internal-db", 0, NULL, 'I' },
++ { "internal-db", 2, NULL, 'I' },
+ { "keep-orphans", 0, NULL, 'K' },
+ { "verbose", 0, NULL, 'V' },
+ { "watch-nb", 1, NULL, 'W' },
+@@ -2645,7 +2646,8 @@ int main(int argc, char *argv[])
+ orig_argc = argc;
+ orig_argv = argv;
+
+- while ((opt = getopt_long(argc, argv, "DE:F:HKNPS:t:A:M:Q:q:T:RVW:w:U",
++ while ((opt = getopt_long(argc, argv,
++ "DE:F:HI::KNPS:t:A:M:Q:q:T:RVW:w:U",
+ options, NULL)) != -1) {
+ switch (opt) {
+ case 'D':
+@@ -2679,7 +2681,8 @@ int main(int argc, char *argv[])
+ tracefile = optarg;
+ break;
+ case 'I':
+- tdb_flags = TDB_INTERNAL|TDB_NOLOCK;
++ if (optarg && !strcmp(optarg, "off"))
++ tdb_flags = 0;
+ break;
+ case 'K':
+ keep_orphans = true;
+--
+2.37.4
+
diff --git a/0120-docs-enhance-xenstore.txt-with-permissions-descripti.patch b/0120-docs-enhance-xenstore.txt-with-permissions-descripti.patch
new file mode 100644
index 0000000..e0d7d9e
--- /dev/null
+++ b/0120-docs-enhance-xenstore.txt-with-permissions-descripti.patch
@@ -0,0 +1,51 @@
+From bc3921135cf8590d0f587f460be431922183c4c4 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:13 +0200
+Subject: [PATCH 120/126] docs: enhance xenstore.txt with permissions
+ description
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The permission scheme of Xenstore nodes is not really covered by
+docs/misc/xenstore.txt, other than referring to the Xen wiki.
+
+Add a paragraph explaining the permissions of nodes, and especially
+mentioning removal of nodes when a domain has been removed from
+Xenstore.
+
+This is part of XSA-419.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit d084d2c6dff7044956ebdf83a259ad6081a1d921)
+---
+ docs/misc/xenstore.txt | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/docs/misc/xenstore.txt b/docs/misc/xenstore.txt
+index a7d006519ae8..eccd596ee38c 100644
+--- a/docs/misc/xenstore.txt
++++ b/docs/misc/xenstore.txt
+@@ -43,6 +43,17 @@ bytes are forbidden; clients specifying relative paths should keep
+ them to within 2048 bytes. (See XENSTORE_*_PATH_MAX in xs_wire.h.)
+
+
++Each node has one or multiple permission entries. Permissions are
++granted by domain-id; the first permission entry of each node specifies
++the owner of the node. Permissions of a node can be changed by the
++owner of the node, but the owner can only be modified by the control
++domain (usually domain id 0). The owner always has the right to read
++and write the node, while other permissions can be set up to allow
++read and/or write access. When a domain is being removed from Xenstore,
++nodes owned by that domain will be removed together with all of those
++nodes' children.
++
++
+ Communication with xenstore is via either sockets, or event channel
+ and shared memory, as specified in io/xs_wire.h: each message in
+ either direction is a header formatted as a struct xsd_sockmsg
+--
+2.37.4
+
diff --git a/0121-tools-ocaml-xenstored-Fix-quota-bypass-on-domain-shu.patch b/0121-tools-ocaml-xenstored-Fix-quota-bypass-on-domain-shu.patch
new file mode 100644
index 0000000..722700e
--- /dev/null
+++ b/0121-tools-ocaml-xenstored-Fix-quota-bypass-on-domain-shu.patch
@@ -0,0 +1,93 @@
+From b9ede0950b3a6526d5ccea074841f093e0580948 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Wed, 12 Oct 2022 19:13:06 +0100
+Subject: [PATCH 121/126] tools/ocaml/xenstored: Fix quota bypass on domain
+ shutdown
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+XSA-322 fixed a domid reuse vulnerability by assigning Dom0 as the owner of
+any nodes left after a domain is shut down (e.g. outside its /local/domain/N
+tree).
+
+However Dom0 has no quota on purpose, so this opened up another potential
+attack vector. Avoid it by deleting these nodes instead of assigning them to
+Dom0.
+
+This is part of XSA-419 / CVE-2022-42323.
+
+Fixes: c46eff921209 ("tools/ocaml/xenstored: clean up permissions for dead domains")
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit db471408edd46af403b8bd44d180a928ad7fbb80)
+---
+ tools/ocaml/xenstored/perms.ml | 3 +--
+ tools/ocaml/xenstored/store.ml | 29 +++++++++++++++++++++--------
+ 2 files changed, 22 insertions(+), 10 deletions(-)
+
+diff --git a/tools/ocaml/xenstored/perms.ml b/tools/ocaml/xenstored/perms.ml
+index e8a16221f8fa..84f2503e8e29 100644
+--- a/tools/ocaml/xenstored/perms.ml
++++ b/tools/ocaml/xenstored/perms.ml
+@@ -64,8 +64,7 @@ let get_owner perm = perm.owner
+ * *)
+ let remove_domid ~domid perm =
+ let acl = List.filter (fun (acl_domid, _) -> acl_domid <> domid) perm.acl in
+- let owner = if perm.owner = domid then 0 else perm.owner in
+- { perm with acl; owner }
++ if perm.owner = domid then None else Some { perm with acl; owner = perm.owner }
+
+ let default0 = create 0 NONE []
+
+diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml
+index 20e67b142746..70f0c83de404 100644
+--- a/tools/ocaml/xenstored/store.ml
++++ b/tools/ocaml/xenstored/store.ml
+@@ -87,10 +87,21 @@ let check_owner node connection =
+
+ let rec recurse fct node = fct node; SymbolMap.iter (fun _ -> recurse fct) node.children
+
+-(** [recurse_map f tree] applies [f] on each node in the tree recursively *)
+-let recurse_map f =
++(** [recurse_filter_map f tree] applies [f] on each node in the tree recursively,
++ possibly removing some nodes.
++ Note that the nodes removed this way won't generate watch events.
++*)
++let recurse_filter_map f =
++ let invalid = -1 in
++ let is_valid _ node = node.perms.owner <> invalid in
+ let rec walk node =
+- f { node with children = SymbolMap.map walk node.children }
++ (* Map.filter_map is Ocaml 4.11+ only *)
++ let node =
++ { node with children =
++ SymbolMap.map walk node.children |> SymbolMap.filter is_valid } in
++ match f node with
++ | Some keep -> keep
++ | None -> { node with perms = {node.perms with owner = invalid } }
+ in
+ walk
+
+@@ -444,11 +455,13 @@ let setperms store perm path nperms =
+
+ let reset_permissions store domid =
+ Logging.info "store|node" "Cleaning up xenstore ACLs for domid %d" domid;
+- store.root <- Node.recurse_map (fun node ->
+- let perms = Perms.Node.remove_domid ~domid node.perms in
+- if perms <> node.perms then
+- Logging.debug "store|node" "Changed permissions for node %s" (Node.get_name node);
+- { node with perms }
++ store.root <- Node.recurse_filter_map (fun node ->
++ match Perms.Node.remove_domid ~domid node.perms with
++ | None -> None
++ | Some perms ->
++ if perms <> node.perms then
++ Logging.debug "store|node" "Changed permissions for node %s" (Node.get_name node);
++ Some { node with perms }
+ ) store.root
+
+ type ops = {
+--
+2.37.4
+
diff --git a/0122-tools-ocaml-Ensure-packet-size-is-never-negative.patch b/0122-tools-ocaml-Ensure-packet-size-is-never-negative.patch
new file mode 100644
index 0000000..35a14f1
--- /dev/null
+++ b/0122-tools-ocaml-Ensure-packet-size-is-never-negative.patch
@@ -0,0 +1,75 @@
+From d3649d33e1eae49d3925ef34a7ccf39cae8852e6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Wed, 12 Oct 2022 19:13:05 +0100
+Subject: [PATCH 122/126] tools/ocaml: Ensure packet size is never negative
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Integers in Ocaml have 63 or 31 bits of signed precision.
+
+On 64-bit builds of Ocaml, this is fine because a C uint32_t always fits
+within a 63-bit signed integer.
+
+In 32-bit builds of Ocaml, this goes wrong. The C uint32_t is truncated
+first (loses the top bit), then has an unsigned/signed mismatch.
+
+A "negative" value (i.e. a packet on the ring of between 1G and 2G in size)
+will trigger an exception later in Bytes.make in xb.ml, and because the packet
+is not removed from the ring, the exception re-triggers on every subsequent
+query, creating a livelock.
+
+Fix both the source of the exception in Xb, and as defence in depth, mark the
+domain as bad for any Invalid_argument exceptions to avoid the risk of
+livelock.
+
+This is XSA-420 / CVE-2022-42324.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Acked-by: Christian Lindig <christian.lindig@citrix.com>
+(cherry picked from commit ae34df4d82636f4c82700b447ea2c93b9f82b3f3)
+---
+ tools/ocaml/libs/xb/partial.ml | 6 +++---
+ tools/ocaml/xenstored/process.ml | 2 +-
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/tools/ocaml/libs/xb/partial.ml b/tools/ocaml/libs/xb/partial.ml
+index b6e2a716e263..3aa8927eb7f0 100644
+--- a/tools/ocaml/libs/xb/partial.ml
++++ b/tools/ocaml/libs/xb/partial.ml
+@@ -36,7 +36,7 @@ let of_string s =
+ This will leave the guest connection is a bad state and will
+ be hard to recover from without restarting the connection
+ (ie rebooting the guest) *)
+- let dlen = min xenstore_payload_max dlen in
++ let dlen = max 0 (min xenstore_payload_max dlen) in
+ {
+ tid = tid;
+ rid = rid;
+@@ -46,8 +46,8 @@ let of_string s =
+ }
+
+ let append pkt s sz =
+- if pkt.len > 4096 then failwith "Buffer.add: cannot grow buffer";
+- Buffer.add_string pkt.buf (String.sub s 0 sz)
++ if Buffer.length pkt.buf + sz > xenstore_payload_max then failwith "Buffer.add: cannot grow buffer";
++ Buffer.add_substring pkt.buf s 0 sz
+
+ let to_complete pkt =
+ pkt.len - (Buffer.length pkt.buf)
+diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
+index ce39ce28b5f3..6cb990ee7fb2 100644
+--- a/tools/ocaml/xenstored/process.ml
++++ b/tools/ocaml/xenstored/process.ml
+@@ -722,7 +722,7 @@ let do_input store cons doms con =
+ History.reconnect con;
+ info "%s reconnection complete" (Connection.get_domstr con);
+ None
+- | Failure exp ->
++ | Invalid_argument exp | Failure exp ->
+ error "caught exception %s" exp;
+ error "got a bad client %s" (sprintf "%-8s" (Connection.get_domstr con));
+ Connection.mark_as_bad con;
+--
+2.37.4
+
diff --git a/0123-tools-xenstore-fix-deleting-node-in-transaction.patch b/0123-tools-xenstore-fix-deleting-node-in-transaction.patch
new file mode 100644
index 0000000..efa7178
--- /dev/null
+++ b/0123-tools-xenstore-fix-deleting-node-in-transaction.patch
@@ -0,0 +1,46 @@
+From 2d3476effe3a9236867562f14dc26979a6527080 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:13 +0200
+Subject: [PATCH 123/126] tools/xenstore: fix deleting node in transaction
+
+In case a node has been created in a transaction and it is later
+deleted in the same transaction, the transaction will be terminated
+with an error.
+
+As this error is encountered only when handling the deleted node at
+transaction finalization, the transaction will have been performed
+partially and without updating the accounting information. This will
+enable a malicious guest to create an arbitrary number of nodes.
+
+This is part of XSA-421 / CVE-2022-42325.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Tested-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 13ac37f1416cae88d97f7baf6cf2a827edb9a187)
+---
+ tools/xenstore/xenstored_transaction.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index 3e3eb47326cc..7ffe21bb5285 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -418,7 +418,13 @@ static int finalize_transaction(struct connection *conn,
+ true);
+ talloc_free(data.dptr);
+ } else {
+- ret = do_tdb_delete(conn, &key, NULL);
++ /*
++ * A node having been created and later deleted
++ * in this transaction will have no generation
++ * information stored.
++ */
++ ret = (i->generation == NO_GENERATION)
++ ? 0 : do_tdb_delete(conn, &key, NULL);
+ }
+ if (ret)
+ goto err;
+--
+2.37.4
+
diff --git a/0124-tools-xenstore-harden-transaction-finalization-again.patch b/0124-tools-xenstore-harden-transaction-finalization-again.patch
new file mode 100644
index 0000000..8279aeb
--- /dev/null
+++ b/0124-tools-xenstore-harden-transaction-finalization-again.patch
@@ -0,0 +1,410 @@
+From e818f4f0dabf83a6138cd77d7464495fab7bfc16 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 13 Sep 2022 07:35:14 +0200
+Subject: [PATCH 124/126] tools/xenstore: harden transaction finalization
+ against errors
+
+When finalizing a transaction, any error occurring after checking for
+conflicts will result in the transaction being performed only
+partially today. Additionally accounting data will not be updated at
+the end of the transaction, which might result in further problems
+later.
+
+Avoid those problems by multiple modifications:
+
+- free any transaction specific nodes which don't need to be committed
+ as they haven't been written during the transaction as soon as their
+ generation count has been verified, this will reduce the risk of
+ out-of-memory situations
+
+- store the transaction specific node name in struct accessed_node in
+ order to avoid the need to allocate additional memory for it when
+ finalizing the transaction
+
+- don't stop the transaction finalization when hitting an error
+ condition, but try to continue to handle all modified nodes
+
+- in case of a detected error do the accounting update as needed and
+ call the data base checking only after that
+
+- if writing a node in a transaction is failing (e.g. due to a failed
+ quota check), fail the transaction, as prior changes to struct
+ accessed_node can't easily be undone in that case
+
+This is part of XSA-421 / CVE-2022-42326.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+Tested-by: Julien Grall <jgrall@amazon.com>
+(cherry picked from commit 2dd823ca7237e7fb90c890642d6a3b357a26fcff)
+---
+ tools/xenstore/xenstored_core.c | 16 ++-
+ tools/xenstore/xenstored_transaction.c | 171 +++++++++++--------------
+ tools/xenstore/xenstored_transaction.h | 4 +-
+ 3 files changed, 92 insertions(+), 99 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
+index 9ddbd934f794..3c008c8cd455 100644
+--- a/tools/xenstore/xenstored_core.c
++++ b/tools/xenstore/xenstored_core.c
+@@ -692,8 +692,7 @@ struct node *read_node(struct connection *conn, const void *ctx,
+ return NULL;
+ }
+
+- if (transaction_prepend(conn, name, &key))
+- return NULL;
++ transaction_prepend(conn, name, &key);
+
+ data = tdb_fetch(tdb_ctx, key);
+
+@@ -811,10 +810,21 @@ int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
+ static int write_node(struct connection *conn, struct node *node,
+ bool no_quota_check)
+ {
++ int ret;
++
+ if (access_node(conn, node, NODE_ACCESS_WRITE, &node->key))
+ return errno;
+
+- return write_node_raw(conn, &node->key, node, no_quota_check);
++ ret = write_node_raw(conn, &node->key, node, no_quota_check);
++ if (ret && conn && conn->transaction) {
++ /*
++ * Reverting access_node() is hard, so just fail the
++ * transaction.
++ */
++ fail_transaction(conn->transaction);
++ }
++
++ return ret;
+ }
+
+ enum xs_perm_type perm_for_conn(struct connection *conn,
+diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
+index 7ffe21bb5285..ac854197cadb 100644
+--- a/tools/xenstore/xenstored_transaction.c
++++ b/tools/xenstore/xenstored_transaction.c
+@@ -114,7 +114,8 @@ struct accessed_node
+ struct list_head list;
+
+ /* The name of the node. */
+- char *node;
++ char *trans_name; /* Transaction specific name. */
++ char *node; /* Main data base name. */
+
+ /* Generation count (or NO_GENERATION) for conflict checking. */
+ uint64_t generation;
+@@ -199,25 +200,20 @@ static char *transaction_get_node_name(void *ctx, struct transaction *trans,
+ * Prepend the transaction to name if node has been modified in the current
+ * transaction.
+ */
+-int transaction_prepend(struct connection *conn, const char *name,
+- TDB_DATA *key)
++void transaction_prepend(struct connection *conn, const char *name,
++ TDB_DATA *key)
+ {
+- char *tdb_name;
++ struct accessed_node *i;
+
+- if (!conn || !conn->transaction ||
+- !find_accessed_node(conn->transaction, name)) {
+- set_tdb_key(name, key);
+- return 0;
++ if (conn && conn->transaction) {
++ i = find_accessed_node(conn->transaction, name);
++ if (i) {
++ set_tdb_key(i->trans_name, key);
++ return;
++ }
+ }
+
+- tdb_name = transaction_get_node_name(conn->transaction,
+- conn->transaction, name);
+- if (!tdb_name)
+- return errno;
+-
+- set_tdb_key(tdb_name, key);
+-
+- return 0;
++ set_tdb_key(name, key);
+ }
+
+ /*
+@@ -240,7 +236,6 @@ int access_node(struct connection *conn, struct node *node,
+ struct accessed_node *i = NULL;
+ struct transaction *trans;
+ TDB_DATA local_key;
+- const char *trans_name = NULL;
+ int ret;
+ bool introduce = false;
+
+@@ -259,10 +254,6 @@ int access_node(struct connection *conn, struct node *node,
+
+ trans = conn->transaction;
+
+- trans_name = transaction_get_node_name(node, trans, node->name);
+- if (!trans_name)
+- goto nomem;
+-
+ i = find_accessed_node(trans, node->name);
+ if (!i) {
+ if (trans->nodes >= quota_trans_nodes &&
+@@ -273,9 +264,10 @@ int access_node(struct connection *conn, struct node *node,
+ i = talloc_zero(trans, struct accessed_node);
+ if (!i)
+ goto nomem;
+- i->node = talloc_strdup(i, node->name);
+- if (!i->node)
++ i->trans_name = transaction_get_node_name(i, trans, node->name);
++ if (!i->trans_name)
+ goto nomem;
++ i->node = strchr(i->trans_name, '/') + 1;
+ if (node->generation != NO_GENERATION && node->perms.num) {
+ i->perms.p = talloc_array(i, struct xs_permissions,
+ node->perms.num);
+@@ -302,7 +294,7 @@ int access_node(struct connection *conn, struct node *node,
+ i->generation = node->generation;
+ i->check_gen = true;
+ if (node->generation != NO_GENERATION) {
+- set_tdb_key(trans_name, &local_key);
++ set_tdb_key(i->trans_name, &local_key);
+ ret = write_node_raw(conn, &local_key, node, true);
+ if (ret)
+ goto err;
+@@ -321,7 +313,7 @@ int access_node(struct connection *conn, struct node *node,
+ return -1;
+
+ if (key) {
+- set_tdb_key(trans_name, key);
++ set_tdb_key(i->trans_name, key);
+ if (type == NODE_ACCESS_WRITE)
+ i->ta_node = true;
+ if (type == NODE_ACCESS_DELETE)
+@@ -333,7 +325,6 @@ int access_node(struct connection *conn, struct node *node,
+ nomem:
+ ret = ENOMEM;
+ err:
+- talloc_free((void *)trans_name);
+ talloc_free(i);
+ trans->fail = true;
+ errno = ret;
+@@ -371,100 +362,90 @@ void queue_watches(struct connection *conn, const char *name, bool watch_exact)
+ * base.
+ */
+ static int finalize_transaction(struct connection *conn,
+- struct transaction *trans)
++ struct transaction *trans, bool *is_corrupt)
+ {
+- struct accessed_node *i;
++ struct accessed_node *i, *n;
+ TDB_DATA key, ta_key, data;
+ struct xs_tdb_record_hdr *hdr;
+ uint64_t gen;
+- char *trans_name;
+- int ret;
+
+- list_for_each_entry(i, &trans->accessed, list) {
+- if (!i->check_gen)
+- continue;
++ list_for_each_entry_safe(i, n, &trans->accessed, list) {
++ if (i->check_gen) {
++ set_tdb_key(i->node, &key);
++ data = tdb_fetch(tdb_ctx, key);
++ hdr = (void *)data.dptr;
++ if (!data.dptr) {
++ if (tdb_error(tdb_ctx) != TDB_ERR_NOEXIST)
++ return EIO;
++ gen = NO_GENERATION;
++ } else
++ gen = hdr->generation;
++ talloc_free(data.dptr);
++ if (i->generation != gen)
++ return EAGAIN;
++ }
+
+- set_tdb_key(i->node, &key);
+- data = tdb_fetch(tdb_ctx, key);
+- hdr = (void *)data.dptr;
+- if (!data.dptr) {
+- if (tdb_error(tdb_ctx) != TDB_ERR_NOEXIST)
+- return EIO;
+- gen = NO_GENERATION;
+- } else
+- gen = hdr->generation;
+- talloc_free(data.dptr);
+- if (i->generation != gen)
+- return EAGAIN;
++ /* Entries for unmodified nodes can be removed early. */
++ if (!i->modified) {
++ if (i->ta_node) {
++ set_tdb_key(i->trans_name, &ta_key);
++ if (do_tdb_delete(conn, &ta_key, NULL))
++ return EIO;
++ }
++ list_del(&i->list);
++ talloc_free(i);
++ }
+ }
+
+ while ((i = list_top(&trans->accessed, struct accessed_node, list))) {
+- trans_name = transaction_get_node_name(i, trans, i->node);
+- if (!trans_name)
+- /* We are doomed: the transaction is only partial. */
+- goto err;
+-
+- set_tdb_key(trans_name, &ta_key);
+-
+- if (i->modified) {
+- set_tdb_key(i->node, &key);
+- if (i->ta_node) {
+- data = tdb_fetch(tdb_ctx, ta_key);
+- if (!data.dptr)
+- goto err;
++ set_tdb_key(i->node, &key);
++ if (i->ta_node) {
++ set_tdb_key(i->trans_name, &ta_key);
++ data = tdb_fetch(tdb_ctx, ta_key);
++ if (data.dptr) {
+ hdr = (void *)data.dptr;
+ hdr->generation = ++generation;
+- ret = do_tdb_write(conn, &key, &data, NULL,
+- true);
++ *is_corrupt |= do_tdb_write(conn, &key, &data,
++ NULL, true);
+ talloc_free(data.dptr);
++ if (do_tdb_delete(conn, &ta_key, NULL))
++ *is_corrupt = true;
+ } else {
+- /*
+- * A node having been created and later deleted
+- * in this transaction will have no generation
+- * information stored.
+- */
+- ret = (i->generation == NO_GENERATION)
+- ? 0 : do_tdb_delete(conn, &key, NULL);
+- }
+- if (ret)
+- goto err;
+- if (i->fire_watch) {
+- fire_watches(conn, trans, i->node, NULL,
+- i->watch_exact,
+- i->perms.p ? &i->perms : NULL);
++ *is_corrupt = true;
+ }
++ } else {
++ /*
++ * A node having been created and later deleted
++ * in this transaction will have no generation
++ * information stored.
++ */
++ *is_corrupt |= (i->generation == NO_GENERATION)
++ ? false
++ : do_tdb_delete(conn, &key, NULL);
+ }
++ if (i->fire_watch)
++ fire_watches(conn, trans, i->node, NULL, i->watch_exact,
++ i->perms.p ? &i->perms : NULL);
+
+- if (i->ta_node && do_tdb_delete(conn, &ta_key, NULL))
+- goto err;
+ list_del(&i->list);
+ talloc_free(i);
+ }
+
+ return 0;
+-
+-err:
+- corrupt(conn, "Partial transaction");
+- return EIO;
+ }
+
+ static int destroy_transaction(void *_transaction)
+ {
+ struct transaction *trans = _transaction;
+ struct accessed_node *i;
+- char *trans_name;
+ TDB_DATA key;
+
+ wrl_ntransactions--;
+ trace_destroy(trans, "transaction");
+ while ((i = list_top(&trans->accessed, struct accessed_node, list))) {
+ if (i->ta_node) {
+- trans_name = transaction_get_node_name(i, trans,
+- i->node);
+- if (trans_name) {
+- set_tdb_key(trans_name, &key);
+- do_tdb_delete(trans->conn, &key, NULL);
+- }
++ set_tdb_key(i->trans_name, &key);
++ do_tdb_delete(trans->conn, &key, NULL);
+ }
+ list_del(&i->list);
+ talloc_free(i);
+@@ -556,6 +537,7 @@ int do_transaction_end(const void *ctx, struct connection *conn,
+ {
+ const char *arg = onearg(in);
+ struct transaction *trans;
++ bool is_corrupt = false;
+ int ret;
+
+ if (!arg || (!streq(arg, "T") && !streq(arg, "F")))
+@@ -579,13 +561,17 @@ int do_transaction_end(const void *ctx, struct connection *conn,
+ ret = transaction_fix_domains(trans, false);
+ if (ret)
+ return ret;
+- if (finalize_transaction(conn, trans))
+- return EAGAIN;
++ ret = finalize_transaction(conn, trans, &is_corrupt);
++ if (ret)
++ return ret;
+
+ wrl_apply_debit_trans_commit(conn);
+
+ /* fix domain entry for each changed domain */
+ transaction_fix_domains(trans, true);
++
++ if (is_corrupt)
++ corrupt(conn, "transaction inconsistency");
+ }
+ send_ack(conn, XS_TRANSACTION_END);
+
+@@ -660,7 +646,7 @@ int check_transactions(struct hashtable *hash)
+ struct connection *conn;
+ struct transaction *trans;
+ struct accessed_node *i;
+- char *tname, *tnode;
++ char *tname;
+
+ list_for_each_entry(conn, &connections, list) {
+ list_for_each_entry(trans, &conn->transaction_list, list) {
+@@ -672,11 +658,8 @@ int check_transactions(struct hashtable *hash)
+ list_for_each_entry(i, &trans->accessed, list) {
+ if (!i->ta_node)
+ continue;
+- tnode = transaction_get_node_name(tname, trans,
+- i->node);
+- if (!tnode || !remember_string(hash, tnode))
++ if (!remember_string(hash, i->trans_name))
+ goto nomem;
+- talloc_free(tnode);
+ }
+
+ talloc_free(tname);
+diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h
+index 39d7f81c5127..3417303f9427 100644
+--- a/tools/xenstore/xenstored_transaction.h
++++ b/tools/xenstore/xenstored_transaction.h
+@@ -48,8 +48,8 @@ int __must_check access_node(struct connection *conn, struct node *node,
+ void queue_watches(struct connection *conn, const char *name, bool watch_exact);
+
+ /* Prepend the transaction to name if appropriate. */
+-int transaction_prepend(struct connection *conn, const char *name,
+- TDB_DATA *key);
++void transaction_prepend(struct connection *conn, const char *name,
++ TDB_DATA *key);
+
+ /* Mark the transaction as failed. This will prevent it to be committed. */
+ void fail_transaction(struct transaction *trans);
+--
+2.37.4
+
diff --git a/0125-x86-spec-ctrl-Enumeration-for-IBPB_RET.patch b/0125-x86-spec-ctrl-Enumeration-for-IBPB_RET.patch
new file mode 100644
index 0000000..f1667ac
--- /dev/null
+++ b/0125-x86-spec-ctrl-Enumeration-for-IBPB_RET.patch
@@ -0,0 +1,82 @@
+From 07be0fe497349ed423c5201bdc410b6281ebf04f Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 14 Jun 2022 16:18:36 +0100
+Subject: [PATCH 125/126] x86/spec-ctrl: Enumeration for IBPB_RET
+
+The IBPB_RET bit indicates that the CPU's implementation of MSR_PRED_CMD.IBPB
+does flush the RSB/RAS too.
+
+This is part of XSA-422 / CVE-2022-23824.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 24496558e650535bdbd22cc04731e82276cd1b3f)
+---
+ tools/libs/light/libxl_cpuid.c | 1 +
+ tools/misc/xen-cpuid.c | 1 +
+ xen/arch/x86/spec_ctrl.c | 5 +++--
+ xen/include/public/arch-x86/cpufeatureset.h | 1 +
+ 4 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c
+index 2632efc6adb0..4cc2f211b878 100644
+--- a/tools/libs/light/libxl_cpuid.c
++++ b/tools/libs/light/libxl_cpuid.c
+@@ -284,6 +284,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
+ {"ssb-no", 0x80000008, NA, CPUID_REG_EBX, 26, 1},
+ {"psfd", 0x80000008, NA, CPUID_REG_EBX, 28, 1},
+ {"btc-no", 0x80000008, NA, CPUID_REG_EBX, 29, 1},
++ {"ibpb-ret", 0x80000008, NA, CPUID_REG_EBX, 30, 1},
+
+ {"nc", 0x80000008, NA, CPUID_REG_ECX, 0, 8},
+ {"apicidsize", 0x80000008, NA, CPUID_REG_ECX, 12, 4},
+diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
+index e83bc4793d6e..5c944c24fe36 100644
+--- a/tools/misc/xen-cpuid.c
++++ b/tools/misc/xen-cpuid.c
+@@ -158,6 +158,7 @@ static const char *const str_e8b[32] =
+ [24] = "amd-ssbd", [25] = "virt-ssbd",
+ [26] = "ssb-no",
+ [28] = "psfd", [29] = "btc-no",
++ [30] = "ibpb-ret",
+ };
+
+ static const char *const str_7d0[32] =
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 3ff602bd0281..459c64d139b6 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -419,7 +419,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+ * Hardware read-only information, stating immunity to certain issues, or
+ * suggestions of which mitigation to use.
+ */
+- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
++ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "",
+ (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "",
+ (caps & ARCH_CAPS_RSBA) ? " RSBA" : "",
+@@ -436,7 +436,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+ (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : "",
+- (e8b & cpufeat_mask(X86_FEATURE_BTC_NO)) ? " BTC_NO" : "");
++ (e8b & cpufeat_mask(X86_FEATURE_BTC_NO)) ? " BTC_NO" : "",
++ (e8b & cpufeat_mask(X86_FEATURE_IBPB_RET)) ? " IBPB_RET" : "");
+
+ /* Hardware features which need driving to mitigate issues. */
+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n",
+diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
+index 1bbc7da4b53c..41a358d575d3 100644
+--- a/xen/include/public/arch-x86/cpufeatureset.h
++++ b/xen/include/public/arch-x86/cpufeatureset.h
+@@ -266,6 +266,7 @@ XEN_CPUFEATURE(VIRT_SSBD, 8*32+25) /* MSR_VIRT_SPEC_CTRL.SSBD */
+ XEN_CPUFEATURE(SSB_NO, 8*32+26) /*A Hardware not vulnerable to SSB */
+ XEN_CPUFEATURE(PSFD, 8*32+28) /*S MSR_SPEC_CTRL.PSFD */
+ XEN_CPUFEATURE(BTC_NO, 8*32+29) /*A Hardware not vulnerable to Branch Type Confusion */
++XEN_CPUFEATURE(IBPB_RET, 8*32+30) /*A IBPB clears RSB/RAS too. */
+
+ /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
+ XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */
+--
+2.37.4
+
diff --git a/0126-x86-spec-ctrl-Mitigate-IBPB-not-flushing-the-RSB-RAS.patch b/0126-x86-spec-ctrl-Mitigate-IBPB-not-flushing-the-RSB-RAS.patch
new file mode 100644
index 0000000..2abb0f2
--- /dev/null
+++ b/0126-x86-spec-ctrl-Mitigate-IBPB-not-flushing-the-RSB-RAS.patch
@@ -0,0 +1,113 @@
+From 32445f23fea6a533fc1d7ade5871246d75210bf1 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 14 Jun 2022 16:18:36 +0100
+Subject: [PATCH 126/126] x86/spec-ctrl: Mitigate IBPB not flushing the RSB/RAS
+
+Introduce spec_ctrl_new_guest_context() to encapsulate all logic pertaining to
+using MSR_PRED_CMD for a new guest context, even if it only has one user
+presently.
+
+Introduce X86_BUG_IBPB_NO_RET, and use it to extend spec_ctrl_new_guest_context()
+with a manual fixup for hardware which mis-implements IBPB.
+
+This is part of XSA-422 / CVE-2022-23824.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 2b27967fb89d7904a1571a2fb963b1c9cac548db)
+---
+ xen/arch/x86/asm-macros.c | 1 +
+ xen/arch/x86/domain.c | 2 +-
+ xen/arch/x86/spec_ctrl.c | 8 ++++++++
+ xen/include/asm-x86/cpufeatures.h | 1 +
+ xen/include/asm-x86/spec_ctrl.h | 22 ++++++++++++++++++++++
+ 5 files changed, 33 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/asm-macros.c b/xen/arch/x86/asm-macros.c
+index 7e536b0d82f5..891d86c7655c 100644
+--- a/xen/arch/x86/asm-macros.c
++++ b/xen/arch/x86/asm-macros.c
+@@ -1,2 +1,3 @@
+ #include <asm/asm-defns.h>
+ #include <asm/alternative-asm.h>
++#include <asm/spec_ctrl_asm.h>
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index e9b8ed4c96c2..b82e18dd62d8 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -2069,7 +2069,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
+ */
+ if ( *last_id != next_id )
+ {
+- wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB);
++ spec_ctrl_new_guest_context();
+ *last_id = next_id;
+ }
+ }
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 459c64d139b6..5636853aae6b 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -775,6 +775,14 @@ static void __init ibpb_calculations(void)
+ return;
+ }
+
++ /*
++ * AMD/Hygon CPUs to date (June 2022) don't flush the RAS. Future
++ * CPUs are expected to enumerate IBPB_RET when this has been fixed.
++ * Until then, cover the difference with the software sequence.
++ */
++ if ( boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_IBPB_RET) )
++ setup_force_cpu_cap(X86_BUG_IBPB_NO_RET);
++
+ /*
+ * IBPB-on-entry mitigations for Branch Type Confusion.
+ *
+diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
+index b233e5835fb5..bdb119a34c5d 100644
+--- a/xen/include/asm-x86/cpufeatures.h
++++ b/xen/include/asm-x86/cpufeatures.h
+@@ -48,6 +48,7 @@ XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for
+
+ #define X86_BUG_FPU_PTRS X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */
+ #define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */
++#define X86_BUG_IBPB_NO_RET X86_BUG( 3) /* IBPB doesn't flush the RSB/RAS */
+
+ /* Total number of capability words, inc synth and bug words. */
+ #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
+diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
+index 33e845991b0a..e400ff227391 100644
+--- a/xen/include/asm-x86/spec_ctrl.h
++++ b/xen/include/asm-x86/spec_ctrl.h
+@@ -65,6 +65,28 @@
+ void init_speculation_mitigations(void);
+ void spec_ctrl_init_domain(struct domain *d);
+
++/*
++ * Switch to a new guest prediction context.
++ *
++ * This flushes all indirect branch predictors (BTB, RSB/RAS), so guest code
++ * which has previously run on this CPU can't attack subsequent guest code.
++ *
++ * As this flushes the RSB/RAS, it destroys the predictions of the calling
++ * context. For best performance, arrange for this to be used when we're going
++ * to jump out of the current context, e.g. with reset_stack_and_jump().
++ *
++ * For hardware which mis-implements IBPB, fix up by flushing the RSB/RAS
++ * manually.
++ */
++static always_inline void spec_ctrl_new_guest_context(void)
++{
++ wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB);
++
++ /* (ab)use alternative_input() to specify clobbers. */
++ alternative_input("", "DO_OVERWRITE_RSB", X86_BUG_IBPB_NO_RET,
++ : "rax", "rcx");
++}
++
+ extern int8_t opt_ibpb_ctxt_switch;
+ extern bool opt_ssbd;
+ extern int8_t opt_eager_fpu;
+--
+2.37.4
+
diff --git a/info.txt b/info.txt
index a677aa4..6f3a715 100644
--- a/info.txt
+++ b/info.txt
@@ -1,6 +1,6 @@
-Xen upstream patchset #1 for 4.15.4-pre
+Xen upstream patchset #2 for 4.15.4-pre
Containing patches from
RELEASE-4.15.3 (feecaf4abf733e83b7a297190819eca7a7f65168)
to
-staging-4.15 (816580afdd1730d4f85f64477a242a439af1cdf8)
+staging-4.15 (32445f23fea6a533fc1d7ade5871246d75210bf1)