|
| 1 | +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Daniel Stodden < [email protected]> |
| 3 | +Date: Sun, 22 Dec 2024 19:39:08 -0800 |
| 4 | +Subject: [PATCH] PCI/ASPM: Fix link state exit during switch upstream function |
| 5 | + removal |
| 6 | +MIME-Version: 1.0 |
| 7 | +Content-Type: text/plain; charset=UTF-8 |
| 8 | +Content-Transfer-Encoding: 8bit |
| 9 | + |
| 10 | +Before 456d8aa37d0f ("PCI/ASPM: Disable ASPM on MFD function removal to |
| 11 | +avoid use-after-free"), we would free the ASPM link only after the last |
| 12 | +function on the bus pertaining to the given link was removed. |
| 13 | + |
| 14 | +That was too late. If function 0 is removed before sibling function, |
| 15 | +link->downstream would point to free'd memory after. |
| 16 | + |
| 17 | +After above change, we freed the ASPM parent link state upon any function |
| 18 | +removal on the bus pertaining to a given link. |
| 19 | + |
| 20 | +That is too early. If the link is to a PCIe switch with MFD on the upstream |
| 21 | +port, then removing functions other than 0 first would free a link which |
| 22 | +still remains parent_link to the remaining downstream ports. |
| 23 | + |
| 24 | +The resulting GPFs are especially frequent during hot-unplug, because |
| 25 | +pciehp removes devices on the link bus in reverse order. |
| 26 | + |
| 27 | +On that switch, function 0 is the virtual P2P bridge to the internal bus. |
| 28 | +Free exactly when function 0 is removed -- before the parent link is |
| 29 | +obsolete, but after all subordinate links are gone. |
| 30 | + |
| 31 | +Link: https://lore.kernel.org/r/e12898835f25234561c9d7de4435590d957b85d9.1734924854.git.dns@arista.com |
| 32 | +Fixes: 456d8aa37d0f ("PCI/ASPM: Disable ASPM on MFD function removal to avoid use-after-free") |
| 33 | +Signed-off-by: Daniel Stodden < [email protected]> |
| 34 | +Signed-off-by: Bjorn Helgaas < [email protected]> |
| 35 | +[kwilczynski: commit log] |
| 36 | +Signed-off-by: Krzysztof Wilczyński < [email protected]> |
| 37 | +--- |
| 38 | + drivers/pci/pcie/aspm.c | 17 +++++++++-------- |
| 39 | + 1 file changed, 9 insertions(+), 8 deletions(-) |
| 40 | + |
| 41 | +diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c |
| 42 | +index cf4acea6610d..20ec2a4aae65 100644 |
| 43 | +--- a/drivers/pci/pcie/aspm.c |
| 44 | ++++ b/drivers/pci/pcie/aspm.c |
| 45 | +@@ -1031,16 +1031,16 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) |
| 46 | + parent_link = link->parent; |
| 47 | + |
| 48 | + /* |
| 49 | +- * link->downstream is a pointer to the pci_dev of function 0. If |
| 50 | +- * we remove that function, the pci_dev is about to be deallocated, |
| 51 | +- * so we can't use link->downstream again. Free the link state to |
| 52 | +- * avoid this. |
| 53 | ++ * Free the parent link state, no later than function 0 (i.e. |
| 54 | ++ * link->downstream) being removed. |
| 55 | + * |
| 56 | +- * If we're removing a non-0 function, it's possible we could |
| 57 | +- * retain the link state, but PCIe r6.0, sec 7.5.3.7, recommends |
| 58 | +- * programming the same ASPM Control value for all functions of |
| 59 | +- * multi-function devices, so disable ASPM for all of them. |
| 60 | ++ * Do not free the link state any earlier. If function 0 is a |
| 61 | ++ * switch upstream port, this link state is parent_link to all |
| 62 | ++ * subordinate ones. |
| 63 | + */ |
| 64 | ++ if (pdev != link->downstream) |
| 65 | ++ goto out; |
| 66 | ++ |
| 67 | + pcie_config_aspm_link(link, 0); |
| 68 | + list_del(&link->sibling); |
| 69 | + free_link_state(link); |
| 70 | +@@ -1051,6 +1051,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) |
| 71 | + pcie_config_aspm_path(parent_link); |
| 72 | + } |
| 73 | + |
| 74 | ++ out: |
| 75 | + mutex_unlock(&aspm_lock); |
| 76 | + up_read(&pci_bus_sem); |
| 77 | + } |
0 commit comments