@@ -27,6 +27,7 @@
 #include <linux/swapops.h>
 #include <linux/jhash.h>
 #include <linux/numa.h>
+#include <linux/llist.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -1136,7 +1137,7 @@ static inline void ClearPageHugeTemporary(struct page *page)
 	page[2].mapping = NULL;
 }
 
-void free_huge_page(struct page *page)
+static void __free_huge_page(struct page *page)
 {
 	/*
 	 * Can't pass hstate in here because it is called from the
@@ -1199,6 +1200,54 @@ void free_huge_page(struct page *page)
 	spin_unlock(&hugetlb_lock);
 }
 
+/*
+ * As free_huge_page() can be called from a non-task context, we have
+ * to defer the actual freeing in a workqueue to prevent potential
+ * hugetlb_lock deadlock.
+ *
+ * free_hpage_workfn() locklessly retrieves the linked list of pages to
+ * be freed and frees them one-by-one. As the page->mapping pointer is
+ * going to be cleared in __free_huge_page() anyway, it is reused as the
+ * llist_node structure of a lockless linked list of huge pages to be freed.
+ */
+static LLIST_HEAD(hpage_freelist);
+
+static void free_hpage_workfn(struct work_struct *work)
+{
+	struct llist_node *node;
+	struct page *page;
+
+	node = llist_del_all(&hpage_freelist);
+
+	while (node) {
+		page = container_of((struct address_space **)node,
+				    struct page, mapping);
+		node = node->next;
+		__free_huge_page(page);
+	}
+}
+static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
+
+void free_huge_page(struct page *page)
+{
+	/*
+	 * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
+	 */
+	if (!in_task()) {
+		/*
+		 * Only call schedule_work() if hpage_freelist is previously
+		 * empty. Otherwise, schedule_work() had been called but the
+		 * workfn hasn't retrieved the list yet.
+		 */
+		if (llist_add((struct llist_node *)&page->mapping,
+			      &hpage_freelist))
+			schedule_work(&free_hpage_work);
+		return;
+	}
+
+	__free_huge_page(page);
+}
+
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 {
 	INIT_LIST_HEAD(&page->lru);
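
The container_of() call in free_hpage_workfn() works because page->mapping is a pointer-sized field that __free_huge_page() is going to clear anyway: an llist_node is overlaid on that field when the page is queued, and the enclosing struct page is recovered from the node's address later. Below is a minimal userspace sketch of that overlay-and-recover trick; container_of() is defined locally (the kernel provides it in its headers), and struct fake_page is a made-up stand-in for struct page, not a kernel type.

#include <stddef.h>
#include <stdio.h>

/* Local definition for this sketch; the kernel supplies its own. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Stand-in for struct page: only the pointer-sized 'mapping' slot matters. */
struct fake_page {
	unsigned long flags;
	void *mapping;		/* reused as a list link while queued */
};

struct list_node {
	struct list_node *next;
};

int main(void)
{
	struct fake_page pages[3] = {
		{ .flags = 1 }, { .flags = 2 }, { .flags = 3 },
	};
	struct list_node *head = NULL;

	/*
	 * Chain the pages through their 'mapping' slots, as the patch does
	 * with page->mapping: a struct list_node is overlaid on the field.
	 */
	for (int i = 0; i < 3; i++) {
		struct list_node *n = (struct list_node *)&pages[i].mapping;
		n->next = head;
		head = n;
	}

	/*
	 * Walk the list and recover each containing fake_page, mirroring
	 * the container_of() cast in free_hpage_workfn().
	 */
	while (head) {
		struct fake_page *p =
			container_of((void **)head, struct fake_page, mapping);
		head = head->next;
		printf("recovered page with flags=%lu\n", p->flags);
	}
	return 0;
}

Built with a plain C compiler, this prints the pages in reverse order of insertion, matching llist's push-to-front (LIFO) behavior.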
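free_huge_page() calls schedule_work() only when llist_add() reports that the list was previously empty. That is safe because llist is a lock-free LIFO: producers push one node at a time, exactly one of them observes the NULL-to-non-NULL transition, and the worker later detaches the whole batch with llist_del_all(), so a single pending work item covers every page queued before the worker runs. Below is a userspace sketch of those two operations using C11 atomics; lpush() and ldel_all() are illustrative names modeled on the kernel's llist_add()/llist_del_all(), not the kernel API itself.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Userspace analogue of the kernel's llist: a lock-free LIFO where
 * producers push single nodes and a consumer detaches the whole
 * list in one shot.
 */
struct lnode {
	struct lnode *next;
};

struct lhead {
	_Atomic(struct lnode *) first;
};

/*
 * Push one node; returns true if the list was empty beforehand, which
 * is the signal free_huge_page() uses to call schedule_work() only
 * once per batch.
 */
static bool lpush(struct lhead *h, struct lnode *n)
{
	struct lnode *old = atomic_load(&h->first);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak(&h->first, &old, n));
	return old == NULL;
}

/* Detach the entire list in one atomic exchange, like llist_del_all(). */
static struct lnode *ldel_all(struct lhead *h)
{
	return atomic_exchange(&h->first, NULL);
}

int main(void)
{
	struct lhead head = { NULL };
	struct lnode nodes[3];

	/* Only the first push reports the empty->non-empty transition. */
	for (int i = 0; i < 3; i++)
		printf("push %d: was_empty=%d\n", i, lpush(&head, &nodes[i]));

	/* The "workfn": take everything, then process node by node. */
	for (struct lnode *n = ldel_all(&head); n; n = n->next)
		printf("processing node %td\n", n - nodes);

	return 0;
}

Having the push report the empty-to-non-empty transition is what keeps the single-work-item scheme race-free: the compare-and-swap guarantees exactly one producer sees it, so the work is scheduled once per batch rather than once per page.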