/*
 * Memory Migration functionality - linux/mm/migrate.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter
 */

#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/memcontrol.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/gfp.h>

#include <asm/tlbflush.h>

#include "internal.h"

/*
 * migrate_prep() needs to be called before we start compiling a list of pages
 * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
 * undesirable, use migrate_prep_local().
 */
int migrate_prep(void)
{
	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();

	return 0;
}
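
/*
 * Illustrative caller sequence (a sketch, not a verbatim caller from this
 * file; "mylist" and "alloc_func" are hypothetical names):
 *
 *	migrate_prep();
 *	... isolate candidate pages with isolate_lru_page() onto mylist ...
 *	migrate_pages(&mylist, alloc_func, private, false, MIGRATE_SYNC);
 */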

/* Do the necessary work of migrate_prep but not if it involves other CPUs */
int migrate_prep_local(void)
{
	lru_add_drain();

	return 0;
}

/*
 * Add isolated pages on the list back to the LRU under page lock
 * to avoid leaking evictable pages back onto the unevictable list.
 */
void putback_lru_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		list_del(&page->lru);
		dec_zone_page_state(page, NR_ISOLATED_ANON +
				page_is_file_cache(page));
		putback_lru_page(page);
	}
}

/*
 * Restore a potential migration pte to a working pte entry
 */
static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
				 unsigned long addr, void *old)
{
	struct mm_struct *mm = vma->vm_mm;
	swp_entry_t entry;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	spinlock_t *ptl;

	if (unlikely(PageHuge(new))) {
		ptep = huge_pte_offset(mm, addr);
		if (!ptep)
			goto out;
		ptl = &mm->page_table_lock;
	} else {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd))
			goto out;

		pud = pud_offset(pgd, addr);
		if (!pud_present(*pud))
			goto out;

		pmd = pmd_offset(pud, addr);
		if (pmd_trans_huge(*pmd))
			goto out;
		if (!pmd_present(*pmd))
			goto out;

		ptep = pte_offset_map(pmd, addr);

		/*
		 * Peek to check is_swap_pte() before taking ptlock? No, we
		 * can race mremap's move_ptes(), which skips anon_vma lock.
		 */

		ptl = pte_lockptr(mm, pmd);
	}

	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto unlock;

	entry = pte_to_swp_entry(pte);

	if (!is_migration_entry(entry) ||
	    migration_entry_to_page(entry) != old)
		goto unlock;

	get_page(new);
	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
	if (is_write_migration_entry(entry))
		pte = pte_mkwrite(pte);
#ifdef CONFIG_HUGETLB_PAGE
	if (PageHuge(new))
		pte = pte_mkhuge(pte);
#endif
	flush_cache_page(vma, addr, pte_pfn(pte));
	set_pte_at(mm, addr, ptep, pte);

	if (PageHuge(new)) {
		if (PageAnon(new))
			hugepage_add_anon_rmap(new, vma, addr);
		else
			page_dup_rmap(new);
	} else if (PageAnon(new))
		page_add_anon_rmap(new, vma, addr);
	else
		page_add_file_rmap(new);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache(vma, addr, ptep);
unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return SWAP_AGAIN;
}

/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
static void remove_migration_ptes(struct page *old, struct page *new)
{
	rmap_walk(new, remove_migration_pte, old);
}

/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */
void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
				unsigned long address)
{
	pte_t *ptep, pte;
	spinlock_t *ptl;
	swp_entry_t entry;
	struct page *page;

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry))
		goto out;

	page = migration_entry_to_page(entry);

	/*
	 * Once the radix-tree replacement step of page migration has
	 * started, page_count *must* be zero, and we don't want to call
	 * wait_on_page_locked() against a page we hold no reference on.
	 * So we use get_page_unless_zero() here. Even if it fails, the
	 * page fault will simply occur again.
	 */
	if (!get_page_unless_zero(page))
		goto out;
	pte_unmap_unlock(ptep, ptl);
	wait_on_page_locked(page);
	put_page(page);
	return;
out:
	pte_unmap_unlock(ptep, ptl);
}
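
/*
 * Note: the fault path is the typical caller; when do_swap_page() finds a
 * migration entry it calls migration_entry_wait(mm, pmd, address) and then
 * returns, so the fault is retried once the migration has completed.
 */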

#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	struct buffer_head *bh = head;

	/* Simple case, sync compaction */
	if (mode != MIGRATE_ASYNC) {
		do {
			get_bh(bh);
			lock_buffer(bh);
			bh = bh->b_this_page;

		} while (bh != head);

		return true;
	}

	/* async case, we cannot block on lock_buffer so use trylock_buffer */
	do {
		get_bh(bh);
		if (!trylock_buffer(bh)) {
			/*
			 * We failed to lock the buffer and cannot stall in
			 * async migration. Release the taken locks.
			 */
			struct buffer_head *failed_bh = bh;
			put_bh(failed_bh);
			bh = head;
			while (bh != failed_bh) {
				unlock_buffer(bh);
				put_bh(bh);
				bh = bh->b_this_page;
			}
			return false;
		}

		bh = bh->b_this_page;
	} while (bh != head);
	return true;
}
#else
static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	return true;
}
#endif /* CONFIG_BLOCK */

/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 *   1 for anonymous pages without a mapping
 *   2 for pages with a mapping
 *   3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */
static int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page,
		struct buffer_head *head, enum migrate_mode mode)
{
	int expected_count;
	void **pslot;

	if (!mapping) {
		/* Anonymous page without mapping */
		if (page_count(page) != 1)
			return -EAGAIN;
		return 0;
	}

	spin_lock_irq(&mapping->tree_lock);

	pslot = radix_tree_lookup_slot(&mapping->page_tree,
					page_index(page));

	expected_count = 2 + page_has_private(page);
	if (page_count(page) != expected_count ||
		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	if (!page_freeze_refs(page, expected_count)) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * In the async migration case of moving a page with buffers, lock the
	 * buffers using trylock before the mapping is moved. If the mapping
	 * were moved first and we then failed to lock the buffers, we could
	 * not move the mapping back due to an elevated page count and would
	 * have to block waiting on other references to be dropped.
	 */
	if (mode == MIGRATE_ASYNC && head &&
			!buffer_migrate_lock_buffers(head, mode)) {
		page_unfreeze_refs(page, expected_count);
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page.
	 */
	get_page(newpage);	/* add cache reference */
	if (PageSwapCache(page)) {
		SetPageSwapCache(newpage);
		set_page_private(newpage, page_private(page));
	}

	radix_tree_replace_slot(pslot, newpage);

	/*
	 * Drop the cache reference from the old page by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
	page_unfreeze_refs(page, expected_count - 1);

	/*
	 * If moved to a different zone then also account
	 * the page for that zone. Other VM counters will be
	 * taken care of when we establish references to the
	 * new page and drop references to the old page.
	 *
	 * Note that anonymous pages are accounted for
	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
	 * are mapped to swap space.
	 */
	__dec_zone_page_state(page, NR_FILE_PAGES);
	__inc_zone_page_state(newpage, NR_FILE_PAGES);
	if (!PageSwapCache(page) && PageSwapBacked(page)) {
		__dec_zone_page_state(page, NR_SHMEM);
		__inc_zone_page_state(newpage, NR_SHMEM);
	}
	spin_unlock_irq(&mapping->tree_lock);

	return 0;
}
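
/*
 * Worked example of the expected_count logic above (an illustrative note,
 * not extra code): for a file page that has buffers, expected_count is
 * 2 + page_has_private(page) == 3 - one reference held by the radix tree,
 * one by the isolating caller, and one for PagePrivate. page_freeze_refs()
 * atomically replaces the count with zero only if it is exactly 3, so any
 * concurrent lookup holding an extra reference makes the migration back
 * off with -EAGAIN.
 */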

/*
 * The expected number of remaining references is the same as that
 * of migrate_page_move_mapping().
 */
int migrate_huge_page_move_mapping(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	int expected_count;
	void **pslot;

	if (!mapping) {
		if (page_count(page) != 1)
			return -EAGAIN;
		return 0;
	}

	spin_lock_irq(&mapping->tree_lock);

	pslot = radix_tree_lookup_slot(&mapping->page_tree,
					page_index(page));

	expected_count = 2 + page_has_private(page);
	if (page_count(page) != expected_count ||
		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	if (!page_freeze_refs(page, expected_count)) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	get_page(newpage);

	radix_tree_replace_slot(pslot, newpage);

	page_unfreeze_refs(page, expected_count - 1);

	spin_unlock_irq(&mapping->tree_lock);
	return 0;
}

/*
 * Copy the page to its new location.
 */
void migrate_page_copy(struct page *newpage, struct page *page)
{
	if (PageHuge(page))
		copy_huge_page(newpage, page);
	else
		copy_highpage(newpage, page);

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (TestClearPageActive(page)) {
		VM_BUG_ON(PageUnevictable(page));
		SetPageActive(newpage);
	} else if (TestClearPageUnevictable(page))
		SetPageUnevictable(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	if (PageDirty(page)) {
		clear_page_dirty_for_io(page);
		/*
		 * We want to mark the page and the radix tree as dirty, and
		 * redo the accounting that clear_page_dirty_for_io undid,
		 * but we can't use set_page_dirty because that function
		 * is actually a signal that all of the page has become dirty,
		 * whereas only part of our page may be dirty.
		 */
		__set_page_dirty_nobuffers(newpage);
	}

	mlock_migrate_page(newpage, page);
	ksm_migrate_page(newpage, page);

	ClearPageSwapCache(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);
}
456 | |
457 | /************************************************************ |
458 | * Migration functions |
459 | ***********************************************************/ |
460 | |
461 | /* Always fail migration. Used for mappings that are not movable */ |
462 | int fail_migrate_page(struct address_space *mapping, |
463 | struct page *newpage, struct page *page) |
464 | { |
465 | return -EIO; |
466 | } |
467 | EXPORT_SYMBOL(fail_migrate_page); |
468 | |
469 | /* |
470 | * Common logic to directly migrate a single page suitable for |
471 | * pages that do not use PagePrivate/PagePrivate2. |
472 | * |
473 | * Pages are locked upon entry and exit. |
474 | */ |
475 | int migrate_page(struct address_space *mapping, |
476 | struct page *newpage, struct page *page, |
477 | enum migrate_mode mode) |
478 | { |
479 | int rc; |
480 | |
481 | BUG_ON(PageWriteback(page)); /* Writeback must be complete */ |
482 | |
483 | rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode); |
484 | |
485 | if (rc) |
486 | return rc; |
487 | |
488 | migrate_page_copy(newpage, page); |
489 | return 0; |
490 | } |
491 | EXPORT_SYMBOL(migrate_page); |
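
/*
 * Filesystems whose pages carry no private state can point their
 * address_space_operations at this helper directly, e.g. (an illustrative
 * aops table, not one defined in this file):
 *
 *	static const struct address_space_operations foo_aops = {
 *		.readpage	= foo_readpage,
 *		.writepage	= foo_writepage,
 *		.migratepage	= migrate_page,
 *	};
 */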

#ifdef CONFIG_BLOCK
/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */
int buffer_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	struct buffer_head *bh, *head;
	int rc;

	if (!page_has_buffers(page))
		return migrate_page(mapping, newpage, page, mode);

	head = page_buffers(page);

	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);

	if (rc)
		return rc;

	/*
	 * In the async case, migrate_page_move_mapping locked the buffers
	 * with an IRQ-safe spinlock held. In the sync case, the buffers
	 * need to be locked now.
	 */
	if (mode != MIGRATE_ASYNC)
		BUG_ON(!buffer_migrate_lock_buffers(head, mode));

	ClearPagePrivate(page);
	set_page_private(newpage, page_private(page));
	set_page_private(page, 0);
	put_page(page);
	get_page(newpage);

	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;

	} while (bh != head);

	SetPagePrivate(newpage);

	migrate_page_copy(newpage, page);

	bh = head;
	do {
		unlock_buffer(bh);
		put_bh(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	return 0;
}
EXPORT_SYMBOL(buffer_migrate_page);
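
/*
 * Block-based filesystems that attach buffer_heads to their pages would
 * typically use this as their callback instead, e.g. (illustrative):
 *
 *	.migratepage	= buffer_migrate_page,
 */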
#endif

/*
 * Writeback a page to clean the dirty state.
 */
static int writeout(struct address_space *mapping, struct page *page)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = 1,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1
	};
	int rc;

	if (!mapping->a_ops->writepage)
		/* No write method for the address space */
		return -EINVAL;

	if (!clear_page_dirty_for_io(page))
		/* Someone else already triggered a write */
		return -EAGAIN;

	/*
	 * A dirty page may imply that the underlying filesystem has
	 * the page on some queue. So the page must be clean for
	 * migration. Writeout may mean we lose the lock and the
	 * page state is no longer what we checked for earlier.
	 * At this point we know that the migration attempt cannot
	 * be successful.
	 */
	remove_migration_ptes(page, page);

	rc = mapping->a_ops->writepage(page, &wbc);

	if (rc != AOP_WRITEPAGE_ACTIVATE)
		/* unlocked. Relock */
		lock_page(page);

	return (rc < 0) ? -EIO : -EAGAIN;
}

/*
 * Default handling if a filesystem does not provide a migration function.
 */
static int fallback_migrate_page(struct address_space *mapping,
	struct page *newpage, struct page *page, enum migrate_mode mode)
{
	if (PageDirty(page)) {
		/* Only writeback pages in full synchronous migration */
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		return writeout(mapping, page);
	}

	/*
	 * Buffers may be managed in a filesystem specific way.
	 * We must have no buffers or drop them.
	 */
	if (page_has_private(page) &&
	    !try_to_release_page(page, GFP_KERNEL))
		return -EAGAIN;

	return migrate_page(mapping, newpage, page, mode);
}

/*
 * Move a page to a newly allocated page.
 * The page is locked and all ptes have been successfully removed.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 *
 * Return value:
 *   < 0 - error code
 *  == 0 - success
 */
static int move_to_new_page(struct page *newpage, struct page *page,
				int remap_swapcache, enum migrate_mode mode)
{
	struct address_space *mapping;
	int rc;

	/*
	 * Block others from accessing the page when we get around to
	 * establishing additional references. We are the only one
	 * holding a reference to the new page at this point.
	 */
	if (!trylock_page(newpage))
		BUG();

	/* Prepare mapping for the new page. */
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	if (PageSwapBacked(page))
		SetPageSwapBacked(newpage);

	mapping = page_mapping(page);
	if (!mapping)
		rc = migrate_page(mapping, newpage, page, mode);
	else if (mapping->a_ops->migratepage)
		/*
		 * Most pages have a mapping and most filesystems provide a
		 * migratepage callback. Anonymous pages are part of swap
		 * space which also has its own migratepage callback. This
		 * is the most common path for page migration.
		 */
		rc = mapping->a_ops->migratepage(mapping,
						newpage, page, mode);
	else
		rc = fallback_migrate_page(mapping, newpage, page, mode);

	if (rc) {
		newpage->mapping = NULL;
	} else {
		if (remap_swapcache)
			remove_migration_ptes(page, newpage);
		page->mapping = NULL;
	}

	unlock_page(newpage);

	return rc;
}

static int __unmap_and_move(struct page *page, struct page *newpage,
			int force, bool offlining, enum migrate_mode mode)
{
	int rc = -EAGAIN;
	int remap_swapcache = 1;
	int charge = 0;
	struct mem_cgroup *mem;
	struct anon_vma *anon_vma = NULL;

	if (!trylock_page(page)) {
		if (!force || mode == MIGRATE_ASYNC)
			goto out;

		/*
		 * It's not safe for direct compaction to call lock_page.
		 * For example, during page readahead pages are added locked
		 * to the LRU. Later, when the IO completes the pages are
		 * marked uptodate and unlocked. However, the queueing
		 * could be merging multiple pages for one bio (e.g.
		 * mpage_readpages). If an allocation happens for the
		 * second or third page, the process can end up locking
		 * the same page twice and deadlocking. Rather than
		 * trying to be clever about what pages can be locked,
		 * avoid the use of lock_page for direct compaction
		 * altogether.
		 */
		if (current->flags & PF_MEMALLOC)
			goto out;

		lock_page(page);
	}

	/*
	 * Only memory hotplug's offline_pages() caller has locked out KSM,
	 * and can safely migrate a KSM page. The other cases have skipped
	 * PageKsm along with PageReserved - but it is only now when we have
	 * the page lock that we can be certain it will not go KSM beneath us
	 * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
	 * its pagecount raised, but only here do we take the page lock which
	 * serializes that).
	 */
	if (PageKsm(page) && !offlining) {
		rc = -EBUSY;
		goto unlock;
	}

	/* charge against new page */
	charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
	if (charge == -ENOMEM) {
		rc = -ENOMEM;
		goto unlock;
	}
	BUG_ON(charge);

	if (PageWriteback(page)) {
		/*
		 * Only in the case of a full synchronous migration is it
		 * necessary to wait for PageWriteback. In the async case,
		 * the retry loop is too short and in the sync-light case,
		 * the overhead of stalling is too much.
		 */
		if (mode != MIGRATE_SYNC) {
			rc = -EBUSY;
			goto uncharge;
		}
		if (!force)
			goto uncharge;
		wait_on_page_writeback(page);
	}
	/*
	 * try_to_unmap() drops page->mapcount to 0 here, so we could not
	 * otherwise notice if the anon_vma were freed while we migrate a
	 * page. Taking a reference via page_get_anon_vma() delays freeing
	 * the anon_vma pointer until the end of migration. File cache pages
	 * are no problem because they are serialized by the page lock (they
	 * may use writepage() or lock_page() during migration), so only
	 * anonymous pages need this care.
	 */
	if (PageAnon(page)) {
		/*
		 * Only page_lock_anon_vma() understands the subtleties of
		 * getting a hold on an anon_vma from outside one of its mms.
		 */
		anon_vma = page_get_anon_vma(page);
		if (anon_vma) {
			/*
			 * Anon page
			 */
		} else if (PageSwapCache(page)) {
			/*
			 * We cannot be sure that the anon_vma of an unmapped
			 * swapcache page is safe to use because we don't
			 * know in advance if the VMA that this page belonged
			 * to still exists. If the VMA and others sharing the
			 * data have been freed, then the anon_vma could
			 * already be invalid.
			 *
			 * To avoid this possibility, swapcache pages get
			 * migrated but are not remapped when migration
			 * completes.
			 */
			remap_swapcache = 0;
		} else {
			goto uncharge;
		}
	}

	/*
	 * Corner case handling:
	 * 1. When a new swap-cache page is read in, it is added to the LRU
	 *    and treated as swapcache but it has no rmap yet.
	 *    Calling try_to_unmap() against a page->mapping==NULL page will
	 *    trigger a BUG. So handle it here.
	 * 2. An orphaned page (see truncate_complete_page) might have
	 *    fs-private metadata. The page can be picked up due to memory
	 *    offlining. Everywhere else except page reclaim, the page is
	 *    invisible to the vm, so the page can not be migrated. So try to
	 *    free the metadata, so the page can be freed.
	 */
	if (!page->mapping) {
		VM_BUG_ON(PageAnon(page));
		if (page_has_private(page)) {
			try_to_free_buffers(page);
			goto uncharge;
		}
		goto skip_unmap;
	}

	/* Establish migration ptes or remove ptes */
	try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

skip_unmap:
	if (!page_mapped(page))
		rc = move_to_new_page(newpage, page, remap_swapcache, mode);

	if (rc && remap_swapcache)
		remove_migration_ptes(page, page);

	/* Drop an anon_vma reference if we took one */
	if (anon_vma)
		put_anon_vma(anon_vma);

uncharge:
	if (!charge)
		mem_cgroup_end_migration(mem, page, newpage, rc == 0);
unlock:
	unlock_page(page);
out:
	return rc;
}

/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
			struct page *page, int force, bool offlining,
			enum migrate_mode mode)
{
	int rc = 0;
	int *result = NULL;
	struct page *newpage = get_new_page(page, private, &result);

	if (!newpage)
		return -ENOMEM;

	if (page_count(page) == 1) {
		/* page was freed from under us. So we are done. */
		goto out;
	}

	if (unlikely(PageTransHuge(page)))
		if (unlikely(split_huge_page(page)))
			goto out;

	rc = __unmap_and_move(page, newpage, force, offlining, mode);
out:
	if (rc != -EAGAIN) {
		/*
		 * A page that has been migrated has all references
		 * removed and will be freed. A page that has not been
		 * migrated will have kept its references and be
		 * restored.
		 */
		list_del(&page->lru);
		dec_zone_page_state(page, NR_ISOLATED_ANON +
				page_is_file_cache(page));
		putback_lru_page(page);
	}
	/*
	 * Move the new page to the LRU. If migration was not successful
	 * then this will free the page.
	 */
	putback_lru_page(newpage);
	if (result) {
		if (rc)
			*result = rc;
		else
			*result = page_to_nid(newpage);
	}
	return rc;
}

/*
 * Counterpart of unmap_and_move() for hugepage migration.
 *
 * This function doesn't wait for the completion of hugepage I/O
 * because there is no race between I/O and migration for hugepage.
 * Note that currently hugepage I/O occurs only in direct I/O
 * where no lock is held and PG_writeback is irrelevant,
 * and the writeback status of all subpages is counted in the reference
 * count of the head page (i.e. if all subpages of a 2MB hugepage are
 * under direct I/O, the reference count of the head page is 512 and
 * a bit more.) This means that when we try to migrate a hugepage whose
 * subpages are doing direct I/O, some references remain after
 * try_to_unmap() and the hugepage migration fails without data corruption.
 *
 * There is also no race when direct I/O is issued on the page under
 * migration, because then the pte is replaced with a migration swap entry
 * and the direct I/O code will wait in the page fault for migration to
 * complete.
 */
static int unmap_and_move_huge_page(new_page_t get_new_page,
				unsigned long private, struct page *hpage,
				int force, bool offlining,
				enum migrate_mode mode)
{
	int rc = 0;
	int *result = NULL;
	struct page *new_hpage = get_new_page(hpage, private, &result);
	struct anon_vma *anon_vma = NULL;

	if (!new_hpage)
		return -ENOMEM;

	rc = -EAGAIN;

	if (!trylock_page(hpage)) {
		if (!force || mode != MIGRATE_SYNC)
			goto out;
		lock_page(hpage);
	}

	if (PageAnon(hpage))
		anon_vma = page_get_anon_vma(hpage);

	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);

	if (!page_mapped(hpage))
		rc = move_to_new_page(new_hpage, hpage, 1, mode);

	if (rc)
		remove_migration_ptes(hpage, hpage);

	if (anon_vma)
		put_anon_vma(anon_vma);
	unlock_page(hpage);

out:
	if (rc != -EAGAIN) {
		list_del(&hpage->lru);
		put_page(hpage);
	}

	put_page(new_hpage);

	if (result) {
		if (rc)
			*result = rc;
		else
			*result = page_to_nid(new_hpage);
	}
	return rc;
}

/*
 * migrate_pages
 *
 * The function takes one list of pages to migrate and a function
 * that determines from the page to be migrated and the private data
 * the target of the move and allocates the page.
 *
 * The function returns after 10 attempts or if no pages
 * are movable anymore because the list has become empty
 * or no retryable pages exist anymore.
 * Caller should call putback_lru_pages to return pages to the LRU
 * or free list only if ret != 0.
 *
 * Return: Number of pages not migrated or error code.
 */
int migrate_pages(struct list_head *from,
		new_page_t get_new_page, unsigned long private, bool offlining,
		enum migrate_mode mode)
{
	int retry = 1;
	int nr_failed = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

	for (pass = 0; pass < 10 && retry; pass++) {
		retry = 0;

		list_for_each_entry_safe(page, page2, from, lru) {
			cond_resched();

			rc = unmap_and_move(get_new_page, private,
						page, pass > 2, offlining,
						mode);

			switch (rc) {
			case -ENOMEM:
				goto out;
			case -EAGAIN:
				retry++;
				break;
			case 0:
				break;
			default:
				/* Permanent failure */
				nr_failed++;
				break;
			}
		}
	}
	rc = 0;
out:
	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	if (rc)
		return rc;

	return nr_failed + retry;
}
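
/*
 * Sketch of a minimal new_page_t callback (hypothetical, for illustration
 * only; a real caller such as new_page_node() below also records a result
 * slot): allocate the replacement page on the node passed in "private".
 *
 *	static struct page *new_node_page(struct page *page,
 *				unsigned long private, int **result)
 *	{
 *		return alloc_pages_node((int)private,
 *					GFP_HIGHUSER_MOVABLE, 0);
 *	}
 */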

int migrate_huge_pages(struct list_head *from,
		new_page_t get_new_page, unsigned long private, bool offlining,
		enum migrate_mode mode)
{
	int retry = 1;
	int nr_failed = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int rc;

	for (pass = 0; pass < 10 && retry; pass++) {
		retry = 0;

		list_for_each_entry_safe(page, page2, from, lru) {
			cond_resched();

			rc = unmap_and_move_huge_page(get_new_page,
					private, page, pass > 2, offlining,
					mode);

			switch (rc) {
			case -ENOMEM:
				goto out;
			case -EAGAIN:
				retry++;
				break;
			case 0:
				break;
			default:
				/* Permanent failure */
				nr_failed++;
				break;
			}
		}
	}
	rc = 0;
out:
	if (rc)
		return rc;

	return nr_failed + retry;
}

#ifdef CONFIG_NUMA
/*
 * Move a list of individual pages
 */
struct page_to_node {
	unsigned long addr;
	struct page *page;
	int node;
	int status;
};

static struct page *new_page_node(struct page *p, unsigned long private,
		int **result)
{
	struct page_to_node *pm = (struct page_to_node *)private;

	while (pm->node != MAX_NUMNODES && pm->page != p)
		pm++;

	if (pm->node == MAX_NUMNODES)
		return NULL;

	*result = &pm->status;

	return alloc_pages_exact_node(pm->node,
				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
}

/*
 * Move a set of pages as indicated in the pm array. The addr
 * field must be set to the virtual address of the page to be moved
 * and the node number must contain a valid target node.
 * The pm array ends with node = MAX_NUMNODES.
 */
static int do_move_page_to_node_array(struct mm_struct *mm,
				      struct page_to_node *pm,
				      int migrate_all)
{
	int err;
	struct page_to_node *pp;
	LIST_HEAD(pagelist);

	down_read(&mm->mmap_sem);

	/*
	 * Build a list of pages to migrate
	 */
	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
		struct vm_area_struct *vma;
		struct page *page;

		err = -EFAULT;
		vma = find_vma(mm, pp->addr);
		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
			goto set_status;

		page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);

		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto set_status;

		err = -ENOENT;
		if (!page)
			goto set_status;

		/* Use PageReserved to check for the zero page */
		if (PageReserved(page) || PageKsm(page))
			goto put_and_set;

		pp->page = page;
		err = page_to_nid(page);

		if (err == pp->node)
			/*
			 * Node already in the right place
			 */
			goto put_and_set;

		err = -EACCES;
		if (page_mapcount(page) > 1 &&
				!migrate_all)
			goto put_and_set;

		err = isolate_lru_page(page);
		if (!err) {
			list_add_tail(&page->lru, &pagelist);
			inc_zone_page_state(page, NR_ISOLATED_ANON +
					    page_is_file_cache(page));
		}
put_and_set:
		/*
		 * Either remove the duplicate refcount from
		 * isolate_lru_page() or drop the page ref if it was
		 * not isolated.
		 */
		put_page(page);
set_status:
		pp->status = err;
	}

	err = 0;
	if (!list_empty(&pagelist)) {
		err = migrate_pages(&pagelist, new_page_node,
				(unsigned long)pm, 0, MIGRATE_SYNC);
		if (err)
			putback_lru_pages(&pagelist);
	}

	up_read(&mm->mmap_sem);
	return err;
}

/*
 * Migrate an array of page addresses onto an array of nodes and fill
 * the corresponding array of status.
 */
static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
			 unsigned long nr_pages,
			 const void __user * __user *pages,
			 const int __user *nodes,
			 int __user *status, int flags)
{
	struct page_to_node *pm;
	nodemask_t task_nodes;
	unsigned long chunk_nr_pages;
	unsigned long chunk_start;
	int err;

	task_nodes = cpuset_mems_allowed(task);

	err = -ENOMEM;
	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
	if (!pm)
		goto out;

	migrate_prep();

	/*
	 * Store a chunk of the page_to_node array in a page,
	 * but keep the last one as a marker
	 */
	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;

	for (chunk_start = 0;
	     chunk_start < nr_pages;
	     chunk_start += chunk_nr_pages) {
		int j;

		if (chunk_start + chunk_nr_pages > nr_pages)
			chunk_nr_pages = nr_pages - chunk_start;

		/* fill the chunk pm with addrs and nodes from user-space */
		for (j = 0; j < chunk_nr_pages; j++) {
			const void __user *p;
			int node;

			err = -EFAULT;
			if (get_user(p, pages + j + chunk_start))
				goto out_pm;
			pm[j].addr = (unsigned long) p;

			if (get_user(node, nodes + j + chunk_start))
				goto out_pm;

			err = -ENODEV;
			if (node < 0 || node >= MAX_NUMNODES)
				goto out_pm;

			if (!node_state(node, N_HIGH_MEMORY))
				goto out_pm;

			err = -EACCES;
			if (!node_isset(node, task_nodes))
				goto out_pm;

			pm[j].node = node;
		}

		/* End marker for this chunk */
		pm[chunk_nr_pages].node = MAX_NUMNODES;

		/* Migrate this chunk */
		err = do_move_page_to_node_array(mm, pm,
						 flags & MPOL_MF_MOVE_ALL);
		if (err < 0)
			goto out_pm;

		/* Return status information */
		for (j = 0; j < chunk_nr_pages; j++)
			if (put_user(pm[j].status, status + j + chunk_start)) {
				err = -EFAULT;
				goto out_pm;
			}
	}
	err = 0;

out_pm:
	free_page((unsigned long)pm);
out:
	return err;
}

/*
 * Determine the nodes of an array of pages and store them in an array
 * of status values.
 */
static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
				const void __user **pages, int *status)
{
	unsigned long i;

	down_read(&mm->mmap_sem);

	for (i = 0; i < nr_pages; i++) {
		unsigned long addr = (unsigned long)(*pages);
		struct vm_area_struct *vma;
		struct page *page;
		int err = -EFAULT;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
			goto set_status;

		page = follow_page(vma, addr, 0);

		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto set_status;

		err = -ENOENT;
		/* Use PageReserved to check for the zero page */
		if (!page || PageReserved(page) || PageKsm(page))
			goto set_status;

		err = page_to_nid(page);
set_status:
		*status = err;

		pages++;
		status++;
	}

	up_read(&mm->mmap_sem);
}

/*
 * Determine the nodes of a user array of pages and store them in
 * a user array of status values.
 */
static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
			 const void __user * __user *pages,
			 int __user *status)
{
#define DO_PAGES_STAT_CHUNK_NR 16
	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
	int chunk_status[DO_PAGES_STAT_CHUNK_NR];

	while (nr_pages) {
		unsigned long chunk_nr;

		chunk_nr = nr_pages;
		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
			chunk_nr = DO_PAGES_STAT_CHUNK_NR;

		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
			break;

		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);

		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
			break;

		pages += chunk_nr;
		status += chunk_nr;
		nr_pages -= chunk_nr;
	}
	return nr_pages ? -EFAULT : 0;
}

/*
 * Move a list of pages in the address space of the currently executing
 * process.
 */
SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
		const void __user * __user *, pages,
		const int __user *, nodes,
		int __user *, status, int, flags)
{
	const struct cred *cred = current_cred(), *tcred;
	struct task_struct *task;
	struct mm_struct *mm;
	int err;

	/* Check flags */
	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
		return -EINVAL;

	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
		return -EPERM;

	/* Find the mm_struct */
	rcu_read_lock();
	task = pid ? find_task_by_vpid(pid) : current;
	if (!task) {
		rcu_read_unlock();
		return -ESRCH;
	}
	mm = get_task_mm(task);
	rcu_read_unlock();

	if (!mm)
		return -EINVAL;

	/*
	 * Check if this process has the right to modify the specified
	 * process. The right exists if the process has administrative
	 * capabilities, superuser privileges or the same
	 * userid as the target process.
	 */
	rcu_read_lock();
	tcred = __task_cred(task);
	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
	    cred->uid != tcred->suid && cred->uid != tcred->uid &&
	    !capable(CAP_SYS_NICE)) {
		rcu_read_unlock();
		err = -EPERM;
		goto out;
	}
	rcu_read_unlock();

	err = security_task_movememory(task);
	if (err)
		goto out;

	if (nodes) {
		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
				    flags);
	} else {
		err = do_pages_stat(mm, nr_pages, pages, status);
	}

out:
	mmput(mm);
	return err;
}
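
/*
 * Userspace view (an illustrative call through the libnuma wrapper, not
 * kernel code): move one page of the calling process to node 1 and read
 * back its status.
 *
 *	void *pages[1] = { addr };
 *	int nodes[1] = { 1 };
 *	int status[1];
 *	long rc = move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE);
 */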

/*
 * Call the migration functions in the vma_ops that may prepare
 * memory in a vm for migration. The migration functions may perform
 * the migration for vmas that do not have an underlying page struct.
 */
int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
	const nodemask_t *from, unsigned long flags)
{
	struct vm_area_struct *vma;
	int err = 0;

	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
		if (vma->vm_ops && vma->vm_ops->migrate) {
			err = vma->vm_ops->migrate(vma, to, from, flags);
			if (err)
				break;
		}
	}
	return err;
}
#endif