/*
 * linux/mm/page_io.c
 *
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Swap reorganised 29.12.95,
 * Asynchronous swapping added 30.12.95. Stephen Tweedie
 * Removed race in async swapping. 14.4.1996. Bruno Haible
 * Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 * Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/frontswap.h>
#include <linux/aio.h>
#include <linux/blkdev.h>
#include <asm/pgtable.h>

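/*
 * Allocate a single-segment bio aimed at the swap slot that backs @page.
 * map_swap_page() returns the slot offset in page-size units and fills in
 * the backing block device; shifting by (PAGE_SHIFT - 9) converts that to
 * the 512-byte sectors the block layer expects.
 */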
static struct bio *get_swap_bio(gfp_t gfp_flags,
                struct page *page, bio_end_io_t end_io)
{
    struct bio *bio;

    bio = bio_alloc(gfp_flags, 1);
    if (bio) {
        bio->bi_sector = map_swap_page(page, &bio->bi_bdev);
        bio->bi_sector <<= PAGE_SHIFT - 9;
        bio->bi_io_vec[0].bv_page = page;
        bio->bi_io_vec[0].bv_len = PAGE_SIZE;
        bio->bi_io_vec[0].bv_offset = 0;
        bio->bi_vcnt = 1;
        bio->bi_size = PAGE_SIZE;
        bio->bi_end_io = end_io;
    }
    return bio;
}

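/*
 * Completion handler for swap write bios.  On failure the page is
 * redirtied so its contents are not lost, and PG_reclaim is cleared so
 * rotate_reclaimable_page() leaves it alone.
 */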
void end_swap_bio_write(struct bio *bio, int err)
{
    const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
    struct page *page = bio->bi_io_vec[0].bv_page;

    if (!uptodate) {
        SetPageError(page);
        /*
         * We failed to write the page out to swap-space.
         * Re-dirty the page in order to avoid it being reclaimed.
         * Also print a dire warning that things will go BAD (tm)
         * very quickly.
         *
         * Also clear PG_reclaim to avoid rotate_reclaimable_page()
         */
        set_page_dirty(page);
        printk(KERN_ALERT "Write-error on swap-device (%u:%u:%Lu)\n",
                imajor(bio->bi_bdev->bd_inode),
                iminor(bio->bi_bdev->bd_inode),
                (unsigned long long)bio->bi_sector);
        ClearPageReclaim(page);
    }
    end_page_writeback(page);
    bio_put(bio);
}

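/*
 * Completion handler for swap read bios.  On success the page is marked
 * uptodate and, for block-backed swap, the device is given a chance to
 * free its own copy of the slot (see below); on failure PG_error is set.
 */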
void end_swap_bio_read(struct bio *bio, int err)
{
    const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
    struct page *page = bio->bi_io_vec[0].bv_page;

    if (!uptodate) {
        SetPageError(page);
        ClearPageUptodate(page);
        printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
                imajor(bio->bi_bdev->bd_inode),
                iminor(bio->bi_bdev->bd_inode),
                (unsigned long long)bio->bi_sector);
        goto out;
    }

    SetPageUptodate(page);

    /*
     * There is no guarantee that the page is in swap cache - the software
     * suspend code (at least) uses end_swap_bio_read() against a non-
     * swapcache page. So we must check PG_swapcache before proceeding with
     * this optimization.
     */
    if (likely(PageSwapCache(page))) {
        struct swap_info_struct *sis;

        sis = page_swap_info(page);
        if (sis->flags & SWP_BLKDEV) {
            /*
             * The swap subsystem performs lazy swap slot freeing,
             * expecting that the page will be swapped out again.
             * So we can avoid an unnecessary write if the page
             * isn't redirtied.
             * This is good for real swap storage because we can
             * reduce unnecessary I/O and enhance wear-leveling
             * if an SSD is used as the swap device.
             * But if an in-memory swap device (e.g. zram) is
             * used, this causes a duplicated copy between
             * uncompressed data in VM-owned memory and compressed
             * data in zram-owned memory. So let's free the
             * zram-owned memory and make the VM-owned
             * decompressed page *dirty*, so the page will be
             * swapped out again if we wish to reclaim it.
             * (A sketch of such a driver hook follows this
             * function.)
             */
            struct gendisk *disk = sis->bdev->bd_disk;
            if (disk->fops->swap_slot_free_notify) {
                swp_entry_t entry;
                unsigned long offset;

                entry.val = page_private(page);
                offset = swp_offset(entry);

                SetPageDirty(page);
                disk->fops->swap_slot_free_notify(sis->bdev,
                        offset);
            }
        }
    }

out:
    unlock_page(page);
    bio_put(bio);
}

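/*
 * A minimal sketch (not part of this file; the "mydrv" names are
 * hypothetical) of how an in-memory block driver can receive the
 * notification above: it supplies a swap_slot_free_notify method in its
 * block_device_operations.  zram's real hook frees the compressed copy
 * of the slot at @offset.
 */
static void mydrv_swap_slot_free_notify(struct block_device *bdev,
                    unsigned long offset)
{
    /* drop the driver-side (e.g. compressed) copy of slot @offset */
}

static const struct block_device_operations mydrv_fops = {
    .swap_slot_free_notify    = mydrv_swap_slot_free_notify,
};

/*
 * Walk a swap file with bmap() and build up the swap subsystem's extent
 * list.  Only runs of blocks that are PAGE_SIZE long and PAGE_SIZE
 * aligned on disk become extents; a file with holes is rejected.
 */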
int generic_swapfile_activate(struct swap_info_struct *sis,
                struct file *swap_file,
                sector_t *span)
{
    struct address_space *mapping = swap_file->f_mapping;
    struct inode *inode = mapping->host;
    unsigned blocks_per_page;
    unsigned long page_no;
    unsigned blkbits;
    sector_t probe_block;
    sector_t last_block;
    sector_t lowest_block = -1;
    sector_t highest_block = 0;
    int nr_extents = 0;
    int ret;

    blkbits = inode->i_blkbits;
    blocks_per_page = PAGE_SIZE >> blkbits;

    /*
     * Map all the blocks into the extent list. This code doesn't try
     * to be very smart.
     */
    probe_block = 0;
    page_no = 0;
    last_block = i_size_read(inode) >> blkbits;
    while ((probe_block + blocks_per_page) <= last_block &&
            page_no < sis->max) {
        unsigned block_in_page;
        sector_t first_block;

        first_block = bmap(inode, probe_block);
        if (first_block == 0)
            goto bad_bmap;

        /*
         * It must be PAGE_SIZE aligned on-disk
         */
        if (first_block & (blocks_per_page - 1)) {
            probe_block++;
            goto reprobe;
        }

        for (block_in_page = 1; block_in_page < blocks_per_page;
                    block_in_page++) {
            sector_t block;

            block = bmap(inode, probe_block + block_in_page);
            if (block == 0)
                goto bad_bmap;
            if (block != first_block + block_in_page) {
                /* Discontiguity */
                probe_block++;
                goto reprobe;
            }
        }

        first_block >>= (PAGE_SHIFT - blkbits);
        if (page_no) { /* exclude the header page */
            if (first_block < lowest_block)
                lowest_block = first_block;
            if (first_block > highest_block)
                highest_block = first_block;
        }

        /*
         * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
         */
        ret = add_swap_extent(sis, page_no, 1, first_block);
        if (ret < 0)
            goto out;
        nr_extents += ret;
        page_no++;
        probe_block += blocks_per_page;
reprobe:
        continue;
    }
    ret = nr_extents;
    *span = 1 + highest_block - lowest_block;
    if (page_no == 0)
        page_no = 1; /* force Empty message */
    sis->max = page_no;
    sis->pages = page_no - 1;
    sis->highest_bit = page_no - 1;
out:
    return ret;
bad_bmap:
    printk(KERN_ERR "swapon: swapfile has holes\n");
    ret = -EINVAL;
    goto out;
}

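/*
 * A sketch (not part of this file; "myfs" is a hypothetical name) of how
 * a filesystem whose blocks are directly addressable can reuse the
 * helper above: point the ->swap_activate address_space operation at
 * generic_swapfile_activate().
 */
static int myfs_swap_activate(struct swap_info_struct *sis,
                struct file *swap_file, sector_t *span)
{
    return generic_swapfile_activate(sis, swap_file, span);
}
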
/*
 * We may have stale swap cache pages in memory: notice
 * them here and get rid of the unnecessary final write.
 */
int swap_writepage(struct page *page, struct writeback_control *wbc)
{
    int ret = 0;

    if (try_to_free_swap(page)) {
        unlock_page(page);
        goto out;
    }
    if (frontswap_store(page) == 0) {
        /*
         * frontswap stored the page, so no block I/O is needed;
         * cycle writeback state so the page looks cleanly
         * written back.
         */
        set_page_writeback(page);
        unlock_page(page);
        end_page_writeback(page);
        goto out;
    }
    ret = __swap_writepage(page, wbc, end_swap_bio_write);
out:
    return ret;
}

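/*
 * Issue the actual swap-out I/O.  SWP_FILE swap (e.g. swap-over-NFS)
 * goes through the filesystem's direct_IO method; block-backed swap
 * goes straight to the block layer via a bio.
 */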
int __swap_writepage(struct page *page, struct writeback_control *wbc,
    void (*end_write_func)(struct bio *, int))
{
    struct bio *bio;
    int ret = 0, rw = WRITE;
    struct swap_info_struct *sis = page_swap_info(page);

    if (sis->flags & SWP_FILE) {
        struct kiocb kiocb;
        struct file *swap_file = sis->swap_file;
        struct address_space *mapping = swap_file->f_mapping;
        struct iovec iov = {
            .iov_base = kmap(page),
            .iov_len = PAGE_SIZE,
        };

        init_sync_kiocb(&kiocb, swap_file);
        kiocb.ki_pos = page_file_offset(page);
        kiocb.ki_nbytes = PAGE_SIZE;

        set_page_writeback(page);
        unlock_page(page);
        ret = mapping->a_ops->direct_IO(KERNEL_WRITE,
                        &kiocb, &iov,
                        kiocb.ki_pos, 1);
        kunmap(page);
        if (ret == PAGE_SIZE) {
            count_vm_event(PSWPOUT);
            ret = 0;
        } else {
            /*
             * In the case of swap-over-nfs, this can be a
             * temporary failure if the system has limited
             * memory for allocating transmit buffers.
             * Mark the page dirty and avoid
             * rotate_reclaimable_page(), and rate-limit the
             * messages; unlike the normal direct-to-bio case,
             * do not flag PageError, since the failure could
             * be temporary.
             */
            set_page_dirty(page);
            ClearPageReclaim(page);
            pr_err_ratelimited("Write error on dio swapfile (%Lu)\n",
                page_file_offset(page));
        }
        end_page_writeback(page);
        return ret;
    }

    bio = get_swap_bio(GFP_NOIO, page, end_write_func);
    if (bio == NULL) {
        set_page_dirty(page);
        unlock_page(page);
        ret = -ENOMEM;
        goto out;
    }
    if (wbc->sync_mode == WB_SYNC_ALL)
        rw |= REQ_SYNC;
    count_vm_event(PSWPOUT);
    set_page_writeback(page);
    unlock_page(page);
    submit_bio(rw, bio);
out:
    return ret;
}

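/*
 * Read a page back in from swap.  Frontswap is tried first; otherwise
 * SWP_FILE swap uses the filesystem's readpage method and block-backed
 * swap submits a read bio completed by end_swap_bio_read().
 */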
int swap_readpage(struct page *page)
{
    struct bio *bio;
    int ret = 0;
    struct swap_info_struct *sis = page_swap_info(page);

    VM_BUG_ON(!PageLocked(page));
    VM_BUG_ON(PageUptodate(page));
    if (frontswap_load(page) == 0) {
        SetPageUptodate(page);
        unlock_page(page);
        goto out;
    }

    if (sis->flags & SWP_FILE) {
        struct file *swap_file = sis->swap_file;
        struct address_space *mapping = swap_file->f_mapping;

        ret = mapping->a_ops->readpage(swap_file, page);
        if (!ret)
            count_vm_event(PSWPIN);
        return ret;
    }

    bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
    if (bio == NULL) {
        unlock_page(page);
        ret = -ENOMEM;
        goto out;
    }
    count_vm_event(PSWPIN);
    submit_bio(READ, bio);
out:
    return ret;
}

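/*
 * Dirty a swap cache page.  SWP_FILE swap must go through the
 * filesystem's set_page_dirty method; block-backed swap just sets the
 * dirty bit without queueing writeback.
 */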
int swap_set_page_dirty(struct page *page)
{
    struct swap_info_struct *sis = page_swap_info(page);

    if (sis->flags & SWP_FILE) {
        struct address_space *mapping = sis->swap_file->f_mapping;
        return mapping->a_ops->set_page_dirty(page);
    } else {
        return __set_page_dirty_no_writeback(page);
    }
}
