Root/fs/ext4/resize.c

1/*
2 * linux/fs/ext4/resize.c
3 *
4 * Support for resizing an ext4 filesystem while it is mounted.
5 *
6 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
7 *
8 * This could probably be made into a module, because it is not often in use.
9 */
10
11
12#define EXT4FS_DEBUG
13
14#include <linux/errno.h>
15#include <linux/slab.h>
16
17#include "ext4_jbd2.h"
18
/*
 * Half-open range tests: the range [first, last) contains 'first' but
 * not 'last'. outside() is simply the negation of inside().
 */
#define inside(b, first, last) ((b) >= (first) && (b) < (last))
#define outside(b, first, last) (!inside(b, first, last))
21
22static int verify_group_input(struct super_block *sb,
23                  struct ext4_new_group_data *input)
24{
25    struct ext4_sb_info *sbi = EXT4_SB(sb);
26    struct ext4_super_block *es = sbi->s_es;
27    ext4_fsblk_t start = ext4_blocks_count(es);
28    ext4_fsblk_t end = start + input->blocks_count;
29    ext4_group_t group = input->group;
30    ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
31    unsigned overhead = ext4_bg_has_super(sb, group) ?
32        (1 + ext4_bg_num_gdb(sb, group) +
33         le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
34    ext4_fsblk_t metaend = start + overhead;
35    struct buffer_head *bh = NULL;
36    ext4_grpblk_t free_blocks_count, offset;
37    int err = -EINVAL;
38
39    input->free_blocks_count = free_blocks_count =
40        input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
41
42    if (test_opt(sb, DEBUG))
43        printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks "
44               "(%d free, %u reserved)\n",
45               ext4_bg_has_super(sb, input->group) ? "normal" :
46               "no-super", input->group, input->blocks_count,
47               free_blocks_count, input->reserved_blocks);
48
49    ext4_get_group_no_and_offset(sb, start, NULL, &offset);
50    if (group != sbi->s_groups_count)
51        ext4_warning(sb, __func__,
52                 "Cannot add at group %u (only %u groups)",
53                 input->group, sbi->s_groups_count);
54    else if (offset != 0)
55            ext4_warning(sb, __func__, "Last group not full");
56    else if (input->reserved_blocks > input->blocks_count / 5)
57        ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
58                 input->reserved_blocks);
59    else if (free_blocks_count < 0)
60        ext4_warning(sb, __func__, "Bad blocks count %u",
61                 input->blocks_count);
62    else if (!(bh = sb_bread(sb, end - 1)))
63        ext4_warning(sb, __func__,
64                 "Cannot read last block (%llu)",
65                 end - 1);
66    else if (outside(input->block_bitmap, start, end))
67        ext4_warning(sb, __func__,
68                 "Block bitmap not in group (block %llu)",
69                 (unsigned long long)input->block_bitmap);
70    else if (outside(input->inode_bitmap, start, end))
71        ext4_warning(sb, __func__,
72                 "Inode bitmap not in group (block %llu)",
73                 (unsigned long long)input->inode_bitmap);
74    else if (outside(input->inode_table, start, end) ||
75         outside(itend - 1, start, end))
76        ext4_warning(sb, __func__,
77                 "Inode table not in group (blocks %llu-%llu)",
78                 (unsigned long long)input->inode_table, itend - 1);
79    else if (input->inode_bitmap == input->block_bitmap)
80        ext4_warning(sb, __func__,
81                 "Block bitmap same as inode bitmap (%llu)",
82                 (unsigned long long)input->block_bitmap);
83    else if (inside(input->block_bitmap, input->inode_table, itend))
84        ext4_warning(sb, __func__,
85                 "Block bitmap (%llu) in inode table (%llu-%llu)",
86                 (unsigned long long)input->block_bitmap,
87                 (unsigned long long)input->inode_table, itend - 1);
88    else if (inside(input->inode_bitmap, input->inode_table, itend))
89        ext4_warning(sb, __func__,
90                 "Inode bitmap (%llu) in inode table (%llu-%llu)",
91                 (unsigned long long)input->inode_bitmap,
92                 (unsigned long long)input->inode_table, itend - 1);
93    else if (inside(input->block_bitmap, start, metaend))
94        ext4_warning(sb, __func__,
95                 "Block bitmap (%llu) in GDT table"
96                 " (%llu-%llu)",
97                 (unsigned long long)input->block_bitmap,
98                 start, metaend - 1);
99    else if (inside(input->inode_bitmap, start, metaend))
100        ext4_warning(sb, __func__,
101                 "Inode bitmap (%llu) in GDT table"
102                 " (%llu-%llu)",
103                 (unsigned long long)input->inode_bitmap,
104                 start, metaend - 1);
105    else if (inside(input->inode_table, start, metaend) ||
106         inside(itend - 1, start, metaend))
107        ext4_warning(sb, __func__,
108                 "Inode table (%llu-%llu) overlaps"
109                 "GDT table (%llu-%llu)",
110                 (unsigned long long)input->inode_table,
111                 itend - 1, start, metaend - 1);
112    else
113        err = 0;
114    brelse(bh);
115
116    return err;
117}
118
119static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
120                  ext4_fsblk_t blk)
121{
122    struct buffer_head *bh;
123    int err;
124
125    bh = sb_getblk(sb, blk);
126    if (!bh)
127        return ERR_PTR(-EIO);
128    if ((err = ext4_journal_get_write_access(handle, bh))) {
129        brelse(bh);
130        bh = ERR_PTR(err);
131    } else {
132        lock_buffer(bh);
133        memset(bh->b_data, 0, sb->s_blocksize);
134        set_buffer_uptodate(bh);
135        unlock_buffer(bh);
136    }
137
138    return bh;
139}
140
141/*
142 * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
143 * If that fails, restart the transaction & regain write access for the
144 * buffer head which is used for block_bitmap modifications.
145 */
146static int extend_or_restart_transaction(handle_t *handle, int thresh,
147                     struct buffer_head *bh)
148{
149    int err;
150
151    if (ext4_handle_has_enough_credits(handle, thresh))
152        return 0;
153
154    err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
155    if (err < 0)
156        return err;
157    if (err) {
158        if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
159            return err;
160        if ((err = ext4_journal_get_write_access(handle, bh)))
161            return err;
162    }
163
164    return 0;
165}
166
167/*
168 * Set up the block and inode bitmaps, and the inode table for the new group.
169 * This doesn't need to be part of the main transaction, since we are only
170 * changing blocks outside the actual filesystem. We still do journaling to
171 * ensure the recovery is correct in case of a failure just after resize.
172 * If any part of this fails, we simply abort the resize.
173 */
174static int setup_new_group_blocks(struct super_block *sb,
175                  struct ext4_new_group_data *input)
176{
177    struct ext4_sb_info *sbi = EXT4_SB(sb);
178    ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group);
179    int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
180        le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
181    unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group);
182    struct buffer_head *bh;
183    handle_t *handle;
184    ext4_fsblk_t block;
185    ext4_grpblk_t bit;
186    int i;
187    int err = 0, err2;
188
189    /* This transaction may be extended/restarted along the way */
190    handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
191
192    if (IS_ERR(handle))
193        return PTR_ERR(handle);
194
195    mutex_lock(&sbi->s_resize_lock);
196    if (input->group != sbi->s_groups_count) {
197        err = -EBUSY;
198        goto exit_journal;
199    }
200
201    if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
202        err = PTR_ERR(bh);
203        goto exit_journal;
204    }
205
206    if (ext4_bg_has_super(sb, input->group)) {
207        ext4_debug("mark backup superblock %#04llx (+0)\n", start);
208        ext4_set_bit(0, bh->b_data);
209    }
210
211    /* Copy all of the GDT blocks into the backup in this group */
212    for (i = 0, bit = 1, block = start + 1;
213         i < gdblocks; i++, block++, bit++) {
214        struct buffer_head *gdb;
215
216        ext4_debug("update backup group %#04llx (+%d)\n", block, bit);
217
218        if ((err = extend_or_restart_transaction(handle, 1, bh)))
219            goto exit_bh;
220
221        gdb = sb_getblk(sb, block);
222        if (!gdb) {
223            err = -EIO;
224            goto exit_bh;
225        }
226        if ((err = ext4_journal_get_write_access(handle, gdb))) {
227            brelse(gdb);
228            goto exit_bh;
229        }
230        lock_buffer(gdb);
231        memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
232        set_buffer_uptodate(gdb);
233        unlock_buffer(gdb);
234        ext4_handle_dirty_metadata(handle, NULL, gdb);
235        ext4_set_bit(bit, bh->b_data);
236        brelse(gdb);
237    }
238
239    /* Zero out all of the reserved backup group descriptor table blocks */
240    for (i = 0, bit = gdblocks + 1, block = start + bit;
241         i < reserved_gdb; i++, block++, bit++) {
242        struct buffer_head *gdb;
243
244        ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
245
246        if ((err = extend_or_restart_transaction(handle, 1, bh)))
247            goto exit_bh;
248
249        if (IS_ERR(gdb = bclean(handle, sb, block))) {
250            err = PTR_ERR(bh);
251            goto exit_bh;
252        }
253        ext4_handle_dirty_metadata(handle, NULL, gdb);
254        ext4_set_bit(bit, bh->b_data);
255        brelse(gdb);
256    }
257    ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
258           input->block_bitmap - start);
259    ext4_set_bit(input->block_bitmap - start, bh->b_data);
260    ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap,
261           input->inode_bitmap - start);
262    ext4_set_bit(input->inode_bitmap - start, bh->b_data);
263
264    /* Zero out all of the inode table blocks */
265    for (i = 0, block = input->inode_table, bit = block - start;
266         i < sbi->s_itb_per_group; i++, bit++, block++) {
267        struct buffer_head *it;
268
269        ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
270
271        if ((err = extend_or_restart_transaction(handle, 1, bh)))
272            goto exit_bh;
273
274        if (IS_ERR(it = bclean(handle, sb, block))) {
275            err = PTR_ERR(it);
276            goto exit_bh;
277        }
278        ext4_handle_dirty_metadata(handle, NULL, it);
279        brelse(it);
280        ext4_set_bit(bit, bh->b_data);
281    }
282
283    if ((err = extend_or_restart_transaction(handle, 2, bh)))
284        goto exit_bh;
285
286    mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
287    ext4_handle_dirty_metadata(handle, NULL, bh);
288    brelse(bh);
289    /* Mark unused entries in inode bitmap used */
290    ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
291           input->inode_bitmap, input->inode_bitmap - start);
292    if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
293        err = PTR_ERR(bh);
294        goto exit_journal;
295    }
296
297    mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
298            bh->b_data);
299    ext4_handle_dirty_metadata(handle, NULL, bh);
300exit_bh:
301    brelse(bh);
302
303exit_journal:
304    mutex_unlock(&sbi->s_resize_lock);
305    if ((err2 = ext4_journal_stop(handle)) && !err)
306        err = err2;
307
308    return err;
309}
310
311/*
312 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
313 * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before
314 * calling this for the first time. In a sparse filesystem it will be the
315 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
316 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
317 */
318static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
319                  unsigned *five, unsigned *seven)
320{
321    unsigned *min = three;
322    int mult = 3;
323    unsigned ret;
324
325    if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
326                    EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
327        ret = *min;
328        *min += 1;
329        return ret;
330    }
331
332    if (*five < *min) {
333        min = five;
334        mult = 5;
335    }
336    if (*seven < *min) {
337        min = seven;
338        mult = 7;
339    }
340
341    ret = *min;
342    *min *= mult;
343
344    return ret;
345}
346
347/*
348 * Check that all of the backup GDT blocks are held in the primary GDT block.
349 * It is assumed that they are stored in group order. Returns the number of
350 * groups in current filesystem that have BACKUPS, or -ve error code.
351 */
352static int verify_reserved_gdb(struct super_block *sb,
353                   struct buffer_head *primary)
354{
355    const ext4_fsblk_t blk = primary->b_blocknr;
356    const ext4_group_t end = EXT4_SB(sb)->s_groups_count;
357    unsigned three = 1;
358    unsigned five = 5;
359    unsigned seven = 7;
360    unsigned grp;
361    __le32 *p = (__le32 *)primary->b_data;
362    int gdbackups = 0;
363
364    while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
365        if (le32_to_cpu(*p++) !=
366            grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
367            ext4_warning(sb, __func__,
368                     "reserved GDT %llu"
369                     " missing grp %d (%llu)",
370                     blk, grp,
371                     grp *
372                     (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
373                     blk);
374            return -EINVAL;
375        }
376        if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb))
377            return -EFBIG;
378    }
379
380    return gdbackups;
381}
382
383/*
384 * Called when we need to bring a reserved group descriptor table block into
385 * use from the resize inode. The primary copy of the new GDT block currently
386 * is an indirect block (under the double indirect block in the resize inode).
387 * The new backup GDT blocks will be stored as leaf blocks in this indirect
388 * block, in group order. Even though we know all the block numbers we need,
389 * we check to ensure that the resize inode has actually reserved these blocks.
390 *
391 * Don't need to update the block bitmaps because the blocks are still in use.
392 *
393 * We get all of the error cases out of the way, so that we are sure to not
394 * fail once we start modifying the data on disk, because JBD has no rollback.
395 */
396static int add_new_gdb(handle_t *handle, struct inode *inode,
397               struct ext4_new_group_data *input,
398               struct buffer_head **primary)
399{
400    struct super_block *sb = inode->i_sb;
401    struct ext4_super_block *es = EXT4_SB(sb)->s_es;
402    unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
403    ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
404    struct buffer_head **o_group_desc, **n_group_desc;
405    struct buffer_head *dind;
406    int gdbackups;
407    struct ext4_iloc iloc;
408    __le32 *data;
409    int err;
410
411    if (test_opt(sb, DEBUG))
412        printk(KERN_DEBUG
413               "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
414               gdb_num);
415
416    /*
417     * If we are not using the primary superblock/GDT copy don't resize,
418         * because the user tools have no way of handling this. Probably a
419         * bad time to do it anyways.
420         */
421    if (EXT4_SB(sb)->s_sbh->b_blocknr !=
422        le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
423        ext4_warning(sb, __func__,
424            "won't resize using backup superblock at %llu",
425            (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
426        return -EPERM;
427    }
428
429    *primary = sb_bread(sb, gdblock);
430    if (!*primary)
431        return -EIO;
432
433    if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
434        err = gdbackups;
435        goto exit_bh;
436    }
437
438    data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
439    dind = sb_bread(sb, le32_to_cpu(*data));
440    if (!dind) {
441        err = -EIO;
442        goto exit_bh;
443    }
444
445    data = (__le32 *)dind->b_data;
446    if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
447        ext4_warning(sb, __func__,
448                 "new group %u GDT block %llu not reserved",
449                 input->group, gdblock);
450        err = -EINVAL;
451        goto exit_dind;
452    }
453
454    if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh)))
455        goto exit_dind;
456
457    if ((err = ext4_journal_get_write_access(handle, *primary)))
458        goto exit_sbh;
459
460    if ((err = ext4_journal_get_write_access(handle, dind)))
461        goto exit_primary;
462
463    /* ext4_reserve_inode_write() gets a reference on the iloc */
464    if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
465        goto exit_dindj;
466
467    n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
468            GFP_NOFS);
469    if (!n_group_desc) {
470        err = -ENOMEM;
471        ext4_warning(sb, __func__,
472                  "not enough memory for %lu groups", gdb_num + 1);
473        goto exit_inode;
474    }
475
476    /*
477     * Finally, we have all of the possible failures behind us...
478     *
479     * Remove new GDT block from inode double-indirect block and clear out
480     * the new GDT block for use (which also "frees" the backup GDT blocks
481     * from the reserved inode). We don't need to change the bitmaps for
482     * these blocks, because they are marked as in-use from being in the
483     * reserved inode, and will become GDT blocks (primary and backup).
484     */
485    data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
486    ext4_handle_dirty_metadata(handle, NULL, dind);
487    brelse(dind);
488    inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
489    ext4_mark_iloc_dirty(handle, inode, &iloc);
490    memset((*primary)->b_data, 0, sb->s_blocksize);
491    ext4_handle_dirty_metadata(handle, NULL, *primary);
492
493    o_group_desc = EXT4_SB(sb)->s_group_desc;
494    memcpy(n_group_desc, o_group_desc,
495           EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
496    n_group_desc[gdb_num] = *primary;
497    EXT4_SB(sb)->s_group_desc = n_group_desc;
498    EXT4_SB(sb)->s_gdb_count++;
499    kfree(o_group_desc);
500
501    le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
502    ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
503
504    return 0;
505
506exit_inode:
507    /* ext4_journal_release_buffer(handle, iloc.bh); */
508    brelse(iloc.bh);
509exit_dindj:
510    /* ext4_journal_release_buffer(handle, dind); */
511exit_primary:
512    /* ext4_journal_release_buffer(handle, *primary); */
513exit_sbh:
514    /* ext4_journal_release_buffer(handle, *primary); */
515exit_dind:
516    brelse(dind);
517exit_bh:
518    brelse(*primary);
519
520    ext4_debug("leaving with error %d\n", err);
521    return err;
522}
523
524/*
525 * Called when we are adding a new group which has a backup copy of each of
526 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
527 * We need to add these reserved backup GDT blocks to the resize inode, so
528 * that they are kept for future resizing and not allocated to files.
529 *
530 * Each reserved backup GDT block will go into a different indirect block.
531 * The indirect blocks are actually the primary reserved GDT blocks,
532 * so we know in advance what their block numbers are. We only get the
533 * double-indirect block to verify it is pointing to the primary reserved
534 * GDT blocks so we don't overwrite a data block by accident. The reserved
535 * backup GDT blocks are stored in their reserved primary GDT block.
536 */
537static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
538                  struct ext4_new_group_data *input)
539{
540    struct super_block *sb = inode->i_sb;
541    int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
542    struct buffer_head **primary;
543    struct buffer_head *dind;
544    struct ext4_iloc iloc;
545    ext4_fsblk_t blk;
546    __le32 *data, *end;
547    int gdbackups = 0;
548    int res, i;
549    int err;
550
551    primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
552    if (!primary)
553        return -ENOMEM;
554
555    data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
556    dind = sb_bread(sb, le32_to_cpu(*data));
557    if (!dind) {
558        err = -EIO;
559        goto exit_free;
560    }
561
562    blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
563    data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
564                     EXT4_ADDR_PER_BLOCK(sb));
565    end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
566
567    /* Get each reserved primary GDT block and verify it holds backups */
568    for (res = 0; res < reserved_gdb; res++, blk++) {
569        if (le32_to_cpu(*data) != blk) {
570            ext4_warning(sb, __func__,
571                     "reserved block %llu"
572                     " not at offset %ld",
573                     blk,
574                     (long)(data - (__le32 *)dind->b_data));
575            err = -EINVAL;
576            goto exit_bh;
577        }
578        primary[res] = sb_bread(sb, blk);
579        if (!primary[res]) {
580            err = -EIO;
581            goto exit_bh;
582        }
583        if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
584            brelse(primary[res]);
585            err = gdbackups;
586            goto exit_bh;
587        }
588        if (++data >= end)
589            data = (__le32 *)dind->b_data;
590    }
591
592    for (i = 0; i < reserved_gdb; i++) {
593        if ((err = ext4_journal_get_write_access(handle, primary[i]))) {
594            /*
595            int j;
596            for (j = 0; j < i; j++)
597                ext4_journal_release_buffer(handle, primary[j]);
598             */
599            goto exit_bh;
600        }
601    }
602
603    if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
604        goto exit_bh;
605
606    /*
607     * Finally we can add each of the reserved backup GDT blocks from
608     * the new group to its reserved primary GDT block.
609     */
610    blk = input->group * EXT4_BLOCKS_PER_GROUP(sb);
611    for (i = 0; i < reserved_gdb; i++) {
612        int err2;
613        data = (__le32 *)primary[i]->b_data;
614        /* printk("reserving backup %lu[%u] = %lu\n",
615               primary[i]->b_blocknr, gdbackups,
616               blk + primary[i]->b_blocknr); */
617        data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
618        err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
619        if (!err)
620            err = err2;
621    }
622    inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
623    ext4_mark_iloc_dirty(handle, inode, &iloc);
624
625exit_bh:
626    while (--res >= 0)
627        brelse(primary[res]);
628    brelse(dind);
629
630exit_free:
631    kfree(primary);
632
633    return err;
634}
635
636/*
637 * Update the backup copies of the ext4 metadata. These don't need to be part
638 * of the main resize transaction, because e2fsck will re-write them if there
639 * is a problem (basically only OOM will cause a problem). However, we
640 * _should_ update the backups if possible, in case the primary gets trashed
641 * for some reason and we need to run e2fsck from a backup superblock. The
642 * important part is that the new block and inode counts are in the backup
643 * superblocks, and the location of the new group metadata in the GDT backups.
644 *
645 * We do not need take the s_resize_lock for this, because these
646 * blocks are not otherwise touched by the filesystem code when it is
647 * mounted. We don't need to worry about last changing from
648 * sbi->s_groups_count, because the worst that can happen is that we
649 * do not copy the full number of backups at this time. The resize
650 * which changed s_groups_count will backup again.
651 */
652static void update_backups(struct super_block *sb,
653               int blk_off, char *data, int size)
654{
655    struct ext4_sb_info *sbi = EXT4_SB(sb);
656    const ext4_group_t last = sbi->s_groups_count;
657    const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
658    unsigned three = 1;
659    unsigned five = 5;
660    unsigned seven = 7;
661    ext4_group_t group;
662    int rest = sb->s_blocksize - size;
663    handle_t *handle;
664    int err = 0, err2;
665
666    handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
667    if (IS_ERR(handle)) {
668        group = 1;
669        err = PTR_ERR(handle);
670        goto exit_err;
671    }
672
673    while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) {
674        struct buffer_head *bh;
675
676        /* Out of journal space, and can't get more - abort - so sad */
677        if (ext4_handle_valid(handle) &&
678            handle->h_buffer_credits == 0 &&
679            ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
680            (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
681            break;
682
683        bh = sb_getblk(sb, group * bpg + blk_off);
684        if (!bh) {
685            err = -EIO;
686            break;
687        }
688        ext4_debug("update metadata backup %#04lx\n",
689              (unsigned long)bh->b_blocknr);
690        if ((err = ext4_journal_get_write_access(handle, bh)))
691            break;
692        lock_buffer(bh);
693        memcpy(bh->b_data, data, size);
694        if (rest)
695            memset(bh->b_data + size, 0, rest);
696        set_buffer_uptodate(bh);
697        unlock_buffer(bh);
698        ext4_handle_dirty_metadata(handle, NULL, bh);
699        brelse(bh);
700    }
701    if ((err2 = ext4_journal_stop(handle)) && !err)
702        err = err2;
703
704    /*
705     * Ugh! Need to have e2fsck write the backup copies. It is too
706     * late to revert the resize, we shouldn't fail just because of
707     * the backup copies (they are only needed in case of corruption).
708     *
709     * However, if we got here we have a journal problem too, so we
710     * can't really start a transaction to mark the superblock.
711     * Chicken out and just set the flag on the hope it will be written
712     * to disk, and if not - we will simply wait until next fsck.
713     */
714exit_err:
715    if (err) {
716        ext4_warning(sb, __func__,
717                 "can't update backup for group %u (err %d), "
718                 "forcing fsck on next reboot", group, err);
719        sbi->s_mount_state &= ~EXT4_VALID_FS;
720        sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
721        mark_buffer_dirty(sbi->s_sbh);
722    }
723}
724
725/* Add group descriptor data to an existing or new group descriptor block.
726 * Ensure we handle all possible error conditions _before_ we start modifying
727 * the filesystem, because we cannot abort the transaction and not have it
728 * write the data to disk.
729 *
730 * If we are on a GDT block boundary, we need to get the reserved GDT block.
731 * Otherwise, we may need to add backup GDT blocks for a sparse group.
732 *
733 * We only need to hold the superblock lock while we are actually adding
734 * in the new group's counts to the superblock. Prior to that we have
735 * not really "added" the group at all. We re-check that we are still
736 * adding in the last group in case things have changed since verifying.
737 */
738int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
739{
740    struct ext4_sb_info *sbi = EXT4_SB(sb);
741    struct ext4_super_block *es = sbi->s_es;
742    int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
743        le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
744    struct buffer_head *primary = NULL;
745    struct ext4_group_desc *gdp;
746    struct inode *inode = NULL;
747    handle_t *handle;
748    int gdb_off, gdb_num;
749    int num_grp_locked = 0;
750    int err, err2;
751
752    gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
753    gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
754
755    if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
756                    EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
757        ext4_warning(sb, __func__,
758                 "Can't resize non-sparse filesystem further");
759        return -EPERM;
760    }
761
762    if (ext4_blocks_count(es) + input->blocks_count <
763        ext4_blocks_count(es)) {
764        ext4_warning(sb, __func__, "blocks_count overflow");
765        return -EINVAL;
766    }
767
768    if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
769        le32_to_cpu(es->s_inodes_count)) {
770        ext4_warning(sb, __func__, "inodes_count overflow");
771        return -EINVAL;
772    }
773
774    if (reserved_gdb || gdb_off == 0) {
775        if (!EXT4_HAS_COMPAT_FEATURE(sb,
776                         EXT4_FEATURE_COMPAT_RESIZE_INODE)
777            || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
778            ext4_warning(sb, __func__,
779                     "No reserved GDT blocks, can't resize");
780            return -EPERM;
781        }
782        inode = ext4_iget(sb, EXT4_RESIZE_INO);
783        if (IS_ERR(inode)) {
784            ext4_warning(sb, __func__,
785                     "Error opening resize inode");
786            return PTR_ERR(inode);
787        }
788    }
789
790
791    if ((err = verify_group_input(sb, input)))
792        goto exit_put;
793
794    if ((err = setup_new_group_blocks(sb, input)))
795        goto exit_put;
796
797    /*
798     * We will always be modifying at least the superblock and a GDT
799     * block. If we are adding a group past the last current GDT block,
800     * we will also modify the inode and the dindirect block. If we
801     * are adding a group with superblock/GDT backups we will also
802     * modify each of the reserved GDT dindirect blocks.
803     */
804    handle = ext4_journal_start_sb(sb,
805                       ext4_bg_has_super(sb, input->group) ?
806                       3 + reserved_gdb : 4);
807    if (IS_ERR(handle)) {
808        err = PTR_ERR(handle);
809        goto exit_put;
810    }
811
812    mutex_lock(&sbi->s_resize_lock);
813    if (input->group != sbi->s_groups_count) {
814        ext4_warning(sb, __func__,
815                 "multiple resizers run on filesystem!");
816        err = -EBUSY;
817        goto exit_journal;
818    }
819
820    if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
821        goto exit_journal;
822
823        /*
824         * We will only either add reserved group blocks to a backup group
825         * or remove reserved blocks for the first group in a new group block.
826         * Doing both would be mean more complex code, and sane people don't
827         * use non-sparse filesystems anymore. This is already checked above.
828         */
829    if (gdb_off) {
830        primary = sbi->s_group_desc[gdb_num];
831        if ((err = ext4_journal_get_write_access(handle, primary)))
832            goto exit_journal;
833
834        if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) &&
835            (err = reserve_backup_gdb(handle, inode, input)))
836            goto exit_journal;
837    } else if ((err = add_new_gdb(handle, inode, input, &primary)))
838        goto exit_journal;
839
840        /*
841         * OK, now we've set up the new group. Time to make it active.
842         *
843         * We do not lock all allocations via s_resize_lock
844         * so we have to be safe wrt. concurrent accesses to the group
845         * data. So we need to be careful to set all of the relevant
846         * group descriptor data etc. *before* we enable the group.
847         *
848         * The key field here is sbi->s_groups_count: as long as
849         * that retains its old value, nobody is going to access the new
850         * group.
851         *
852         * So first we update all the descriptor metadata for the new
853         * group; then we update the total disk blocks count; then we
854         * update the groups count to enable the group; then finally we
855         * update the free space counts so that the system can start
856         * using the new disk blocks.
857         */
858
859    num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
860    /* Update group descriptor block for new group */
861    gdp = (struct ext4_group_desc *)((char *)primary->b_data +
862                     gdb_off * EXT4_DESC_SIZE(sb));
863
864    memset(gdp, 0, EXT4_DESC_SIZE(sb));
865    ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
866    ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
867    ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
868    ext4_free_blks_set(sb, gdp, input->free_blocks_count);
869    ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
870    gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
871    gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
872
873    /*
874     * We can allocate memory for mb_alloc based on the new group
875     * descriptor
876     */
877    err = ext4_mb_add_groupinfo(sb, input->group, gdp);
878    if (err) {
879        ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
880        goto exit_journal;
881    }
882
883    /*
884     * Make the new blocks and inodes valid next. We do this before
885     * increasing the group count so that once the group is enabled,
886     * all of its blocks and inodes are already valid.
887     *
888     * We always allocate group-by-group, then block-by-block or
889     * inode-by-inode within a group, so enabling these
890     * blocks/inodes before the group is live won't actually let us
891     * allocate the new space yet.
892     */
893    ext4_blocks_count_set(es, ext4_blocks_count(es) +
894        input->blocks_count);
895    le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
896
897    /*
898     * We need to protect s_groups_count against other CPUs seeing
899     * inconsistent state in the superblock.
900     *
901     * The precise rules we use are:
902     *
903     * * Writers of s_groups_count *must* hold s_resize_lock
904     * AND
905     * * Writers must perform a smp_wmb() after updating all dependent
906     * data and before modifying the groups count
907     *
908     * * Readers must hold s_resize_lock over the access
909     * OR
910     * * Readers must perform an smp_rmb() after reading the groups count
911     * and before reading any dependent data.
912     *
913     * NB. These rules can be relaxed when checking the group count
914     * while freeing data, as we can only allocate from a block
915     * group after serialising against the group count, and we can
916     * only then free after serialising in turn against that
917     * allocation.
918     */
919    smp_wmb();
920
921    /* Update the global fs size fields */
922    sbi->s_groups_count++;
923    ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
924
925    ext4_handle_dirty_metadata(handle, NULL, primary);
926
927    /* Update the reserved block counts only once the new group is
928     * active. */
929    ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
930        input->reserved_blocks);
931
932    /* Update the free space counts */
933    percpu_counter_add(&sbi->s_freeblocks_counter,
934               input->free_blocks_count);
935    percpu_counter_add(&sbi->s_freeinodes_counter,
936               EXT4_INODES_PER_GROUP(sb));
937
938    if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
939        ext4_group_t flex_group;
940        flex_group = ext4_flex_group(sbi, input->group);
941        atomic_add(input->free_blocks_count,
942               &sbi->s_flex_groups[flex_group].free_blocks);
943        atomic_add(EXT4_INODES_PER_GROUP(sb),
944               &sbi->s_flex_groups[flex_group].free_inodes);
945    }
946
947    ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
948    sb->s_dirt = 1;
949
950exit_journal:
951    mutex_unlock(&sbi->s_resize_lock);
952    if ((err2 = ext4_journal_stop(handle)) && !err)
953        err = err2;
954    if (!err) {
955        update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
956                   sizeof(struct ext4_super_block));
957        update_backups(sb, primary->b_blocknr, primary->b_data,
958                   primary->b_size);
959    }
960exit_put:
961    iput(inode);
962    return err;
963} /* ext4_group_add */
964
965/*
966 * Extend the filesystem to the new number of blocks specified. This entry
967 * point is only used to extend the current filesystem to the end of the last
968 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
969 * for emergencies (because it has no dependencies on reserved blocks).
970 *
971 * If we _really_ wanted, we could use default values to call ext4_group_add()
972 * allow the "remount" trick to work for arbitrary resizing, assuming enough
973 * GDT blocks are reserved to grow to the desired size.
974 */
975int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
976              ext4_fsblk_t n_blocks_count)
977{
978    ext4_fsblk_t o_blocks_count;
979    ext4_group_t o_groups_count;
980    ext4_grpblk_t last;
981    ext4_grpblk_t add;
982    struct buffer_head *bh;
983    handle_t *handle;
984    int err;
985    ext4_group_t group;
986
987    /* We don't need to worry about locking wrt other resizers just
988     * yet: we're going to revalidate es->s_blocks_count after
989     * taking the s_resize_lock below. */
990    o_blocks_count = ext4_blocks_count(es);
991    o_groups_count = EXT4_SB(sb)->s_groups_count;
992
993    if (test_opt(sb, DEBUG))
994        printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n",
995               o_blocks_count, n_blocks_count);
996
997    if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
998        return 0;
999
1000    if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1001        printk(KERN_ERR "EXT4-fs: filesystem on %s:"
1002            " too large to resize to %llu blocks safely\n",
1003            sb->s_id, n_blocks_count);
1004        if (sizeof(sector_t) < 8)
1005            ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled");
1006        return -EINVAL;
1007    }
1008
1009    if (n_blocks_count < o_blocks_count) {
1010        ext4_warning(sb, __func__,
1011                 "can't shrink FS - resize aborted");
1012        return -EBUSY;
1013    }
1014
1015    /* Handle the remaining blocks in the last group only. */
1016    ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1017
1018    if (last == 0) {
1019        ext4_warning(sb, __func__,
1020                 "need to use ext2online to resize further");
1021        return -EPERM;
1022    }
1023
1024    add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1025
1026    if (o_blocks_count + add < o_blocks_count) {
1027        ext4_warning(sb, __func__, "blocks_count overflow");
1028        return -EINVAL;
1029    }
1030
1031    if (o_blocks_count + add > n_blocks_count)
1032        add = n_blocks_count - o_blocks_count;
1033
1034    if (o_blocks_count + add < n_blocks_count)
1035        ext4_warning(sb, __func__,
1036                 "will only finish group (%llu"
1037                 " blocks, %u new)",
1038                 o_blocks_count + add, add);
1039
1040    /* See if the device is actually as big as what was requested */
1041    bh = sb_bread(sb, o_blocks_count + add - 1);
1042    if (!bh) {
1043        ext4_warning(sb, __func__,
1044                 "can't read last block, resize aborted");
1045        return -ENOSPC;
1046    }
1047    brelse(bh);
1048
1049    /* We will update the superblock, one block bitmap, and
1050     * one group descriptor via ext4_free_blocks().
1051     */
1052    handle = ext4_journal_start_sb(sb, 3);
1053    if (IS_ERR(handle)) {
1054        err = PTR_ERR(handle);
1055        ext4_warning(sb, __func__, "error %d on journal start", err);
1056        goto exit_put;
1057    }
1058
1059    mutex_lock(&EXT4_SB(sb)->s_resize_lock);
1060    if (o_blocks_count != ext4_blocks_count(es)) {
1061        ext4_warning(sb, __func__,
1062                 "multiple resizers run on filesystem!");
1063        mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1064        ext4_journal_stop(handle);
1065        err = -EBUSY;
1066        goto exit_put;
1067    }
1068
1069    if ((err = ext4_journal_get_write_access(handle,
1070                         EXT4_SB(sb)->s_sbh))) {
1071        ext4_warning(sb, __func__,
1072                 "error %d on journal write access", err);
1073        mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1074        ext4_journal_stop(handle);
1075        goto exit_put;
1076    }
1077    ext4_blocks_count_set(es, o_blocks_count + add);
1078    ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
1079    sb->s_dirt = 1;
1080    mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1081    ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1082           o_blocks_count + add);
1083    /* We add the blocks to the bitmap and set the group need init bit */
1084    ext4_add_groupblocks(handle, sb, o_blocks_count, add);
1085    ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
1086           o_blocks_count + add);
1087    if ((err = ext4_journal_stop(handle)))
1088        goto exit_put;
1089
1090    if (test_opt(sb, DEBUG))
1091        printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
1092               ext4_blocks_count(es));
1093    update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
1094               sizeof(struct ext4_super_block));
1095exit_put:
1096    return err;
1097} /* ext4_group_extend */
1098

Archive Download this file



interactive