Root/fs/ext3/resize.c

1/*
2 * linux/fs/ext3/resize.c
3 *
4 * Support for resizing an ext3 filesystem while it is mounted.
5 *
6 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
7 *
8 * This could probably be made into a module, because it is not often in use.
9 */
10
11
12#define EXT3FS_DEBUG
13
14#include <linux/ext3_jbd.h>
15
16#include <linux/errno.h>
17#include <linux/slab.h>
18
19
/*
 * Half-open range tests on block numbers: inside() is true when
 * first <= b < last, outside() is its exact complement.
 * Both macros evaluate their arguments more than once.
 */
#define inside(b, first, last)	((first) <= (b) && (last) > (b))
#define outside(b, first, last)	(!inside(b, first, last))
22
23static int verify_group_input(struct super_block *sb,
24                  struct ext3_new_group_data *input)
25{
26    struct ext3_sb_info *sbi = EXT3_SB(sb);
27    struct ext3_super_block *es = sbi->s_es;
28    ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count);
29    ext3_fsblk_t end = start + input->blocks_count;
30    unsigned group = input->group;
31    ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
32    unsigned overhead = ext3_bg_has_super(sb, group) ?
33        (1 + ext3_bg_num_gdb(sb, group) +
34         le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
35    ext3_fsblk_t metaend = start + overhead;
36    struct buffer_head *bh = NULL;
37    ext3_grpblk_t free_blocks_count;
38    int err = -EINVAL;
39
40    input->free_blocks_count = free_blocks_count =
41        input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
42
43    if (test_opt(sb, DEBUG))
44        printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks "
45               "(%d free, %u reserved)\n",
46               ext3_bg_has_super(sb, input->group) ? "normal" :
47               "no-super", input->group, input->blocks_count,
48               free_blocks_count, input->reserved_blocks);
49
50    if (group != sbi->s_groups_count)
51        ext3_warning(sb, __func__,
52                 "Cannot add at group %u (only %lu groups)",
53                 input->group, sbi->s_groups_count);
54    else if ((start - le32_to_cpu(es->s_first_data_block)) %
55         EXT3_BLOCKS_PER_GROUP(sb))
56        ext3_warning(sb, __func__, "Last group not full");
57    else if (input->reserved_blocks > input->blocks_count / 5)
58        ext3_warning(sb, __func__, "Reserved blocks too high (%u)",
59                 input->reserved_blocks);
60    else if (free_blocks_count < 0)
61        ext3_warning(sb, __func__, "Bad blocks count %u",
62                 input->blocks_count);
63    else if (!(bh = sb_bread(sb, end - 1)))
64        ext3_warning(sb, __func__,
65                 "Cannot read last block ("E3FSBLK")",
66                 end - 1);
67    else if (outside(input->block_bitmap, start, end))
68        ext3_warning(sb, __func__,
69                 "Block bitmap not in group (block %u)",
70                 input->block_bitmap);
71    else if (outside(input->inode_bitmap, start, end))
72        ext3_warning(sb, __func__,
73                 "Inode bitmap not in group (block %u)",
74                 input->inode_bitmap);
75    else if (outside(input->inode_table, start, end) ||
76             outside(itend - 1, start, end))
77        ext3_warning(sb, __func__,
78                 "Inode table not in group (blocks %u-"E3FSBLK")",
79                 input->inode_table, itend - 1);
80    else if (input->inode_bitmap == input->block_bitmap)
81        ext3_warning(sb, __func__,
82                 "Block bitmap same as inode bitmap (%u)",
83                 input->block_bitmap);
84    else if (inside(input->block_bitmap, input->inode_table, itend))
85        ext3_warning(sb, __func__,
86                 "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
87                 input->block_bitmap, input->inode_table, itend-1);
88    else if (inside(input->inode_bitmap, input->inode_table, itend))
89        ext3_warning(sb, __func__,
90                 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
91                 input->inode_bitmap, input->inode_table, itend-1);
92    else if (inside(input->block_bitmap, start, metaend))
93        ext3_warning(sb, __func__,
94                 "Block bitmap (%u) in GDT table"
95                 " ("E3FSBLK"-"E3FSBLK")",
96                 input->block_bitmap, start, metaend - 1);
97    else if (inside(input->inode_bitmap, start, metaend))
98        ext3_warning(sb, __func__,
99                 "Inode bitmap (%u) in GDT table"
100                 " ("E3FSBLK"-"E3FSBLK")",
101                 input->inode_bitmap, start, metaend - 1);
102    else if (inside(input->inode_table, start, metaend) ||
103             inside(itend - 1, start, metaend))
104        ext3_warning(sb, __func__,
105                 "Inode table (%u-"E3FSBLK") overlaps"
106                 "GDT table ("E3FSBLK"-"E3FSBLK")",
107                 input->inode_table, itend - 1, start, metaend - 1);
108    else
109        err = 0;
110    brelse(bh);
111
112    return err;
113}
114
115static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
116                  ext3_fsblk_t blk)
117{
118    struct buffer_head *bh;
119    int err;
120
121    bh = sb_getblk(sb, blk);
122    if (!bh)
123        return ERR_PTR(-EIO);
124    if ((err = ext3_journal_get_write_access(handle, bh))) {
125        brelse(bh);
126        bh = ERR_PTR(err);
127    } else {
128        lock_buffer(bh);
129        memset(bh->b_data, 0, sb->s_blocksize);
130        set_buffer_uptodate(bh);
131        unlock_buffer(bh);
132    }
133
134    return bh;
135}
136
/*
 * Mark the tail of a bitmap, from @start_bit up to @end_bit, as in use.
 *
 * Calling the atomic set-bit helper hundreds or thousands of times would be
 * wasteful, so it is only used for the bits up to the next byte boundary
 * (which keeps the bit endianness right).  Everything after that boundary
 * is filled a whole byte at a time with memset(), which is fine because
 * the bitmap has no other users yet.
 *
 * Only whole bytes are filled after the boundary, and the per-bit loop is
 * not clamped to @end_bit.  Does nothing when @start_bit >= @end_bit.
 */
static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
	unsigned long byte_boundary;
	int bit;

	if (start_bit >= end_bit)
		return;

	ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);

	/* first bit index at or after start_bit that is a multiple of 8 */
	byte_boundary = (start_bit + 7) & ~7UL;

	bit = start_bit;
	while (bit < byte_boundary) {
		ext3_set_bit(bit, bitmap);
		bit++;
	}

	if (bit >= end_bit)
		return;

	memset(bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
}
155
156/*
157 * If we have fewer than thresh credits, extend by EXT3_MAX_TRANS_DATA.
158 * If that fails, restart the transaction & regain write access for the
159 * buffer head which is used for block_bitmap modifications.
160 */
161static int extend_or_restart_transaction(handle_t *handle, int thresh,
162                     struct buffer_head *bh)
163{
164    int err;
165
166    if (handle->h_buffer_credits >= thresh)
167        return 0;
168
169    err = ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA);
170    if (err < 0)
171        return err;
172    if (err) {
173        err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA);
174        if (err)
175            return err;
176        err = ext3_journal_get_write_access(handle, bh);
177        if (err)
178            return err;
179    }
180
181    return 0;
182}
183
184/*
185 * Set up the block and inode bitmaps, and the inode table for the new group.
186 * This doesn't need to be part of the main transaction, since we are only
187 * changing blocks outside the actual filesystem. We still do journaling to
188 * ensure the recovery is correct in case of a failure just after resize.
189 * If any part of this fails, we simply abort the resize.
190 */
191static int setup_new_group_blocks(struct super_block *sb,
192                  struct ext3_new_group_data *input)
193{
194    struct ext3_sb_info *sbi = EXT3_SB(sb);
195    ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group);
196    int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
197        le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
198    unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
199    struct buffer_head *bh;
200    handle_t *handle;
201    ext3_fsblk_t block;
202    ext3_grpblk_t bit;
203    int i;
204    int err = 0, err2;
205
206    /* This transaction may be extended/restarted along the way */
207    handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
208
209    if (IS_ERR(handle))
210        return PTR_ERR(handle);
211
212    mutex_lock(&sbi->s_resize_lock);
213    if (input->group != sbi->s_groups_count) {
214        err = -EBUSY;
215        goto exit_journal;
216    }
217
218    if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
219        err = PTR_ERR(bh);
220        goto exit_journal;
221    }
222
223    if (ext3_bg_has_super(sb, input->group)) {
224        ext3_debug("mark backup superblock %#04lx (+0)\n", start);
225        ext3_set_bit(0, bh->b_data);
226    }
227
228    /* Copy all of the GDT blocks into the backup in this group */
229    for (i = 0, bit = 1, block = start + 1;
230         i < gdblocks; i++, block++, bit++) {
231        struct buffer_head *gdb;
232
233        ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
234
235        err = extend_or_restart_transaction(handle, 1, bh);
236        if (err)
237            goto exit_bh;
238
239        gdb = sb_getblk(sb, block);
240        if (!gdb) {
241            err = -EIO;
242            goto exit_bh;
243        }
244        if ((err = ext3_journal_get_write_access(handle, gdb))) {
245            brelse(gdb);
246            goto exit_bh;
247        }
248        lock_buffer(gdb);
249        memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
250        set_buffer_uptodate(gdb);
251        unlock_buffer(gdb);
252        err = ext3_journal_dirty_metadata(handle, gdb);
253        if (err) {
254            brelse(gdb);
255            goto exit_bh;
256        }
257        ext3_set_bit(bit, bh->b_data);
258        brelse(gdb);
259    }
260
261    /* Zero out all of the reserved backup group descriptor table blocks */
262    for (i = 0, bit = gdblocks + 1, block = start + bit;
263         i < reserved_gdb; i++, block++, bit++) {
264        struct buffer_head *gdb;
265
266        ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit);
267
268        err = extend_or_restart_transaction(handle, 1, bh);
269        if (err)
270            goto exit_bh;
271
272        if (IS_ERR(gdb = bclean(handle, sb, block))) {
273            err = PTR_ERR(gdb);
274            goto exit_bh;
275        }
276        err = ext3_journal_dirty_metadata(handle, gdb);
277        if (err) {
278            brelse(gdb);
279            goto exit_bh;
280        }
281        ext3_set_bit(bit, bh->b_data);
282        brelse(gdb);
283    }
284    ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
285           input->block_bitmap - start);
286    ext3_set_bit(input->block_bitmap - start, bh->b_data);
287    ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
288           input->inode_bitmap - start);
289    ext3_set_bit(input->inode_bitmap - start, bh->b_data);
290
291    /* Zero out all of the inode table blocks */
292    for (i = 0, block = input->inode_table, bit = block - start;
293         i < sbi->s_itb_per_group; i++, bit++, block++) {
294        struct buffer_head *it;
295
296        ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
297
298        err = extend_or_restart_transaction(handle, 1, bh);
299        if (err)
300            goto exit_bh;
301
302        if (IS_ERR(it = bclean(handle, sb, block))) {
303            err = PTR_ERR(it);
304            goto exit_bh;
305        }
306        err = ext3_journal_dirty_metadata(handle, it);
307        if (err) {
308            brelse(it);
309            goto exit_bh;
310        }
311        brelse(it);
312        ext3_set_bit(bit, bh->b_data);
313    }
314
315    err = extend_or_restart_transaction(handle, 2, bh);
316    if (err)
317        goto exit_bh;
318
319    mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
320            bh->b_data);
321    err = ext3_journal_dirty_metadata(handle, bh);
322    if (err)
323        goto exit_bh;
324    brelse(bh);
325
326    /* Mark unused entries in inode bitmap used */
327    ext3_debug("clear inode bitmap %#04x (+%ld)\n",
328           input->inode_bitmap, input->inode_bitmap - start);
329    if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
330        err = PTR_ERR(bh);
331        goto exit_journal;
332    }
333
334    mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
335            bh->b_data);
336    err = ext3_journal_dirty_metadata(handle, bh);
337exit_bh:
338    brelse(bh);
339
340exit_journal:
341    mutex_unlock(&sbi->s_resize_lock);
342    if ((err2 = ext3_journal_stop(handle)) && !err)
343        err = err2;
344
345    return err;
346}
347
348/*
349 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
350 * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before
351 * calling this for the first time. In a sparse filesystem it will be the
352 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
353 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
354 */
355static unsigned ext3_list_backups(struct super_block *sb, unsigned *three,
356                  unsigned *five, unsigned *seven)
357{
358    unsigned *min = three;
359    int mult = 3;
360    unsigned ret;
361
362    if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
363                    EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
364        ret = *min;
365        *min += 1;
366        return ret;
367    }
368
369    if (*five < *min) {
370        min = five;
371        mult = 5;
372    }
373    if (*seven < *min) {
374        min = seven;
375        mult = 7;
376    }
377
378    ret = *min;
379    *min *= mult;
380
381    return ret;
382}
383
384/*
385 * Check that all of the backup GDT blocks are held in the primary GDT block.
386 * It is assumed that they are stored in group order. Returns the number of
387 * groups in current filesystem that have BACKUPS, or -ve error code.
388 */
389static int verify_reserved_gdb(struct super_block *sb,
390                   struct buffer_head *primary)
391{
392    const ext3_fsblk_t blk = primary->b_blocknr;
393    const unsigned long end = EXT3_SB(sb)->s_groups_count;
394    unsigned three = 1;
395    unsigned five = 5;
396    unsigned seven = 7;
397    unsigned grp;
398    __le32 *p = (__le32 *)primary->b_data;
399    int gdbackups = 0;
400
401    while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
402        if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
403            ext3_warning(sb, __func__,
404                     "reserved GDT "E3FSBLK
405                     " missing grp %d ("E3FSBLK")",
406                     blk, grp,
407                     grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
408            return -EINVAL;
409        }
410        if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb))
411            return -EFBIG;
412    }
413
414    return gdbackups;
415}
416
417/*
418 * Called when we need to bring a reserved group descriptor table block into
419 * use from the resize inode. The primary copy of the new GDT block currently
420 * is an indirect block (under the double indirect block in the resize inode).
421 * The new backup GDT blocks will be stored as leaf blocks in this indirect
422 * block, in group order. Even though we know all the block numbers we need,
423 * we check to ensure that the resize inode has actually reserved these blocks.
424 *
425 * Don't need to update the block bitmaps because the blocks are still in use.
426 *
427 * We get all of the error cases out of the way, so that we are sure to not
428 * fail once we start modifying the data on disk, because JBD has no rollback.
429 */
430static int add_new_gdb(handle_t *handle, struct inode *inode,
431               struct ext3_new_group_data *input,
432               struct buffer_head **primary)
433{
434    struct super_block *sb = inode->i_sb;
435    struct ext3_super_block *es = EXT3_SB(sb)->s_es;
436    unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
437    ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
438    struct buffer_head **o_group_desc, **n_group_desc;
439    struct buffer_head *dind;
440    int gdbackups;
441    struct ext3_iloc iloc;
442    __le32 *data;
443    int err;
444
445    if (test_opt(sb, DEBUG))
446        printk(KERN_DEBUG
447               "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n",
448               gdb_num);
449
450    /*
451     * If we are not using the primary superblock/GDT copy don't resize,
452     * because the user tools have no way of handling this. Probably a
453     * bad time to do it anyways.
454     */
455    if (EXT3_SB(sb)->s_sbh->b_blocknr !=
456        le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
457        ext3_warning(sb, __func__,
458            "won't resize using backup superblock at %llu",
459            (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
460        return -EPERM;
461    }
462
463    *primary = sb_bread(sb, gdblock);
464    if (!*primary)
465        return -EIO;
466
467    if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
468        err = gdbackups;
469        goto exit_bh;
470    }
471
472    data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
473    dind = sb_bread(sb, le32_to_cpu(*data));
474    if (!dind) {
475        err = -EIO;
476        goto exit_bh;
477    }
478
479    data = (__le32 *)dind->b_data;
480    if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
481        ext3_warning(sb, __func__,
482                 "new group %u GDT block "E3FSBLK" not reserved",
483                 input->group, gdblock);
484        err = -EINVAL;
485        goto exit_dind;
486    }
487
488    if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh)))
489        goto exit_dind;
490
491    if ((err = ext3_journal_get_write_access(handle, *primary)))
492        goto exit_sbh;
493
494    if ((err = ext3_journal_get_write_access(handle, dind)))
495        goto exit_primary;
496
497    /* ext3_reserve_inode_write() gets a reference on the iloc */
498    if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
499        goto exit_dindj;
500
501    n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
502            GFP_NOFS);
503    if (!n_group_desc) {
504        err = -ENOMEM;
505        ext3_warning (sb, __func__,
506                  "not enough memory for %lu groups", gdb_num + 1);
507        goto exit_inode;
508    }
509
510    /*
511     * Finally, we have all of the possible failures behind us...
512     *
513     * Remove new GDT block from inode double-indirect block and clear out
514     * the new GDT block for use (which also "frees" the backup GDT blocks
515     * from the reserved inode). We don't need to change the bitmaps for
516     * these blocks, because they are marked as in-use from being in the
517     * reserved inode, and will become GDT blocks (primary and backup).
518     */
519    data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
520    err = ext3_journal_dirty_metadata(handle, dind);
521    if (err)
522        goto exit_group_desc;
523    brelse(dind);
524    dind = NULL;
525    inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
526    err = ext3_mark_iloc_dirty(handle, inode, &iloc);
527    if (err)
528        goto exit_group_desc;
529    memset((*primary)->b_data, 0, sb->s_blocksize);
530    err = ext3_journal_dirty_metadata(handle, *primary);
531    if (err)
532        goto exit_group_desc;
533
534    o_group_desc = EXT3_SB(sb)->s_group_desc;
535    memcpy(n_group_desc, o_group_desc,
536           EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
537    n_group_desc[gdb_num] = *primary;
538    EXT3_SB(sb)->s_group_desc = n_group_desc;
539    EXT3_SB(sb)->s_gdb_count++;
540    kfree(o_group_desc);
541
542    le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
543    err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
544    if (err)
545        goto exit_inode;
546
547    return 0;
548
549exit_group_desc:
550    kfree(n_group_desc);
551exit_inode:
552    //ext3_journal_release_buffer(handle, iloc.bh);
553    brelse(iloc.bh);
554exit_dindj:
555    //ext3_journal_release_buffer(handle, dind);
556exit_primary:
557    //ext3_journal_release_buffer(handle, *primary);
558exit_sbh:
559    //ext3_journal_release_buffer(handle, *primary);
560exit_dind:
561    brelse(dind);
562exit_bh:
563    brelse(*primary);
564
565    ext3_debug("leaving with error %d\n", err);
566    return err;
567}
568
569/*
570 * Called when we are adding a new group which has a backup copy of each of
571 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
572 * We need to add these reserved backup GDT blocks to the resize inode, so
573 * that they are kept for future resizing and not allocated to files.
574 *
575 * Each reserved backup GDT block will go into a different indirect block.
576 * The indirect blocks are actually the primary reserved GDT blocks,
577 * so we know in advance what their block numbers are. We only get the
578 * double-indirect block to verify it is pointing to the primary reserved
579 * GDT blocks so we don't overwrite a data block by accident. The reserved
580 * backup GDT blocks are stored in their reserved primary GDT block.
581 */
582static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
583                  struct ext3_new_group_data *input)
584{
585    struct super_block *sb = inode->i_sb;
586    int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks);
587    struct buffer_head **primary;
588    struct buffer_head *dind;
589    struct ext3_iloc iloc;
590    ext3_fsblk_t blk;
591    __le32 *data, *end;
592    int gdbackups = 0;
593    int res, i;
594    int err;
595
596    primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
597    if (!primary)
598        return -ENOMEM;
599
600    data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
601    dind = sb_bread(sb, le32_to_cpu(*data));
602    if (!dind) {
603        err = -EIO;
604        goto exit_free;
605    }
606
607    blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
608    data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count %
609                     EXT3_ADDR_PER_BLOCK(sb));
610    end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
611
612    /* Get each reserved primary GDT block and verify it holds backups */
613    for (res = 0; res < reserved_gdb; res++, blk++) {
614        if (le32_to_cpu(*data) != blk) {
615            ext3_warning(sb, __func__,
616                     "reserved block "E3FSBLK
617                     " not at offset %ld",
618                     blk,
619                     (long)(data - (__le32 *)dind->b_data));
620            err = -EINVAL;
621            goto exit_bh;
622        }
623        primary[res] = sb_bread(sb, blk);
624        if (!primary[res]) {
625            err = -EIO;
626            goto exit_bh;
627        }
628        if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
629            brelse(primary[res]);
630            err = gdbackups;
631            goto exit_bh;
632        }
633        if (++data >= end)
634            data = (__le32 *)dind->b_data;
635    }
636
637    for (i = 0; i < reserved_gdb; i++) {
638        if ((err = ext3_journal_get_write_access(handle, primary[i]))) {
639            /*
640            int j;
641            for (j = 0; j < i; j++)
642                ext3_journal_release_buffer(handle, primary[j]);
643             */
644            goto exit_bh;
645        }
646    }
647
648    if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
649        goto exit_bh;
650
651    /*
652     * Finally we can add each of the reserved backup GDT blocks from
653     * the new group to its reserved primary GDT block.
654     */
655    blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
656    for (i = 0; i < reserved_gdb; i++) {
657        int err2;
658        data = (__le32 *)primary[i]->b_data;
659        /* printk("reserving backup %lu[%u] = %lu\n",
660               primary[i]->b_blocknr, gdbackups,
661               blk + primary[i]->b_blocknr); */
662        data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
663        err2 = ext3_journal_dirty_metadata(handle, primary[i]);
664        if (!err)
665            err = err2;
666    }
667    inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
668    ext3_mark_iloc_dirty(handle, inode, &iloc);
669
670exit_bh:
671    while (--res >= 0)
672        brelse(primary[res]);
673    brelse(dind);
674
675exit_free:
676    kfree(primary);
677
678    return err;
679}
680
681/*
682 * Update the backup copies of the ext3 metadata. These don't need to be part
683 * of the main resize transaction, because e2fsck will re-write them if there
684 * is a problem (basically only OOM will cause a problem). However, we
685 * _should_ update the backups if possible, in case the primary gets trashed
686 * for some reason and we need to run e2fsck from a backup superblock. The
687 * important part is that the new block and inode counts are in the backup
688 * superblocks, and the location of the new group metadata in the GDT backups.
689 *
690 * We do not need take the s_resize_lock for this, because these
691 * blocks are not otherwise touched by the filesystem code when it is
692 * mounted. We don't need to worry about last changing from
693 * sbi->s_groups_count, because the worst that can happen is that we
694 * do not copy the full number of backups at this time. The resize
695 * which changed s_groups_count will backup again.
696 */
697static void update_backups(struct super_block *sb,
698               int blk_off, char *data, int size)
699{
700    struct ext3_sb_info *sbi = EXT3_SB(sb);
701    const unsigned long last = sbi->s_groups_count;
702    const int bpg = EXT3_BLOCKS_PER_GROUP(sb);
703    unsigned three = 1;
704    unsigned five = 5;
705    unsigned seven = 7;
706    unsigned group;
707    int rest = sb->s_blocksize - size;
708    handle_t *handle;
709    int err = 0, err2;
710
711    handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
712    if (IS_ERR(handle)) {
713        group = 1;
714        err = PTR_ERR(handle);
715        goto exit_err;
716    }
717
718    while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) {
719        struct buffer_head *bh;
720
721        /* Out of journal space, and can't get more - abort - so sad */
722        if (handle->h_buffer_credits == 0 &&
723            ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) &&
724            (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA)))
725            break;
726
727        bh = sb_getblk(sb, group * bpg + blk_off);
728        if (!bh) {
729            err = -EIO;
730            break;
731        }
732        ext3_debug("update metadata backup %#04lx\n",
733              (unsigned long)bh->b_blocknr);
734        if ((err = ext3_journal_get_write_access(handle, bh))) {
735            brelse(bh);
736            break;
737        }
738        lock_buffer(bh);
739        memcpy(bh->b_data, data, size);
740        if (rest)
741            memset(bh->b_data + size, 0, rest);
742        set_buffer_uptodate(bh);
743        unlock_buffer(bh);
744        err = ext3_journal_dirty_metadata(handle, bh);
745        brelse(bh);
746        if (err)
747            break;
748    }
749    if ((err2 = ext3_journal_stop(handle)) && !err)
750        err = err2;
751
752    /*
753     * Ugh! Need to have e2fsck write the backup copies. It is too
754     * late to revert the resize, we shouldn't fail just because of
755     * the backup copies (they are only needed in case of corruption).
756     *
757     * However, if we got here we have a journal problem too, so we
758     * can't really start a transaction to mark the superblock.
759     * Chicken out and just set the flag on the hope it will be written
760     * to disk, and if not - we will simply wait until next fsck.
761     */
762exit_err:
763    if (err) {
764        ext3_warning(sb, __func__,
765                 "can't update backup for group %d (err %d), "
766                 "forcing fsck on next reboot", group, err);
767        sbi->s_mount_state &= ~EXT3_VALID_FS;
768        sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
769        mark_buffer_dirty(sbi->s_sbh);
770    }
771}
772
773/* Add group descriptor data to an existing or new group descriptor block.
774 * Ensure we handle all possible error conditions _before_ we start modifying
775 * the filesystem, because we cannot abort the transaction and not have it
776 * write the data to disk.
777 *
778 * If we are on a GDT block boundary, we need to get the reserved GDT block.
779 * Otherwise, we may need to add backup GDT blocks for a sparse group.
780 *
781 * We only need to hold the superblock lock while we are actually adding
782 * in the new group's counts to the superblock. Prior to that we have
783 * not really "added" the group at all. We re-check that we are still
784 * adding in the last group in case things have changed since verifying.
785 */
786int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
787{
788    struct ext3_sb_info *sbi = EXT3_SB(sb);
789    struct ext3_super_block *es = sbi->s_es;
790    int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
791        le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
792    struct buffer_head *primary = NULL;
793    struct ext3_group_desc *gdp;
794    struct inode *inode = NULL;
795    handle_t *handle;
796    int gdb_off, gdb_num;
797    int err, err2;
798
799    gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
800    gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb);
801
802    if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
803                    EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
804        ext3_warning(sb, __func__,
805                 "Can't resize non-sparse filesystem further");
806        return -EPERM;
807    }
808
809    if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
810        le32_to_cpu(es->s_blocks_count)) {
811        ext3_warning(sb, __func__, "blocks_count overflow\n");
812        return -EINVAL;
813    }
814
815    if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
816        le32_to_cpu(es->s_inodes_count)) {
817        ext3_warning(sb, __func__, "inodes_count overflow\n");
818        return -EINVAL;
819    }
820
821    if (reserved_gdb || gdb_off == 0) {
822        if (!EXT3_HAS_COMPAT_FEATURE(sb,
823                         EXT3_FEATURE_COMPAT_RESIZE_INODE)
824            || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
825            ext3_warning(sb, __func__,
826                     "No reserved GDT blocks, can't resize");
827            return -EPERM;
828        }
829        inode = ext3_iget(sb, EXT3_RESIZE_INO);
830        if (IS_ERR(inode)) {
831            ext3_warning(sb, __func__,
832                     "Error opening resize inode");
833            return PTR_ERR(inode);
834        }
835    }
836
837    if ((err = verify_group_input(sb, input)))
838        goto exit_put;
839
840    if ((err = setup_new_group_blocks(sb, input)))
841        goto exit_put;
842
843    /*
844     * We will always be modifying at least the superblock and a GDT
845     * block. If we are adding a group past the last current GDT block,
846     * we will also modify the inode and the dindirect block. If we
847     * are adding a group with superblock/GDT backups we will also
848     * modify each of the reserved GDT dindirect blocks.
849     */
850    handle = ext3_journal_start_sb(sb,
851                       ext3_bg_has_super(sb, input->group) ?
852                       3 + reserved_gdb : 4);
853    if (IS_ERR(handle)) {
854        err = PTR_ERR(handle);
855        goto exit_put;
856    }
857
858    mutex_lock(&sbi->s_resize_lock);
859    if (input->group != sbi->s_groups_count) {
860        ext3_warning(sb, __func__,
861                 "multiple resizers run on filesystem!");
862        err = -EBUSY;
863        goto exit_journal;
864    }
865
866    if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh)))
867        goto exit_journal;
868
869    /*
870     * We will only either add reserved group blocks to a backup group
871     * or remove reserved blocks for the first group in a new group block.
872     * Doing both would mean more complex code, and sane people don't
873     * use non-sparse filesystems anymore. This is already checked above.
874     */
875    if (gdb_off) {
876        primary = sbi->s_group_desc[gdb_num];
877        if ((err = ext3_journal_get_write_access(handle, primary)))
878            goto exit_journal;
879
880        if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) &&
881            (err = reserve_backup_gdb(handle, inode, input)))
882            goto exit_journal;
883    } else if ((err = add_new_gdb(handle, inode, input, &primary)))
884        goto exit_journal;
885
886    /*
887     * OK, now we've set up the new group. Time to make it active.
888     *
889     * We do not lock all allocations via s_resize_lock
890     * so we have to be safe wrt. concurrent accesses to the group
891     * data. So we need to be careful to set all of the relevant
892     * group descriptor data etc. *before* we enable the group.
893     *
894     * The key field here is sbi->s_groups_count: as long as
895     * that retains its old value, nobody is going to access the new
896     * group.
897     *
898     * So first we update all the descriptor metadata for the new
899     * group; then we update the total disk blocks count; then we
900     * update the groups count to enable the group; then finally we
901     * update the free space counts so that the system can start
902     * using the new disk blocks.
903     */
904
905    /* Update group descriptor block for new group */
906    gdp = (struct ext3_group_desc *)primary->b_data + gdb_off;
907
908    gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
909    gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
910    gdp->bg_inode_table = cpu_to_le32(input->inode_table);
911    gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
912    gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
913
914    /*
915     * Make the new blocks and inodes valid next. We do this before
916     * increasing the group count so that once the group is enabled,
917     * all of its blocks and inodes are already valid.
918     *
919     * We always allocate group-by-group, then block-by-block or
920     * inode-by-inode within a group, so enabling these
921     * blocks/inodes before the group is live won't actually let us
922     * allocate the new space yet.
923     */
924    le32_add_cpu(&es->s_blocks_count, input->blocks_count);
925    le32_add_cpu(&es->s_inodes_count, EXT3_INODES_PER_GROUP(sb));
926
927    /*
928     * We need to protect s_groups_count against other CPUs seeing
929     * inconsistent state in the superblock.
930     *
931     * The precise rules we use are:
932     *
933     * * Writers of s_groups_count *must* hold s_resize_lock
934     * AND
935     * * Writers must perform a smp_wmb() after updating all dependent
936     * data and before modifying the groups count
937     *
938     * * Readers must hold s_resize_lock over the access
939     * OR
940     * * Readers must perform an smp_rmb() after reading the groups count
941     * and before reading any dependent data.
942     *
943     * NB. These rules can be relaxed when checking the group count
944     * while freeing data, as we can only allocate from a block
945     * group after serialising against the group count, and we can
946     * only then free after serialising in turn against that
947     * allocation.
948     */
949    smp_wmb();
950
951    /* Update the global fs size fields */
952    sbi->s_groups_count++;
953
954    err = ext3_journal_dirty_metadata(handle, primary);
955    if (err)
956        goto exit_journal;
957
958    /* Update the reserved block counts only once the new group is
959     * active. */
960    le32_add_cpu(&es->s_r_blocks_count, input->reserved_blocks);
961
962    /* Update the free space counts */
963    percpu_counter_add(&sbi->s_freeblocks_counter,
964               input->free_blocks_count);
965    percpu_counter_add(&sbi->s_freeinodes_counter,
966               EXT3_INODES_PER_GROUP(sb));
967
968    err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
969
970exit_journal:
971    mutex_unlock(&sbi->s_resize_lock);
972    if ((err2 = ext3_journal_stop(handle)) && !err)
973        err = err2;
974    if (!err) {
975        update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
976                   sizeof(struct ext3_super_block));
977        update_backups(sb, primary->b_blocknr, primary->b_data,
978                   primary->b_size);
979    }
980exit_put:
981    iput(inode);
982    return err;
983} /* ext3_group_add */
984
985/* Extend the filesystem to the new number of blocks specified. This entry
986 * point is only used to extend the current filesystem to the end of the last
987 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
988 * for emergencies (because it has no dependencies on reserved blocks).
989 *
990 * If we _really_ wanted, we could use default values to call ext3_group_add()
991 * allow the "remount" trick to work for arbitrary resizing, assuming enough
992 * GDT blocks are reserved to grow to the desired size.
993 */
994int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
995              ext3_fsblk_t n_blocks_count)
996{
997    ext3_fsblk_t o_blocks_count;
998    ext3_grpblk_t last;
999    ext3_grpblk_t add;
1000    struct buffer_head * bh;
1001    handle_t *handle;
1002    int err;
1003    unsigned long freed_blocks;
1004
1005    /* We don't need to worry about locking wrt other resizers just
1006     * yet: we're going to revalidate es->s_blocks_count after
1007     * taking the s_resize_lock below. */
1008    o_blocks_count = le32_to_cpu(es->s_blocks_count);
1009
1010    if (test_opt(sb, DEBUG))
1011        printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
1012               " up to "E3FSBLK" blocks\n",
1013               o_blocks_count, n_blocks_count);
1014
1015    if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
1016        return 0;
1017
1018    if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1019        printk(KERN_ERR "EXT3-fs: filesystem on %s:"
1020            " too large to resize to "E3FSBLK" blocks safely\n",
1021            sb->s_id, n_blocks_count);
1022        if (sizeof(sector_t) < 8)
1023            ext3_warning(sb, __func__,
1024            "CONFIG_LBDAF not enabled\n");
1025        return -EINVAL;
1026    }
1027
1028    if (n_blocks_count < o_blocks_count) {
1029        ext3_warning(sb, __func__,
1030                 "can't shrink FS - resize aborted");
1031        return -EBUSY;
1032    }
1033
1034    /* Handle the remaining blocks in the last group only. */
1035    last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
1036        EXT3_BLOCKS_PER_GROUP(sb);
1037
1038    if (last == 0) {
1039        ext3_warning(sb, __func__,
1040                 "need to use ext2online to resize further");
1041        return -EPERM;
1042    }
1043
1044    add = EXT3_BLOCKS_PER_GROUP(sb) - last;
1045
1046    if (o_blocks_count + add < o_blocks_count) {
1047        ext3_warning(sb, __func__, "blocks_count overflow");
1048        return -EINVAL;
1049    }
1050
1051    if (o_blocks_count + add > n_blocks_count)
1052        add = n_blocks_count - o_blocks_count;
1053
1054    if (o_blocks_count + add < n_blocks_count)
1055        ext3_warning(sb, __func__,
1056                 "will only finish group ("E3FSBLK
1057                 " blocks, %u new)",
1058                 o_blocks_count + add, add);
1059
1060    /* See if the device is actually as big as what was requested */
1061    bh = sb_bread(sb, o_blocks_count + add -1);
1062    if (!bh) {
1063        ext3_warning(sb, __func__,
1064                 "can't read last block, resize aborted");
1065        return -ENOSPC;
1066    }
1067    brelse(bh);
1068
1069    /* We will update the superblock, one block bitmap, and
1070     * one group descriptor via ext3_free_blocks().
1071     */
1072    handle = ext3_journal_start_sb(sb, 3);
1073    if (IS_ERR(handle)) {
1074        err = PTR_ERR(handle);
1075        ext3_warning(sb, __func__, "error %d on journal start",err);
1076        goto exit_put;
1077    }
1078
1079    mutex_lock(&EXT3_SB(sb)->s_resize_lock);
1080    if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1081        ext3_warning(sb, __func__,
1082                 "multiple resizers run on filesystem!");
1083        mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1084        ext3_journal_stop(handle);
1085        err = -EBUSY;
1086        goto exit_put;
1087    }
1088
1089    if ((err = ext3_journal_get_write_access(handle,
1090                         EXT3_SB(sb)->s_sbh))) {
1091        ext3_warning(sb, __func__,
1092                 "error %d on journal write access", err);
1093        mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1094        ext3_journal_stop(handle);
1095        goto exit_put;
1096    }
1097    es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1098    err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1099    mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1100    if (err) {
1101        ext3_warning(sb, __func__,
1102                 "error %d on journal dirty metadata", err);
1103        ext3_journal_stop(handle);
1104        goto exit_put;
1105    }
1106    ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
1107           o_blocks_count, o_blocks_count + add);
1108    ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
1109    ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n",
1110           o_blocks_count, o_blocks_count + add);
1111    if ((err = ext3_journal_stop(handle)))
1112        goto exit_put;
1113    if (test_opt(sb, DEBUG))
1114        printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n",
1115               le32_to_cpu(es->s_blocks_count));
1116    update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
1117               sizeof(struct ext3_super_block));
1118exit_put:
1119    return err;
1120} /* ext3_group_extend */
1121

Archive Download this file



interactive