Root/fs/jfs/jfs_imap.c

1/*
2 * Copyright (C) International Business Machines Corp., 2000-2004
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
/*
 *	jfs_imap.c: inode allocation map manager
 *
 * Serialization:
 *   Each AG has a simple lock which is used to control the serialization of
 *	the AG level lists.  This lock should be taken first whenever an AG
 *	level list will be modified or accessed.
 *
 *   Each IAG is locked by obtaining the buffer for the IAG page.
 *
 *   There is also an inode lock for the inode map inode.  A read lock needs to
 *	be taken whenever an IAG is read from the map or the global level
 *	information is read.  A write lock needs to be taken whenever the global
 *	level information is modified or an atomic operation needs to be used.
 *
 *	If more than one IAG is read at one time, the read lock may not
 *	be given up until all of the IAG's are read.  Otherwise, a deadlock
 *	may occur when trying to obtain the read lock while another thread
 *	holding the read lock is waiting on the IAG already being held.
 *
 *   The control page of the inode map is read into memory by diMount().
 *	Thereafter it should only be modified in memory and then it will be
 *	written out when the filesystem is unmounted by diUnmount().
 */
43
44#include <linux/fs.h>
45#include <linux/buffer_head.h>
46#include <linux/pagemap.h>
47#include <linux/quotaops.h>
48#include <linux/slab.h>
49
50#include "jfs_incore.h"
51#include "jfs_inode.h"
52#include "jfs_filsys.h"
53#include "jfs_dinode.h"
54#include "jfs_dmap.h"
55#include "jfs_imap.h"
56#include "jfs_metapage.h"
57#include "jfs_superblock.h"
58#include "jfs_debug.h"
59
/*
 * imap locks
 *
 * Thin wrappers around mutex_init()/mutex_lock()/mutex_unlock() for the
 * locks embedded in the inode map control structure.  The imap argument
 * is parenthesized in every expansion so that passing an expression
 * (for example "base + 1") still selects the intended structure.
 */
/* iag free list lock */
#define IAGFREE_LOCK_INIT(imap)		mutex_init(&(imap)->im_freelock)
#define IAGFREE_LOCK(imap)		mutex_lock(&(imap)->im_freelock)
#define IAGFREE_UNLOCK(imap)		mutex_unlock(&(imap)->im_freelock)

/* per ag iag list locks: im_aglock[] is indexed by the AG number */
#define AG_LOCK_INIT(imap, index)	mutex_init(&(imap)->im_aglock[index])
#define AG_LOCK(imap, agno)		mutex_lock(&(imap)->im_aglock[agno])
#define AG_UNLOCK(imap, agno)		mutex_unlock(&(imap)->im_aglock[agno])
72
73/*
74 * forward references
75 */
76static int diAllocAG(struct inomap *, int, bool, struct inode *);
77static int diAllocAny(struct inomap *, int, bool, struct inode *);
78static int diAllocBit(struct inomap *, struct iag *, int);
79static int diAllocExt(struct inomap *, int, struct inode *);
80static int diAllocIno(struct inomap *, int, struct inode *);
81static int diFindFree(u32, int);
82static int diNewExt(struct inomap *, struct iag *, int);
83static int diNewIAG(struct inomap *, int *, int, struct metapage **);
84static void duplicateIXtree(struct super_block *, s64, int, s64 *);
85
86static int diIAGRead(struct inomap * imap, int, struct metapage **);
87static int copy_from_dinode(struct dinode *, struct inode *);
88static void copy_to_dinode(struct dinode *, struct inode *);
89
90/*
91 * NAME: diMount()
92 *
93 * FUNCTION: initialize the incore inode map control structures for
94 * a fileset or aggregate init time.
95 *
96 * the inode map's control structure (dinomap) is
97 * brought in from disk and placed in virtual memory.
98 *
99 * PARAMETERS:
100 * ipimap - pointer to inode map inode for the aggregate or fileset.
101 *
102 * RETURN VALUES:
103 * 0 - success
104 * -ENOMEM - insufficient free virtual memory.
105 * -EIO - i/o error.
106 */
107int diMount(struct inode *ipimap)
108{
109    struct inomap *imap;
110    struct metapage *mp;
111    int index;
112    struct dinomap_disk *dinom_le;
113
114    /*
115     * allocate/initialize the in-memory inode map control structure
116     */
117    /* allocate the in-memory inode map control structure. */
118    imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
119    if (imap == NULL) {
120        jfs_err("diMount: kmalloc returned NULL!");
121        return -ENOMEM;
122    }
123
124    /* read the on-disk inode map control structure. */
125
126    mp = read_metapage(ipimap,
127               IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
128               PSIZE, 0);
129    if (mp == NULL) {
130        kfree(imap);
131        return -EIO;
132    }
133
134    /* copy the on-disk version to the in-memory version. */
135    dinom_le = (struct dinomap_disk *) mp->data;
136    imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag);
137    imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag);
138    atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos));
139    atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree));
140    imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext);
141    imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext);
142    for (index = 0; index < MAXAG; index++) {
143        imap->im_agctl[index].inofree =
144            le32_to_cpu(dinom_le->in_agctl[index].inofree);
145        imap->im_agctl[index].extfree =
146            le32_to_cpu(dinom_le->in_agctl[index].extfree);
147        imap->im_agctl[index].numinos =
148            le32_to_cpu(dinom_le->in_agctl[index].numinos);
149        imap->im_agctl[index].numfree =
150            le32_to_cpu(dinom_le->in_agctl[index].numfree);
151    }
152
153    /* release the buffer. */
154    release_metapage(mp);
155
156    /*
157     * allocate/initialize inode allocation map locks
158     */
159    /* allocate and init iag free list lock */
160    IAGFREE_LOCK_INIT(imap);
161
162    /* allocate and init ag list locks */
163    for (index = 0; index < MAXAG; index++) {
164        AG_LOCK_INIT(imap, index);
165    }
166
167    /* bind the inode map inode and inode map control structure
168     * to each other.
169     */
170    imap->im_ipimap = ipimap;
171    JFS_IP(ipimap)->i_imap = imap;
172
173    return (0);
174}
175
176
177/*
178 * NAME: diUnmount()
179 *
180 * FUNCTION: write to disk the incore inode map control structures for
181 * a fileset or aggregate at unmount time.
182 *
183 * PARAMETERS:
184 * ipimap - pointer to inode map inode for the aggregate or fileset.
185 *
186 * RETURN VALUES:
187 * 0 - success
188 * -ENOMEM - insufficient free virtual memory.
189 * -EIO - i/o error.
190 */
191int diUnmount(struct inode *ipimap, int mounterror)
192{
193    struct inomap *imap = JFS_IP(ipimap)->i_imap;
194
195    /*
196     * update the on-disk inode map control structure
197     */
198
199    if (!(mounterror || isReadOnly(ipimap)))
200        diSync(ipimap);
201
202    /*
203     * Invalidate the page cache buffers
204     */
205    truncate_inode_pages(ipimap->i_mapping, 0);
206
207    /*
208     * free in-memory control structure
209     */
210    kfree(imap);
211
212    return (0);
213}
214
215
216/*
217 * diSync()
218 */
219int diSync(struct inode *ipimap)
220{
221    struct dinomap_disk *dinom_le;
222    struct inomap *imp = JFS_IP(ipimap)->i_imap;
223    struct metapage *mp;
224    int index;
225
226    /*
227     * write imap global conrol page
228     */
229    /* read the on-disk inode map control structure */
230    mp = get_metapage(ipimap,
231              IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
232              PSIZE, 0);
233    if (mp == NULL) {
234        jfs_err("diSync: get_metapage failed!");
235        return -EIO;
236    }
237
238    /* copy the in-memory version to the on-disk version */
239    dinom_le = (struct dinomap_disk *) mp->data;
240    dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag);
241    dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag);
242    dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos));
243    dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree));
244    dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext);
245    dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext);
246    for (index = 0; index < MAXAG; index++) {
247        dinom_le->in_agctl[index].inofree =
248            cpu_to_le32(imp->im_agctl[index].inofree);
249        dinom_le->in_agctl[index].extfree =
250            cpu_to_le32(imp->im_agctl[index].extfree);
251        dinom_le->in_agctl[index].numinos =
252            cpu_to_le32(imp->im_agctl[index].numinos);
253        dinom_le->in_agctl[index].numfree =
254            cpu_to_le32(imp->im_agctl[index].numfree);
255    }
256
257    /* write out the control structure */
258    write_metapage(mp);
259
260    /*
261     * write out dirty pages of imap
262     */
263    filemap_write_and_wait(ipimap->i_mapping);
264
265    diWriteSpecial(ipimap, 0);
266
267    return (0);
268}
269
270
271/*
272 * NAME: diRead()
273 *
274 * FUNCTION: initialize an incore inode from disk.
275 *
276 * on entry, the specifed incore inode should itself
277 * specify the disk inode number corresponding to the
278 * incore inode (i.e. i_number should be initialized).
279 *
280 * this routine handles incore inode initialization for
281 * both "special" and "regular" inodes. special inodes
282 * are those required early in the mount process and
283 * require special handling since much of the file system
284 * is not yet initialized. these "special" inodes are
285 * identified by a NULL inode map inode pointer and are
286 * actually initialized by a call to diReadSpecial().
287 *
288 * for regular inodes, the iag describing the disk inode
289 * is read from disk to determine the inode extent address
290 * for the disk inode. with the inode extent address in
291 * hand, the page of the extent that contains the disk
292 * inode is read and the disk inode is copied to the
293 * incore inode.
294 *
295 * PARAMETERS:
296 * ip - pointer to incore inode to be initialized from disk.
297 *
298 * RETURN VALUES:
299 * 0 - success
300 * -EIO - i/o error.
301 * -ENOMEM - insufficient memory
302 *
303 */
304int diRead(struct inode *ip)
305{
306    struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
307    int iagno, ino, extno, rc;
308    struct inode *ipimap;
309    struct dinode *dp;
310    struct iag *iagp;
311    struct metapage *mp;
312    s64 blkno, agstart;
313    struct inomap *imap;
314    int block_offset;
315    int inodes_left;
316    unsigned long pageno;
317    int rel_inode;
318
319    jfs_info("diRead: ino = %ld", ip->i_ino);
320
321    ipimap = sbi->ipimap;
322    JFS_IP(ip)->ipimap = ipimap;
323
324    /* determine the iag number for this inode (number) */
325    iagno = INOTOIAG(ip->i_ino);
326
327    /* read the iag */
328    imap = JFS_IP(ipimap)->i_imap;
329    IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
330    rc = diIAGRead(imap, iagno, &mp);
331    IREAD_UNLOCK(ipimap);
332    if (rc) {
333        jfs_err("diRead: diIAGRead returned %d", rc);
334        return (rc);
335    }
336
337    iagp = (struct iag *) mp->data;
338
339    /* determine inode extent that holds the disk inode */
340    ino = ip->i_ino & (INOSPERIAG - 1);
341    extno = ino >> L2INOSPEREXT;
342
343    if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) ||
344        (addressPXD(&iagp->inoext[extno]) == 0)) {
345        release_metapage(mp);
346        return -ESTALE;
347    }
348
349    /* get disk block number of the page within the inode extent
350     * that holds the disk inode.
351     */
352    blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);
353
354    /* get the ag for the iag */
355    agstart = le64_to_cpu(iagp->agstart);
356
357    release_metapage(mp);
358
359    rel_inode = (ino & (INOSPERPAGE - 1));
360    pageno = blkno >> sbi->l2nbperpage;
361
362    if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
363        /*
364         * OS/2 didn't always align inode extents on page boundaries
365         */
366        inodes_left =
367             (sbi->nbperpage - block_offset) << sbi->l2niperblk;
368
369        if (rel_inode < inodes_left)
370            rel_inode += block_offset << sbi->l2niperblk;
371        else {
372            pageno += 1;
373            rel_inode -= inodes_left;
374        }
375    }
376
377    /* read the page of disk inode */
378    mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
379    if (!mp) {
380        jfs_err("diRead: read_metapage failed");
381        return -EIO;
382    }
383
384    /* locate the disk inode requested */
385    dp = (struct dinode *) mp->data;
386    dp += rel_inode;
387
388    if (ip->i_ino != le32_to_cpu(dp->di_number)) {
389        jfs_error(ip->i_sb, "diRead: i_ino != di_number");
390        rc = -EIO;
391    } else if (le32_to_cpu(dp->di_nlink) == 0)
392        rc = -ESTALE;
393    else
394        /* copy the disk inode to the in-memory inode */
395        rc = copy_from_dinode(dp, ip);
396
397    release_metapage(mp);
398
399    /* set the ag for the inode */
400    JFS_IP(ip)->agstart = agstart;
401    JFS_IP(ip)->active_ag = -1;
402
403    return (rc);
404}
405
406
407/*
408 * NAME: diReadSpecial()
409 *
410 * FUNCTION: initialize a 'special' inode from disk.
411 *
412 * this routines handles aggregate level inodes. The
413 * inode cache cannot differentiate between the
414 * aggregate inodes and the filesystem inodes, so we
415 * handle these here. We don't actually use the aggregate
416 * inode map, since these inodes are at a fixed location
417 * and in some cases the aggregate inode map isn't initialized
418 * yet.
419 *
420 * PARAMETERS:
421 * sb - filesystem superblock
422 * inum - aggregate inode number
423 * secondary - 1 if secondary aggregate inode table
424 *
425 * RETURN VALUES:
426 * new inode - success
427 * NULL - i/o error.
428 */
429struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
430{
431    struct jfs_sb_info *sbi = JFS_SBI(sb);
432    uint address;
433    struct dinode *dp;
434    struct inode *ip;
435    struct metapage *mp;
436
437    ip = new_inode(sb);
438    if (ip == NULL) {
439        jfs_err("diReadSpecial: new_inode returned NULL!");
440        return ip;
441    }
442
443    if (secondary) {
444        address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
445        JFS_IP(ip)->ipimap = sbi->ipaimap2;
446    } else {
447        address = AITBL_OFF >> L2PSIZE;
448        JFS_IP(ip)->ipimap = sbi->ipaimap;
449    }
450
451    ASSERT(inum < INOSPEREXT);
452
453    ip->i_ino = inum;
454
455    address += inum >> 3; /* 8 inodes per 4K page */
456
457    /* read the page of fixed disk inode (AIT) in raw mode */
458    mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
459    if (mp == NULL) {
460        ip->i_nlink = 1; /* Don't want iput() deleting it */
461        iput(ip);
462        return (NULL);
463    }
464
465    /* get the pointer to the disk inode of interest */
466    dp = (struct dinode *) (mp->data);
467    dp += inum % 8; /* 8 inodes per 4K page */
468
469    /* copy on-disk inode to in-memory inode */
470    if ((copy_from_dinode(dp, ip)) != 0) {
471        /* handle bad return by returning NULL for ip */
472        ip->i_nlink = 1; /* Don't want iput() deleting it */
473        iput(ip);
474        /* release the page */
475        release_metapage(mp);
476        return (NULL);
477
478    }
479
480    ip->i_mapping->a_ops = &jfs_metapage_aops;
481    mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
482
483    /* Allocations to metadata inodes should not affect quotas */
484    ip->i_flags |= S_NOQUOTA;
485
486    if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) {
487        sbi->gengen = le32_to_cpu(dp->di_gengen);
488        sbi->inostamp = le32_to_cpu(dp->di_inostamp);
489    }
490
491    /* release the page */
492    release_metapage(mp);
493
494    /*
495     * __mark_inode_dirty expects inodes to be hashed. Since we don't
496     * want special inodes in the fileset inode space, we make them
497     * appear hashed, but do not put on any lists. hlist_del()
498     * will work fine and require no locking.
499     */
500    hlist_add_fake(&ip->i_hash);
501
502    return (ip);
503}
504
505/*
506 * NAME: diWriteSpecial()
507 *
508 * FUNCTION: Write the special inode to disk
509 *
510 * PARAMETERS:
511 * ip - special inode
512 * secondary - 1 if secondary aggregate inode table
513 *
514 * RETURN VALUES: none
515 */
516
517void diWriteSpecial(struct inode *ip, int secondary)
518{
519    struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
520    uint address;
521    struct dinode *dp;
522    ino_t inum = ip->i_ino;
523    struct metapage *mp;
524
525    if (secondary)
526        address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
527    else
528        address = AITBL_OFF >> L2PSIZE;
529
530    ASSERT(inum < INOSPEREXT);
531
532    address += inum >> 3; /* 8 inodes per 4K page */
533
534    /* read the page of fixed disk inode (AIT) in raw mode */
535    mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
536    if (mp == NULL) {
537        jfs_err("diWriteSpecial: failed to read aggregate inode "
538            "extent!");
539        return;
540    }
541
542    /* get the pointer to the disk inode of interest */
543    dp = (struct dinode *) (mp->data);
544    dp += inum % 8; /* 8 inodes per 4K page */
545
546    /* copy on-disk inode to in-memory inode */
547    copy_to_dinode(dp, ip);
548    memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);
549
550    if (inum == FILESYSTEM_I)
551        dp->di_gengen = cpu_to_le32(sbi->gengen);
552
553    /* write the page */
554    write_metapage(mp);
555}
556
557/*
558 * NAME: diFreeSpecial()
559 *
560 * FUNCTION: Free allocated space for special inode
561 */
562void diFreeSpecial(struct inode *ip)
563{
564    if (ip == NULL) {
565        jfs_err("diFreeSpecial called with NULL ip!");
566        return;
567    }
568    filemap_write_and_wait(ip->i_mapping);
569    truncate_inode_pages(ip->i_mapping, 0);
570    iput(ip);
571}
572
573
574
575/*
576 * NAME: diWrite()
577 *
578 * FUNCTION: write the on-disk inode portion of the in-memory inode
579 * to its corresponding on-disk inode.
580 *
581 * on entry, the specifed incore inode should itself
582 * specify the disk inode number corresponding to the
583 * incore inode (i.e. i_number should be initialized).
584 *
585 * the inode contains the inode extent address for the disk
586 * inode. with the inode extent address in hand, the
587 * page of the extent that contains the disk inode is
588 * read and the disk inode portion of the incore inode
589 * is copied to the disk inode.
590 *
591 * PARAMETERS:
592 * tid - transacation id
593 * ip - pointer to incore inode to be written to the inode extent.
594 *
595 * RETURN VALUES:
596 * 0 - success
597 * -EIO - i/o error.
598 */
599int diWrite(tid_t tid, struct inode *ip)
600{
601    struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
602    struct jfs_inode_info *jfs_ip = JFS_IP(ip);
603    int rc = 0;
604    s32 ino;
605    struct dinode *dp;
606    s64 blkno;
607    int block_offset;
608    int inodes_left;
609    struct metapage *mp;
610    unsigned long pageno;
611    int rel_inode;
612    int dioffset;
613    struct inode *ipimap;
614    uint type;
615    lid_t lid;
616    struct tlock *ditlck, *tlck;
617    struct linelock *dilinelock, *ilinelock;
618    struct lv *lv;
619    int n;
620
621    ipimap = jfs_ip->ipimap;
622
623    ino = ip->i_ino & (INOSPERIAG - 1);
624
625    if (!addressPXD(&(jfs_ip->ixpxd)) ||
626        (lengthPXD(&(jfs_ip->ixpxd)) !=
627         JFS_IP(ipimap)->i_imap->im_nbperiext)) {
628        jfs_error(ip->i_sb, "diWrite: ixpxd invalid");
629        return -EIO;
630    }
631
632    /*
633     * read the page of disk inode containing the specified inode:
634     */
635    /* compute the block address of the page */
636    blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);
637
638    rel_inode = (ino & (INOSPERPAGE - 1));
639    pageno = blkno >> sbi->l2nbperpage;
640
641    if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
642        /*
643         * OS/2 didn't always align inode extents on page boundaries
644         */
645        inodes_left =
646            (sbi->nbperpage - block_offset) << sbi->l2niperblk;
647
648        if (rel_inode < inodes_left)
649            rel_inode += block_offset << sbi->l2niperblk;
650        else {
651            pageno += 1;
652            rel_inode -= inodes_left;
653        }
654    }
655    /* read the page of disk inode */
656      retry:
657    mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
658    if (!mp)
659        return -EIO;
660
661    /* get the pointer to the disk inode */
662    dp = (struct dinode *) mp->data;
663    dp += rel_inode;
664
665    dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;
666
667    /*
668     * acquire transaction lock on the on-disk inode;
669     * N.B. tlock is acquired on ipimap not ip;
670     */
671    if ((ditlck =
672         txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
673        goto retry;
674    dilinelock = (struct linelock *) & ditlck->lock;
675
676    /*
677     * copy btree root from in-memory inode to on-disk inode
678     *
679     * (tlock is taken from inline B+-tree root in in-memory
680     * inode when the B+-tree root is updated, which is pointed
681     * by jfs_ip->blid as well as being on tx tlock list)
682     *
683     * further processing of btree root is based on the copy
684     * in in-memory inode, where txLog() will log from, and,
685     * for xtree root, txUpdateMap() will update map and reset
686     * XAD_NEW bit;
687     */
688
689    if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
690        /*
691         * This is the special xtree inside the directory for storing
692         * the directory table
693         */
694        xtpage_t *p, *xp;
695        xad_t *xad;
696
697        jfs_ip->xtlid = 0;
698        tlck = lid_to_tlock(lid);
699        assert(tlck->type & tlckXTREE);
700        tlck->type |= tlckBTROOT;
701        tlck->mp = mp;
702        ilinelock = (struct linelock *) & tlck->lock;
703
704        /*
705         * copy xtree root from inode to dinode:
706         */
707        p = &jfs_ip->i_xtroot;
708        xp = (xtpage_t *) &dp->di_dirtable;
709        lv = ilinelock->lv;
710        for (n = 0; n < ilinelock->index; n++, lv++) {
711            memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
712                   lv->length << L2XTSLOTSIZE);
713        }
714
715        /* reset on-disk (metadata page) xtree XAD_NEW bit */
716        xad = &xp->xad[XTENTRYSTART];
717        for (n = XTENTRYSTART;
718             n < le16_to_cpu(xp->header.nextindex); n++, xad++)
719            if (xad->flag & (XAD_NEW | XAD_EXTENDED))
720                xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
721    }
722
723    if ((lid = jfs_ip->blid) == 0)
724        goto inlineData;
725    jfs_ip->blid = 0;
726
727    tlck = lid_to_tlock(lid);
728    type = tlck->type;
729    tlck->type |= tlckBTROOT;
730    tlck->mp = mp;
731    ilinelock = (struct linelock *) & tlck->lock;
732
733    /*
734     * regular file: 16 byte (XAD slot) granularity
735     */
736    if (type & tlckXTREE) {
737        xtpage_t *p, *xp;
738        xad_t *xad;
739
740        /*
741         * copy xtree root from inode to dinode:
742         */
743        p = &jfs_ip->i_xtroot;
744        xp = &dp->di_xtroot;
745        lv = ilinelock->lv;
746        for (n = 0; n < ilinelock->index; n++, lv++) {
747            memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
748                   lv->length << L2XTSLOTSIZE);
749        }
750
751        /* reset on-disk (metadata page) xtree XAD_NEW bit */
752        xad = &xp->xad[XTENTRYSTART];
753        for (n = XTENTRYSTART;
754             n < le16_to_cpu(xp->header.nextindex); n++, xad++)
755            if (xad->flag & (XAD_NEW | XAD_EXTENDED))
756                xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
757    }
758    /*
759     * directory: 32 byte (directory entry slot) granularity
760     */
761    else if (type & tlckDTREE) {
762        dtpage_t *p, *xp;
763
764        /*
765         * copy dtree root from inode to dinode:
766         */
767        p = (dtpage_t *) &jfs_ip->i_dtroot;
768        xp = (dtpage_t *) & dp->di_dtroot;
769        lv = ilinelock->lv;
770        for (n = 0; n < ilinelock->index; n++, lv++) {
771            memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
772                   lv->length << L2DTSLOTSIZE);
773        }
774    } else {
775        jfs_err("diWrite: UFO tlock");
776    }
777
778      inlineData:
779    /*
780     * copy inline symlink from in-memory inode to on-disk inode
781     */
782    if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
783        lv = & dilinelock->lv[dilinelock->index];
784        lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
785        lv->length = 2;
786        memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE);
787        dilinelock->index++;
788    }
789    /*
790     * copy inline data from in-memory inode to on-disk inode:
791     * 128 byte slot granularity
792     */
793    if (test_cflag(COMMIT_Inlineea, ip)) {
794        lv = & dilinelock->lv[dilinelock->index];
795        lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
796        lv->length = 1;
797        memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
798        dilinelock->index++;
799
800        clear_cflag(COMMIT_Inlineea, ip);
801    }
802
803    /*
804     * lock/copy inode base: 128 byte slot granularity
805     */
806    lv = & dilinelock->lv[dilinelock->index];
807    lv->offset = dioffset >> L2INODESLOTSIZE;
808    copy_to_dinode(dp, ip);
809    if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
810        lv->length = 2;
811        memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
812    } else
813        lv->length = 1;
814    dilinelock->index++;
815
816    /* release the buffer holding the updated on-disk inode.
817     * the buffer will be later written by commit processing.
818     */
819    write_metapage(mp);
820
821    return (rc);
822}
823
824
825/*
826 * NAME: diFree(ip)
827 *
828 * FUNCTION: free a specified inode from the inode working map
829 * for a fileset or aggregate.
830 *
831 * if the inode to be freed represents the first (only)
832 * free inode within the iag, the iag will be placed on
833 * the ag free inode list.
834 *
835 * freeing the inode will cause the inode extent to be
836 * freed if the inode is the only allocated inode within
837 * the extent. in this case all the disk resource backing
838 * up the inode extent will be freed. in addition, the iag
839 * will be placed on the ag extent free list if the extent
840 * is the first free extent in the iag. if freeing the
841 * extent also means that no free inodes will exist for
842 * the iag, the iag will also be removed from the ag free
843 * inode list.
844 *
845 * the iag describing the inode will be freed if the extent
846 * is to be freed and it is the only backed extent within
847 * the iag. in this case, the iag will be removed from the
848 * ag free extent list and ag free inode list and placed on
849 * the inode map's free iag list.
850 *
851 * a careful update approach is used to provide consistency
852 * in the face of updates to multiple buffers. under this
853 * approach, all required buffers are obtained before making
854 * any updates and are held until all updates are complete.
855 *
856 * PARAMETERS:
857 * ip - inode to be freed.
858 *
859 * RETURN VALUES:
860 * 0 - success
861 * -EIO - i/o error.
862 */
863int diFree(struct inode *ip)
864{
865    int rc;
866    ino_t inum = ip->i_ino;
867    struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp;
868    struct metapage *mp, *amp, *bmp, *cmp, *dmp;
869    int iagno, ino, extno, bitno, sword, agno;
870    int back, fwd;
871    u32 bitmap, mask;
872    struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap;
873    struct inomap *imap = JFS_IP(ipimap)->i_imap;
874    pxd_t freepxd;
875    tid_t tid;
876    struct inode *iplist[3];
877    struct tlock *tlck;
878    struct pxd_lock *pxdlock;
879
880    /*
881     * This is just to suppress compiler warnings. The same logic that
882     * references these variables is used to initialize them.
883     */
884    aiagp = biagp = ciagp = diagp = NULL;
885
886    /* get the iag number containing the inode.
887     */
888    iagno = INOTOIAG(inum);
889
890    /* make sure that the iag is contained within
891     * the map.
892     */
893    if (iagno >= imap->im_nextiag) {
894        print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
895                   imap, 32, 0);
896        jfs_error(ip->i_sb,
897              "diFree: inum = %d, iagno = %d, nextiag = %d",
898              (uint) inum, iagno, imap->im_nextiag);
899        return -EIO;
900    }
901
902    /* get the allocation group for this ino.
903     */
904    agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb));
905
906    /* Lock the AG specific inode map information
907     */
908    AG_LOCK(imap, agno);
909
910    /* Obtain read lock in imap inode. Don't release it until we have
911     * read all of the IAG's that we are going to.
912     */
913    IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
914
915    /* read the iag.
916     */
917    if ((rc = diIAGRead(imap, iagno, &mp))) {
918        IREAD_UNLOCK(ipimap);
919        AG_UNLOCK(imap, agno);
920        return (rc);
921    }
922    iagp = (struct iag *) mp->data;
923
924    /* get the inode number and extent number of the inode within
925     * the iag and the inode number within the extent.
926     */
927    ino = inum & (INOSPERIAG - 1);
928    extno = ino >> L2INOSPEREXT;
929    bitno = ino & (INOSPEREXT - 1);
930    mask = HIGHORDER >> bitno;
931
932    if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
933        jfs_error(ip->i_sb,
934              "diFree: wmap shows inode already free");
935    }
936
937    if (!addressPXD(&iagp->inoext[extno])) {
938        release_metapage(mp);
939        IREAD_UNLOCK(ipimap);
940        AG_UNLOCK(imap, agno);
941        jfs_error(ip->i_sb, "diFree: invalid inoext");
942        return -EIO;
943    }
944
945    /* compute the bitmap for the extent reflecting the freed inode.
946     */
947    bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
948
949    if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) {
950        release_metapage(mp);
951        IREAD_UNLOCK(ipimap);
952        AG_UNLOCK(imap, agno);
953        jfs_error(ip->i_sb, "diFree: numfree > numinos");
954        return -EIO;
955    }
956    /*
957     * inode extent still has some inodes or below low water mark:
958     * keep the inode extent;
959     */
960    if (bitmap ||
961        imap->im_agctl[agno].numfree < 96 ||
962        (imap->im_agctl[agno].numfree < 288 &&
963         (((imap->im_agctl[agno].numfree * 100) /
964           imap->im_agctl[agno].numinos) <= 25))) {
965        /* if the iag currently has no free inodes (i.e.,
966         * the inode being freed is the first free inode of iag),
967         * insert the iag at head of the inode free list for the ag.
968         */
969        if (iagp->nfreeinos == 0) {
970            /* check if there are any iags on the ag inode
971             * free list. if so, read the first one so that
972             * we can link the current iag onto the list at
973             * the head.
974             */
975            if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
976                /* read the iag that currently is the head
977                 * of the list.
978                 */
979                if ((rc = diIAGRead(imap, fwd, &amp))) {
980                    IREAD_UNLOCK(ipimap);
981                    AG_UNLOCK(imap, agno);
982                    release_metapage(mp);
983                    return (rc);
984                }
985                aiagp = (struct iag *) amp->data;
986
987                /* make current head point back to the iag.
988                 */
989                aiagp->inofreeback = cpu_to_le32(iagno);
990
991                write_metapage(amp);
992            }
993
994            /* iag points forward to current head and iag
995             * becomes the new head of the list.
996             */
997            iagp->inofreefwd =
998                cpu_to_le32(imap->im_agctl[agno].inofree);
999            iagp->inofreeback = cpu_to_le32(-1);
1000            imap->im_agctl[agno].inofree = iagno;
1001        }
1002        IREAD_UNLOCK(ipimap);
1003
1004        /* update the free inode summary map for the extent if
1005         * freeing the inode means the extent will now have free
1006         * inodes (i.e., the inode being freed is the first free
1007         * inode of extent),
1008         */
1009        if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
1010            sword = extno >> L2EXTSPERSUM;
1011            bitno = extno & (EXTSPERSUM - 1);
1012            iagp->inosmap[sword] &=
1013                cpu_to_le32(~(HIGHORDER >> bitno));
1014        }
1015
1016        /* update the bitmap.
1017         */
1018        iagp->wmap[extno] = cpu_to_le32(bitmap);
1019
1020        /* update the free inode counts at the iag, ag and
1021         * map level.
1022         */
1023        le32_add_cpu(&iagp->nfreeinos, 1);
1024        imap->im_agctl[agno].numfree += 1;
1025        atomic_inc(&imap->im_numfree);
1026
1027        /* release the AG inode map lock
1028         */
1029        AG_UNLOCK(imap, agno);
1030
1031        /* write the iag */
1032        write_metapage(mp);
1033
1034        return (0);
1035    }
1036
1037
1038    /*
1039     * inode extent has become free and above low water mark:
1040     * free the inode extent;
1041     */
1042
1043    /*
1044     * prepare to update iag list(s) (careful update step 1)
1045     */
1046    amp = bmp = cmp = dmp = NULL;
1047    fwd = back = -1;
1048
1049    /* check if the iag currently has no free extents. if so,
1050     * it will be placed on the head of the ag extent free list.
1051     */
1052    if (iagp->nfreeexts == 0) {
1053        /* check if the ag extent free list has any iags.
1054         * if so, read the iag at the head of the list now.
1055         * this (head) iag will be updated later to reflect
1056         * the addition of the current iag at the head of
1057         * the list.
1058         */
1059        if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
1060            if ((rc = diIAGRead(imap, fwd, &amp)))
1061                goto error_out;
1062            aiagp = (struct iag *) amp->data;
1063        }
1064    } else {
1065        /* iag has free extents. check if the addition of a free
1066         * extent will cause all extents to be free within this
1067         * iag. if so, the iag will be removed from the ag extent
1068         * free list and placed on the inode map's free iag list.
1069         */
1070        if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1071            /* in preparation for removing the iag from the
1072             * ag extent free list, read the iags preceding
1073             * and following the iag on the ag extent free
1074             * list.
1075             */
1076            if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
1077                if ((rc = diIAGRead(imap, fwd, &amp)))
1078                    goto error_out;
1079                aiagp = (struct iag *) amp->data;
1080            }
1081
1082            if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
1083                if ((rc = diIAGRead(imap, back, &bmp)))
1084                    goto error_out;
1085                biagp = (struct iag *) bmp->data;
1086            }
1087        }
1088    }
1089
1090    /* remove the iag from the ag inode free list if freeing
1091     * this extent cause the iag to have no free inodes.
1092     */
1093    if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1094        int inofreeback = le32_to_cpu(iagp->inofreeback);
1095        int inofreefwd = le32_to_cpu(iagp->inofreefwd);
1096
1097        /* in preparation for removing the iag from the
1098         * ag inode free list, read the iags preceding
1099         * and following the iag on the ag inode free
1100         * list. before reading these iags, we must make
1101         * sure that we already don't have them in hand
1102         * from up above, since re-reading an iag (buffer)
1103         * we are currently holding would cause a deadlock.
1104         */
1105        if (inofreefwd >= 0) {
1106
1107            if (inofreefwd == fwd)
1108                ciagp = (struct iag *) amp->data;
1109            else if (inofreefwd == back)
1110                ciagp = (struct iag *) bmp->data;
1111            else {
1112                if ((rc =
1113                     diIAGRead(imap, inofreefwd, &cmp)))
1114                    goto error_out;
1115                ciagp = (struct iag *) cmp->data;
1116            }
1117            assert(ciagp != NULL);
1118        }
1119
1120        if (inofreeback >= 0) {
1121            if (inofreeback == fwd)
1122                diagp = (struct iag *) amp->data;
1123            else if (inofreeback == back)
1124                diagp = (struct iag *) bmp->data;
1125            else {
1126                if ((rc =
1127                     diIAGRead(imap, inofreeback, &dmp)))
1128                    goto error_out;
1129                diagp = (struct iag *) dmp->data;
1130            }
1131            assert(diagp != NULL);
1132        }
1133    }
1134
1135    IREAD_UNLOCK(ipimap);
1136
1137    /*
1138     * invalidate any page of the inode extent freed from buffer cache;
1139     */
1140    freepxd = iagp->inoext[extno];
1141    invalidate_pxd_metapages(ip, freepxd);
1142
1143    /*
1144     * update iag list(s) (careful update step 2)
1145     */
1146    /* add the iag to the ag extent free list if this is the
1147     * first free extent for the iag.
1148     */
1149    if (iagp->nfreeexts == 0) {
1150        if (fwd >= 0)
1151            aiagp->extfreeback = cpu_to_le32(iagno);
1152
1153        iagp->extfreefwd =
1154            cpu_to_le32(imap->im_agctl[agno].extfree);
1155        iagp->extfreeback = cpu_to_le32(-1);
1156        imap->im_agctl[agno].extfree = iagno;
1157    } else {
1158        /* remove the iag from the ag extent list if all extents
1159         * are now free and place it on the inode map iag free list.
1160         */
1161        if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1162            if (fwd >= 0)
1163                aiagp->extfreeback = iagp->extfreeback;
1164
1165            if (back >= 0)
1166                biagp->extfreefwd = iagp->extfreefwd;
1167            else
1168                imap->im_agctl[agno].extfree =
1169                    le32_to_cpu(iagp->extfreefwd);
1170
1171            iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
1172
1173            IAGFREE_LOCK(imap);
1174            iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1175            imap->im_freeiag = iagno;
1176            IAGFREE_UNLOCK(imap);
1177        }
1178    }
1179
1180    /* remove the iag from the ag inode free list if freeing
1181     * this extent causes the iag to have no free inodes.
1182     */
1183    if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1184        if ((int) le32_to_cpu(iagp->inofreefwd) >= 0)
1185            ciagp->inofreeback = iagp->inofreeback;
1186
1187        if ((int) le32_to_cpu(iagp->inofreeback) >= 0)
1188            diagp->inofreefwd = iagp->inofreefwd;
1189        else
1190            imap->im_agctl[agno].inofree =
1191                le32_to_cpu(iagp->inofreefwd);
1192
1193        iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
1194    }
1195
1196    /* update the inode extent address and working map
1197     * to reflect the free extent.
1198     * the permanent map should have been updated already
1199     * for the inode being freed.
1200     */
1201    if (iagp->pmap[extno] != 0) {
1202        jfs_error(ip->i_sb, "diFree: the pmap does not show inode free");
1203    }
1204    iagp->wmap[extno] = 0;
1205    PXDlength(&iagp->inoext[extno], 0);
1206    PXDaddress(&iagp->inoext[extno], 0);
1207
1208    /* update the free extent and free inode summary maps
1209     * to reflect the freed extent.
1210     * the inode summary map is marked to indicate no inodes
1211     * available for the freed extent.
1212     */
1213    sword = extno >> L2EXTSPERSUM;
1214    bitno = extno & (EXTSPERSUM - 1);
1215    mask = HIGHORDER >> bitno;
1216    iagp->inosmap[sword] |= cpu_to_le32(mask);
1217    iagp->extsmap[sword] &= cpu_to_le32(~mask);
1218
1219    /* update the number of free inodes and number of free extents
1220     * for the iag.
1221     */
1222    le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
1223    le32_add_cpu(&iagp->nfreeexts, 1);
1224
1225    /* update the number of free inodes and backed inodes
1226     * at the ag and inode map level.
1227     */
1228    imap->im_agctl[agno].numfree -= (INOSPEREXT - 1);
1229    imap->im_agctl[agno].numinos -= INOSPEREXT;
1230    atomic_sub(INOSPEREXT - 1, &imap->im_numfree);
1231    atomic_sub(INOSPEREXT, &imap->im_numinos);
1232
1233    if (amp)
1234        write_metapage(amp);
1235    if (bmp)
1236        write_metapage(bmp);
1237    if (cmp)
1238        write_metapage(cmp);
1239    if (dmp)
1240        write_metapage(dmp);
1241
1242    /*
1243     * start transaction to update block allocation map
1244     * for the inode extent freed;
1245     *
1246     * N.B. AG_LOCK is released and iag will be released below, and
1247     * other thread may allocate inode from/reusing the ixad freed
1248     * BUT with new/different backing inode extent from the extent
1249     * to be freed by the transaction;
1250     */
1251    tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
1252    mutex_lock(&JFS_IP(ipimap)->commit_mutex);
1253
1254    /* acquire tlock of the iag page of the freed ixad
1255     * to force the page NOHOMEOK (even though no data is
1256     * logged from the iag page) until NOREDOPAGE|FREEXTENT log
1257     * for the free of the extent is committed;
1258     * write FREEXTENT|NOREDOPAGE log record
1259     * N.B. linelock is overlaid as freed extent descriptor;
1260     */
1261    tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE);
1262    pxdlock = (struct pxd_lock *) & tlck->lock;
1263    pxdlock->flag = mlckFREEPXD;
1264    pxdlock->pxd = freepxd;
1265    pxdlock->index = 1;
1266
1267    write_metapage(mp);
1268
1269    iplist[0] = ipimap;
1270
1271    /*
1272     * logredo needs the IAG number and IAG extent index in order
1273     * to ensure that the IMap is consistent. The least disruptive
1274     * way to pass these values through to the transaction manager
1275     * is in the iplist array.
1276     *
1277     * It's not pretty, but it works.
1278     */
1279    iplist[1] = (struct inode *) (size_t)iagno;
1280    iplist[2] = (struct inode *) (size_t)extno;
1281
1282    rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
1283
1284    txEnd(tid);
1285    mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
1286
1287    /* unlock the AG inode map information */
1288    AG_UNLOCK(imap, agno);
1289
1290    return (0);
1291
1292      error_out:
1293    IREAD_UNLOCK(ipimap);
1294
1295    if (amp)
1296        release_metapage(amp);
1297    if (bmp)
1298        release_metapage(bmp);
1299    if (cmp)
1300        release_metapage(cmp);
1301    if (dmp)
1302        release_metapage(dmp);
1303
1304    AG_UNLOCK(imap, agno);
1305
1306    release_metapage(mp);
1307
1308    return (rc);
1309}
1310
1311/*
1312 * There are several places in the diAlloc* routines where we initialize
1313 * the inode.
1314 */
1315static inline void
1316diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1317{
1318    struct jfs_inode_info *jfs_ip = JFS_IP(ip);
1319
1320    ip->i_ino = (iagno << L2INOSPERIAG) + ino;
1321    jfs_ip->ixpxd = iagp->inoext[extno];
1322    jfs_ip->agstart = le64_to_cpu(iagp->agstart);
1323    jfs_ip->active_ag = -1;
1324}
1325
1326
1327/*
1328 * NAME: diAlloc(pip,dir,ip)
1329 *
1330 * FUNCTION: allocate a disk inode from the inode working map
1331 * for a fileset or aggregate.
1332 *
1333 * PARAMETERS:
1334 * pip - pointer to incore inode for the parent inode.
1335 * dir - 'true' if the new disk inode is for a directory.
1336 * ip - pointer to a new inode
1337 *
1338 * RETURN VALUES:
1339 * 0 - success.
1340 * -ENOSPC - insufficient disk resources.
1341 * -EIO - i/o error.
1342 */
1343int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1344{
1345    int rc, ino, iagno, addext, extno, bitno, sword;
1346    int nwords, rem, i, agno;
1347    u32 mask, inosmap, extsmap;
1348    struct inode *ipimap;
1349    struct metapage *mp;
1350    ino_t inum;
1351    struct iag *iagp;
1352    struct inomap *imap;
1353
1354    /* get the pointers to the inode map inode and the
1355     * corresponding imap control structure.
1356     */
1357    ipimap = JFS_SBI(pip->i_sb)->ipimap;
1358    imap = JFS_IP(ipimap)->i_imap;
1359    JFS_IP(ip)->ipimap = ipimap;
1360    JFS_IP(ip)->fileset = FILESYSTEM_I;
1361
1362    /* for a directory, the allocation policy is to start
1363     * at the ag level using the preferred ag.
1364     */
1365    if (dir) {
1366        agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1367        AG_LOCK(imap, agno);
1368        goto tryag;
1369    }
1370
1371    /* for files, the policy starts off by trying to allocate from
1372     * the same iag containing the parent disk inode:
1373     * try to allocate the new disk inode close to the parent disk
1374     * inode, using parent disk inode number + 1 as the allocation
1375     * hint. (we use a left-to-right policy to attempt to avoid
1376     * moving backward on the disk.) compute the hint within the
1377     * file system and the iag.
1378     */
1379
1380    /* get the ag number of this iag */
1381    agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
1382
1383    if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
1384        /*
1385         * There is an open file actively growing. We want to
1386         * allocate new inodes from a different ag to avoid
1387         * fragmentation problems.
1388         */
1389        agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1390        AG_LOCK(imap, agno);
1391        goto tryag;
1392    }
1393
1394    inum = pip->i_ino + 1;
1395    ino = inum & (INOSPERIAG - 1);
1396
1397    /* back off the hint if it is outside of the iag */
1398    if (ino == 0)
1399        inum = pip->i_ino;
1400
1401    /* lock the AG inode map information */
1402    AG_LOCK(imap, agno);
1403
1404    /* Get read lock on imap inode */
1405    IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
1406
1407    /* get the iag number and read the iag */
1408    iagno = INOTOIAG(inum);
1409    if ((rc = diIAGRead(imap, iagno, &mp))) {
1410        IREAD_UNLOCK(ipimap);
1411        AG_UNLOCK(imap, agno);
1412        return (rc);
1413    }
1414    iagp = (struct iag *) mp->data;
1415
1416    /* determine if new inode extent is allowed to be added to the iag.
1417     * new inode extent can be added to the iag if the ag
1418     * has less than 32 free disk inodes and the iag has free extents.
1419     */
1420    addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
1421
1422    /*
1423     * try to allocate from the IAG
1424     */
1425    /* check if the inode may be allocated from the iag
1426     * (i.e. the inode has free inodes or new extent can be added).
1427     */
1428    if (iagp->nfreeinos || addext) {
1429        /* determine the extent number of the hint.
1430         */
1431        extno = ino >> L2INOSPEREXT;
1432
1433        /* check if the extent containing the hint has backed
1434         * inodes. if so, try to allocate within this extent.
1435         */
1436        if (addressPXD(&iagp->inoext[extno])) {
1437            bitno = ino & (INOSPEREXT - 1);
1438            if ((bitno =
1439                 diFindFree(le32_to_cpu(iagp->wmap[extno]),
1440                    bitno))
1441                < INOSPEREXT) {
1442                ino = (extno << L2INOSPEREXT) + bitno;
1443
1444                /* a free inode (bit) was found within this
1445                 * extent, so allocate it.
1446                 */
1447                rc = diAllocBit(imap, iagp, ino);
1448                IREAD_UNLOCK(ipimap);
1449                if (rc) {
1450                    assert(rc == -EIO);
1451                } else {
1452                    /* set the results of the allocation
1453                     * and write the iag.
1454                     */
1455                    diInitInode(ip, iagno, ino, extno,
1456                            iagp);
1457                    mark_metapage_dirty(mp);
1458                }
1459                release_metapage(mp);
1460
1461                /* free the AG lock and return.
1462                 */
1463                AG_UNLOCK(imap, agno);
1464                return (rc);
1465            }
1466
1467            if (!addext)
1468                extno =
1469                    (extno ==
1470                     EXTSPERIAG - 1) ? 0 : extno + 1;
1471        }
1472
1473        /*
1474         * no free inodes within the extent containing the hint.
1475         *
1476         * try to allocate from the backed extents following
1477         * hint or, if appropriate (i.e. addext is true), allocate
1478         * an extent of free inodes at or following the extent
1479         * containing the hint.
1480         *
1481         * the free inode and free extent summary maps are used
1482         * here, so determine the starting summary map position
1483         * and the number of words we'll have to examine. again,
1484         * the approach is to allocate following the hint, so we
1485         * might have to initially ignore prior bits of the summary
1486         * map that represent extents prior to the extent containing
1487         * the hint and later revisit these bits.
1488         */
1489        bitno = extno & (EXTSPERSUM - 1);
1490        nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
1491        sword = extno >> L2EXTSPERSUM;
1492
1493        /* mask any prior bits for the starting words of the
1494         * summary map.
1495         */
1496        mask = ONES << (EXTSPERSUM - bitno);
1497        inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
1498        extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;
1499
1500        /* scan the free inode and free extent summary maps for
1501         * free resources.
1502         */
1503        for (i = 0; i < nwords; i++) {
1504            /* check if this word of the free inode summary
1505             * map describes an extent with free inodes.
1506             */
1507            if (~inosmap) {
1508                /* an extent with free inodes has been
1509                 * found. determine the extent number
1510                 * and the inode number within the extent.
1511                 */
1512                rem = diFindFree(inosmap, 0);
1513                extno = (sword << L2EXTSPERSUM) + rem;
1514                rem = diFindFree(le32_to_cpu(iagp->wmap[extno]),
1515                         0);
1516                if (rem >= INOSPEREXT) {
1517                    IREAD_UNLOCK(ipimap);
1518                    release_metapage(mp);
1519                    AG_UNLOCK(imap, agno);
1520                    jfs_error(ip->i_sb,
1521                          "diAlloc: can't find free bit "
1522                          "in wmap");
1523                    return -EIO;
1524                }
1525
1526                /* determine the inode number within the
1527                 * iag and allocate the inode from the
1528                 * map.
1529                 */
1530                ino = (extno << L2INOSPEREXT) + rem;
1531                rc = diAllocBit(imap, iagp, ino);
1532                IREAD_UNLOCK(ipimap);
1533                if (rc)
1534                    assert(rc == -EIO);
1535                else {
1536                    /* set the results of the allocation
1537                     * and write the iag.
1538                     */
1539                    diInitInode(ip, iagno, ino, extno,
1540                            iagp);
1541                    mark_metapage_dirty(mp);
1542                }
1543                release_metapage(mp);
1544
1545                /* free the AG lock and return.
1546                 */
1547                AG_UNLOCK(imap, agno);
1548                return (rc);
1549
1550            }
1551
1552            /* check if we may allocate an extent of free
1553             * inodes and whether this word of the free
1554             * extents summary map describes a free extent.
1555             */
1556            if (addext && ~extsmap) {
1557                /* a free extent has been found. determine
1558                 * the extent number.
1559                 */
1560                rem = diFindFree(extsmap, 0);
1561                extno = (sword << L2EXTSPERSUM) + rem;
1562
1563                /* allocate an extent of free inodes.
1564                 */
1565                if ((rc = diNewExt(imap, iagp, extno))) {
1566                    /* if there is no disk space for a
1567                     * new extent, try to allocate the
1568                     * disk inode from somewhere else.
1569                     */
1570                    if (rc == -ENOSPC)
1571                        break;
1572
1573                    assert(rc == -EIO);
1574                } else {
1575                    /* set the results of the allocation
1576                     * and write the iag.
1577                     */
1578                    diInitInode(ip, iagno,
1579                            extno << L2INOSPEREXT,
1580                            extno, iagp);
1581                    mark_metapage_dirty(mp);
1582                }
1583                release_metapage(mp);
1584                /* free the imap inode & the AG lock & return.
1585                 */
1586                IREAD_UNLOCK(ipimap);
1587                AG_UNLOCK(imap, agno);
1588                return (rc);
1589            }
1590
1591            /* move on to the next set of summary map words.
1592             */
1593            sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
1594            inosmap = le32_to_cpu(iagp->inosmap[sword]);
1595            extsmap = le32_to_cpu(iagp->extsmap[sword]);
1596        }
1597    }
1598    /* unlock imap inode */
1599    IREAD_UNLOCK(ipimap);
1600
1601    /* nothing doing in this iag, so release it. */
1602    release_metapage(mp);
1603
1604      tryag:
1605    /*
1606     * try to allocate anywhere within the same AG as the parent inode.
1607     */
1608    rc = diAllocAG(imap, agno, dir, ip);
1609
1610    AG_UNLOCK(imap, agno);
1611
1612    if (rc != -ENOSPC)
1613        return (rc);
1614
1615    /*
1616     * try to allocate in any AG.
1617     */
1618    return (diAllocAny(imap, agno, dir, ip));
1619}
1620
1621
1622/*
1623 * NAME: diAllocAG(imap,agno,dir,ip)
1624 *
1625 * FUNCTION: allocate a disk inode from the allocation group.
1626 *
1627 * this routine first determines if a new extent of free
1628 * inodes should be added for the allocation group, with
1629 * the current request satisfied from this extent. if this
1630 * is the case, an attempt will be made to do just that. if
1631 * this attempt fails or it has been determined that a new
1632 * extent should not be added, an attempt is made to satisfy
1633 * the request by allocating an existing (backed) free inode
1634 * from the allocation group.
1635 *
1636 * PRE CONDITION: Already have the AG lock for this AG.
1637 *
1638 * PARAMETERS:
1639 * imap - pointer to inode map control structure.
1640 * agno - allocation group to allocate from.
1641 * dir - 'true' if the new disk inode is for a directory.
1642 * ip - pointer to the new inode to be filled in on successful return
1643 * with the disk inode number allocated, its extent address
1644 * and the start of the ag.
1645 *
1646 * RETURN VALUES:
1647 * 0 - success.
1648 * -ENOSPC - insufficient disk resources.
1649 * -EIO - i/o error.
1650 */
1651static int
1652diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1653{
1654    int rc, addext, numfree, numinos;
1655
1656    /* get the number of free and the number of backed disk
1657     * inodes currently within the ag.
1658     */
1659    numfree = imap->im_agctl[agno].numfree;
1660    numinos = imap->im_agctl[agno].numinos;
1661
1662    if (numfree > numinos) {
1663        jfs_error(ip->i_sb, "diAllocAG: numfree > numinos");
1664        return -EIO;
1665    }
1666
1667    /* determine if we should allocate a new extent of free inodes
1668     * within the ag: for directory inodes, add a new extent
1669     * if there are a small number of free inodes or number of free
1670     * inodes is a small percentage of the number of backed inodes.
1671     */
1672    if (dir)
1673        addext = (numfree < 64 ||
1674              (numfree < 256
1675               && ((numfree * 100) / numinos) <= 20));
1676    else
1677        addext = (numfree == 0);
1678
1679    /*
1680     * try to allocate a new extent of free inodes.
1681     */
1682    if (addext) {
1683        /* if free space is not available for this new extent, try
1684         * below to allocate a free and existing (already backed)
1685         * inode from the ag.
1686         */
1687        if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC)
1688            return (rc);
1689    }
1690
1691    /*
1692     * try to allocate an existing free inode from the ag.
1693     */
1694    return (diAllocIno(imap, agno, ip));
1695}
1696
1697
1698/*
1699 * NAME: diAllocAny(imap,agno,dir,iap)
1700 *
1701 * FUNCTION: allocate a disk inode from any other allocation group.
1702 *
1703 * this routine is called when an allocation attempt within
1704 * the primary allocation group has failed. if attempts to
1705 * allocate an inode from any allocation group other than the
1706 * specified primary group.
1707 *
1708 * PARAMETERS:
1709 * imap - pointer to inode map control structure.
1710 * agno - primary allocation group (to avoid).
1711 * dir - 'true' if the new disk inode is for a directory.
1712 * ip - pointer to a new inode to be filled in on successful return
1713 * with the disk inode number allocated, its extent address
1714 * and the start of the ag.
1715 *
1716 * RETURN VALUES:
1717 * 0 - success.
1718 * -ENOSPC - insufficient disk resources.
1719 * -EIO - i/o error.
1720 */
1721static int
1722diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1723{
1724    int ag, rc;
1725    int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
1726
1727
1728    /* try to allocate from the ags following agno up to
1729     * the maximum ag number.
1730     */
1731    for (ag = agno + 1; ag <= maxag; ag++) {
1732        AG_LOCK(imap, ag);
1733
1734        rc = diAllocAG(imap, ag, dir, ip);
1735
1736        AG_UNLOCK(imap, ag);
1737
1738        if (rc != -ENOSPC)
1739            return (rc);
1740    }
1741
1742    /* try to allocate from the ags in front of agno.
1743     */
1744    for (ag = 0; ag < agno; ag++) {
1745        AG_LOCK(imap, ag);
1746
1747        rc = diAllocAG(imap, ag, dir, ip);
1748
1749        AG_UNLOCK(imap, ag);
1750
1751        if (rc != -ENOSPC)
1752            return (rc);
1753    }
1754
1755    /* no free disk inodes.
1756     */
1757    return -ENOSPC;
1758}
1759
1760
1761/*
1762 * NAME: diAllocIno(imap,agno,ip)
1763 *
1764 * FUNCTION: allocate a disk inode from the allocation group's free
1765 * inode list, returning an error if this free list is
1766 * empty (i.e. no iags on the list).
1767 *
1768 * allocation occurs from the first iag on the list using
1769 * the iag's free inode summary map to find the leftmost
1770 * free inode in the iag.
1771 *
1772 * PRE CONDITION: Already have AG lock for this AG.
1773 *
1774 * PARAMETERS:
1775 * imap - pointer to inode map control structure.
1776 * agno - allocation group.
1777 * ip - pointer to new inode to be filled in on successful return
1778 * with the disk inode number allocated, its extent address
1779 * and the start of the ag.
1780 *
1781 * RETURN VALUES:
1782 * 0 - success.
1783 * -ENOSPC - insufficient disk resources.
1784 * -EIO - i/o error.
1785 */
1786static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1787{
1788    int iagno, ino, rc, rem, extno, sword;
1789    struct metapage *mp;
1790    struct iag *iagp;
1791
1792    /* check if there are iags on the ag's free inode list.
1793     */
1794    if ((iagno = imap->im_agctl[agno].inofree) < 0)
1795        return -ENOSPC;
1796
1797    /* obtain read lock on imap inode */
1798    IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1799
1800    /* read the iag at the head of the list.
1801     */
1802    if ((rc = diIAGRead(imap, iagno, &mp))) {
1803        IREAD_UNLOCK(imap->im_ipimap);
1804        return (rc);
1805    }
1806    iagp = (struct iag *) mp->data;
1807
1808    /* better be free inodes in this iag if it is on the
1809     * list.
1810     */
1811    if (!iagp->nfreeinos) {
1812        IREAD_UNLOCK(imap->im_ipimap);
1813        release_metapage(mp);
1814        jfs_error(ip->i_sb,
1815              "diAllocIno: nfreeinos = 0, but iag on freelist");
1816        return -EIO;
1817    }
1818
1819    /* scan the free inode summary map to find an extent
1820     * with free inodes.
1821     */
1822    for (sword = 0;; sword++) {
1823        if (sword >= SMAPSZ) {
1824            IREAD_UNLOCK(imap->im_ipimap);
1825            release_metapage(mp);
1826            jfs_error(ip->i_sb,
1827                  "diAllocIno: free inode not found in summary map");
1828            return -EIO;
1829        }
1830
1831        if (~iagp->inosmap[sword])
1832            break;
1833    }
1834
1835    /* found a extent with free inodes. determine
1836     * the extent number.
1837     */
1838    rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
1839    if (rem >= EXTSPERSUM) {
1840        IREAD_UNLOCK(imap->im_ipimap);
1841        release_metapage(mp);
1842        jfs_error(ip->i_sb, "diAllocIno: no free extent found");
1843        return -EIO;
1844    }
1845    extno = (sword << L2EXTSPERSUM) + rem;
1846
1847    /* find the first free inode in the extent.
1848     */
1849    rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0);
1850    if (rem >= INOSPEREXT) {
1851        IREAD_UNLOCK(imap->im_ipimap);
1852        release_metapage(mp);
1853        jfs_error(ip->i_sb, "diAllocIno: free inode not found");
1854        return -EIO;
1855    }
1856
1857    /* compute the inode number within the iag.
1858     */
1859    ino = (extno << L2INOSPEREXT) + rem;
1860
1861    /* allocate the inode.
1862     */
1863    rc = diAllocBit(imap, iagp, ino);
1864    IREAD_UNLOCK(imap->im_ipimap);
1865    if (rc) {
1866        release_metapage(mp);
1867        return (rc);
1868    }
1869
1870    /* set the results of the allocation and write the iag.
1871     */
1872    diInitInode(ip, iagno, ino, extno, iagp);
1873    write_metapage(mp);
1874
1875    return (0);
1876}
1877
1878
1879/*
1880 * NAME: diAllocExt(imap,agno,ip)
1881 *
1882 * FUNCTION: add a new extent of free inodes to an iag, allocating
1883 * an inode from this extent to satisfy the current allocation
1884 * request.
1885 *
1886 * this routine first tries to find an existing iag with free
1887 * extents through the ag free extent list. if list is not
1888 * empty, the head of the list will be selected as the home
1889 * of the new extent of free inodes. otherwise (the list is
1890 * empty), a new iag will be allocated for the ag to contain
1891 * the extent.
1892 *
1893 * once an iag has been selected, the free extent summary map
1894 * is used to locate a free extent within the iag and diNewExt()
1895 * is called to initialize the extent, with initialization
1896 * including the allocation of the first inode of the extent
1897 * for the purpose of satisfying this request.
1898 *
1899 * PARAMETERS:
1900 * imap - pointer to inode map control structure.
1901 * agno - allocation group number.
1902 * ip - pointer to new inode to be filled in on successful return
1903 * with the disk inode number allocated, its extent address
1904 * and the start of the ag.
1905 *
1906 * RETURN VALUES:
1907 * 0 - success.
1908 * -ENOSPC - insufficient disk resources.
1909 * -EIO - i/o error.
1910 */
1911static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1912{
1913    int rem, iagno, sword, extno, rc;
1914    struct metapage *mp;
1915    struct iag *iagp;
1916
1917    /* check if the ag has any iags with free extents. if not,
1918     * allocate a new iag for the ag.
1919     */
1920    if ((iagno = imap->im_agctl[agno].extfree) < 0) {
1921        /* If successful, diNewIAG will obtain the read lock on the
1922         * imap inode.
1923         */
1924        if ((rc = diNewIAG(imap, &iagno, agno, &mp))) {
1925            return (rc);
1926        }
1927        iagp = (struct iag *) mp->data;
1928
1929        /* set the ag number if this a brand new iag
1930         */
1931        iagp->agstart =
1932            cpu_to_le64(AGTOBLK(agno, imap->im_ipimap));
1933    } else {
1934        /* read the iag.
1935         */
1936        IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1937        if ((rc = diIAGRead(imap, iagno, &mp))) {
1938            IREAD_UNLOCK(imap->im_ipimap);
1939            jfs_error(ip->i_sb, "diAllocExt: error reading iag");
1940            return rc;
1941        }
1942        iagp = (struct iag *) mp->data;
1943    }
1944
1945    /* using the free extent summary map, find a free extent.
1946     */
1947    for (sword = 0;; sword++) {
1948        if (sword >= SMAPSZ) {
1949            release_metapage(mp);
1950            IREAD_UNLOCK(imap->im_ipimap);
1951            jfs_error(ip->i_sb,
1952                  "diAllocExt: free ext summary map not found");
1953            return -EIO;
1954        }
1955        if (~iagp->extsmap[sword])
1956            break;
1957    }
1958
1959    /* determine the extent number of the free extent.
1960     */
1961    rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0);
1962    if (rem >= EXTSPERSUM) {
1963        release_metapage(mp);
1964        IREAD_UNLOCK(imap->im_ipimap);
1965        jfs_error(ip->i_sb, "diAllocExt: free extent not found");
1966        return -EIO;
1967    }
1968    extno = (sword << L2EXTSPERSUM) + rem;
1969
1970    /* initialize the new extent.
1971     */
1972    rc = diNewExt(imap, iagp, extno);
1973    IREAD_UNLOCK(imap->im_ipimap);
1974    if (rc) {
1975        /* something bad happened. if a new iag was allocated,
1976         * place it back on the inode map's iag free list, and
1977         * clear the ag number information.
1978         */
1979        if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
1980            IAGFREE_LOCK(imap);
1981            iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1982            imap->im_freeiag = iagno;
1983            IAGFREE_UNLOCK(imap);
1984        }
1985        write_metapage(mp);
1986        return (rc);
1987    }
1988
1989    /* set the results of the allocation and write the iag.
1990     */
1991    diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp);
1992
1993    write_metapage(mp);
1994
1995    return (0);
1996}
1997
1998
1999/*
2000 * NAME: diAllocBit(imap,iagp,ino)
2001 *
2002 * FUNCTION: allocate a backed inode from an iag.
2003 *
2004 * this routine performs the mechanics of allocating a
2005 * specified inode from a backed extent.
2006 *
2007 * if the inode to be allocated represents the last free
2008 * inode within the iag, the iag will be removed from the
2009 * ag free inode list.
2010 *
2011 * a careful update approach is used to provide consistency
2012 * in the face of updates to multiple buffers. under this
2013 * approach, all required buffers are obtained before making
2014 * any updates and are held all are updates are complete.
2015 *
2016 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
2017 * this AG. Must have read lock on imap inode.
2018 *
2019 * PARAMETERS:
2020 * imap - pointer to inode map control structure.
2021 * iagp - pointer to iag.
2022 * ino - inode number to be allocated within the iag.
2023 *
2024 * RETURN VALUES:
2025 * 0 - success.
2026 * -ENOSPC - insufficient disk resources.
2027 * -EIO - i/o error.
2028 */
2029static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2030{
2031    int extno, bitno, agno, sword, rc;
2032    struct metapage *amp = NULL, *bmp = NULL;
2033    struct iag *aiagp = NULL, *biagp = NULL;
2034    u32 mask;
2035
2036    /* check if this is the last free inode within the iag.
2037     * if so, it will have to be removed from the ag free
2038     * inode list, so get the iags preceding and following
2039     * it on the list.
2040     */
2041    if (iagp->nfreeinos == cpu_to_le32(1)) {
2042        if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
2043            if ((rc =
2044                 diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
2045                       &amp)))
2046                return (rc);
2047            aiagp = (struct iag *) amp->data;
2048        }
2049
2050        if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
2051            if ((rc =
2052                 diIAGRead(imap,
2053                       le32_to_cpu(iagp->inofreeback),
2054                       &bmp))) {
2055                if (amp)
2056                    release_metapage(amp);
2057                return (rc);
2058            }
2059            biagp = (struct iag *) bmp->data;
2060        }
2061    }
2062
2063    /* get the ag number, extent number, inode number within
2064     * the extent.
2065     */
2066    agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
2067    extno = ino >> L2INOSPEREXT;
2068    bitno = ino & (INOSPEREXT - 1);
2069
2070    /* compute the mask for setting the map.
2071     */
2072    mask = HIGHORDER >> bitno;
2073
2074    /* the inode should be free and backed.
2075     */
2076    if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) ||
2077        ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) ||
2078        (addressPXD(&iagp->inoext[extno]) == 0)) {
2079        if (amp)
2080            release_metapage(amp);
2081        if (bmp)
2082            release_metapage(bmp);
2083
2084        jfs_error(imap->im_ipimap->i_sb,
2085              "diAllocBit: iag inconsistent");
2086        return -EIO;
2087    }
2088
2089    /* mark the inode as allocated in the working map.
2090     */
2091    iagp->wmap[extno] |= cpu_to_le32(mask);
2092
2093    /* check if all inodes within the extent are now
2094     * allocated. if so, update the free inode summary
2095     * map to reflect this.
2096     */
2097    if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
2098        sword = extno >> L2EXTSPERSUM;
2099        bitno = extno & (EXTSPERSUM - 1);
2100        iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
2101    }
2102
2103    /* if this was the last free inode in the iag, remove the
2104     * iag from the ag free inode list.
2105     */
2106    if (iagp->nfreeinos == cpu_to_le32(1)) {
2107        if (amp) {
2108            aiagp->inofreeback = iagp->inofreeback;
2109            write_metapage(amp);
2110        }
2111
2112        if (bmp) {
2113            biagp->inofreefwd = iagp->inofreefwd;
2114            write_metapage(bmp);
2115        } else {
2116            imap->im_agctl[agno].inofree =
2117                le32_to_cpu(iagp->inofreefwd);
2118        }
2119        iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2120    }
2121
2122    /* update the free inode count at the iag, ag, inode
2123     * map levels.
2124     */
2125    le32_add_cpu(&iagp->nfreeinos, -1);
2126    imap->im_agctl[agno].numfree -= 1;
2127    atomic_dec(&imap->im_numfree);
2128
2129    return (0);
2130}
2131
2132
2133/*
2134 * NAME: diNewExt(imap,iagp,extno)
2135 *
2136 * FUNCTION: initialize a new extent of inodes for an iag, allocating
2137 * the first inode of the extent for use for the current
2138 * allocation request.
2139 *
2140 * disk resources are allocated for the new extent of inodes
2141 * and the inodes themselves are initialized to reflect their
2142 * existence within the extent (i.e. their inode numbers and
2143 * inode extent addresses are set) and their initial state
2144 * (mode and link count are set to zero).
2145 *
2146 * if the iag is new, it is not yet on an ag extent free list
2147 * but will now be placed on this list.
2148 *
2149 * if the allocation of the new extent causes the iag to
2150 * have no free extent, the iag will be removed from the
2151 * ag extent free list.
2152 *
2153 * if the iag has no free backed inodes, it will be placed
2154 * on the ag free inode list, since the addition of the new
2155 * extent will now cause it to have free inodes.
2156 *
2157 * a careful update approach is used to provide consistency
2158 * (i.e. list consistency) in the face of updates to multiple
2159 * buffers. under this approach, all required buffers are
2160 * obtained before making any updates and are held until all
2161 * updates are complete.
2162 *
2163 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on
2164 * this AG. Must have read lock on imap inode.
2165 *
2166 * PARAMETERS:
2167 * imap - pointer to inode map control structure.
2168 * iagp - pointer to iag.
2169 * extno - extent number.
2170 *
2171 * RETURN VALUES:
2172 * 0 - success.
2173 * -ENOSPC - insufficient disk resources.
2174 * -EIO - i/o error.
2175 */
2176static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2177{
2178    int agno, iagno, fwd, back, freei = 0, sword, rc;
2179    struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL;
2180    struct metapage *amp, *bmp, *cmp, *dmp;
2181    struct inode *ipimap;
2182    s64 blkno, hint;
2183    int i, j;
2184    u32 mask;
2185    ino_t ino;
2186    struct dinode *dp;
2187    struct jfs_sb_info *sbi;
2188
2189    /* better have free extents.
2190     */
2191    if (!iagp->nfreeexts) {
2192        jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents");
2193        return -EIO;
2194    }
2195
2196    /* get the inode map inode.
2197     */
2198    ipimap = imap->im_ipimap;
2199    sbi = JFS_SBI(ipimap->i_sb);
2200
2201    amp = bmp = cmp = NULL;
2202
2203    /* get the ag and iag numbers for this iag.
2204     */
2205    agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
2206    iagno = le32_to_cpu(iagp->iagnum);
2207
2208    /* check if this is the last free extent within the
2209     * iag. if so, the iag must be removed from the ag
2210     * free extent list, so get the iags preceding and
2211     * following the iag on this list.
2212     */
2213    if (iagp->nfreeexts == cpu_to_le32(1)) {
2214        if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
2215            if ((rc = diIAGRead(imap, fwd, &amp)))
2216                return (rc);
2217            aiagp = (struct iag *) amp->data;
2218        }
2219
2220        if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
2221            if ((rc = diIAGRead(imap, back, &bmp)))
2222                goto error_out;
2223            biagp = (struct iag *) bmp->data;
2224        }
2225    } else {
2226        /* the iag has free extents. if all extents are free
2227         * (as is the case for a newly allocated iag), the iag
2228         * must be added to the ag free extent list, so get
2229         * the iag at the head of the list in preparation for
2230         * adding this iag to this list.
2231         */
2232        fwd = back = -1;
2233        if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2234            if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
2235                if ((rc = diIAGRead(imap, fwd, &amp)))
2236                    goto error_out;
2237                aiagp = (struct iag *) amp->data;
2238            }
2239        }
2240    }
2241
2242    /* check if the iag has no free inodes. if so, the iag
2243     * will have to be added to the ag free inode list, so get
2244     * the iag at the head of the list in preparation for
2245     * adding this iag to this list. in doing this, we must
2246     * check if we already have the iag at the head of
2247     * the list in hand.
2248     */
2249    if (iagp->nfreeinos == 0) {
2250        freei = imap->im_agctl[agno].inofree;
2251
2252        if (freei >= 0) {
2253            if (freei == fwd) {
2254                ciagp = aiagp;
2255            } else if (freei == back) {
2256                ciagp = biagp;
2257            } else {
2258                if ((rc = diIAGRead(imap, freei, &cmp)))
2259                    goto error_out;
2260                ciagp = (struct iag *) cmp->data;
2261            }
2262            if (ciagp == NULL) {
2263                jfs_error(imap->im_ipimap->i_sb,
2264                      "diNewExt: ciagp == NULL");
2265                rc = -EIO;
2266                goto error_out;
2267            }
2268        }
2269    }
2270
2271    /* allocate disk space for the inode extent.
2272     */
2273    if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
2274        hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
2275    else
2276        hint = addressPXD(&iagp->inoext[extno - 1]) +
2277            lengthPXD(&iagp->inoext[extno - 1]) - 1;
2278
2279    if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
2280        goto error_out;
2281
2282    /* compute the inode number of the first inode within the
2283     * extent.
2284     */
2285    ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);
2286
2287    /* initialize the inodes within the newly allocated extent a
2288     * page at a time.
2289     */
2290    for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
2291        /* get a buffer for this page of disk inodes.
2292         */
2293        dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
2294        if (dmp == NULL) {
2295            rc = -EIO;
2296            goto error_out;
2297        }
2298        dp = (struct dinode *) dmp->data;
2299
2300        /* initialize the inode number, mode, link count and
2301         * inode extent address.
2302         */
2303        for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
2304            dp->di_inostamp = cpu_to_le32(sbi->inostamp);
2305            dp->di_number = cpu_to_le32(ino);
2306            dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
2307            dp->di_mode = 0;
2308            dp->di_nlink = 0;
2309            PXDaddress(&(dp->di_ixpxd), blkno);
2310            PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
2311        }
2312        write_metapage(dmp);
2313    }
2314
2315    /* if this is the last free extent within the iag, remove the
2316     * iag from the ag free extent list.
2317     */
2318    if (iagp->nfreeexts == cpu_to_le32(1)) {
2319        if (fwd >= 0)
2320            aiagp->extfreeback = iagp->extfreeback;
2321
2322        if (back >= 0)
2323            biagp->extfreefwd = iagp->extfreefwd;
2324        else
2325            imap->im_agctl[agno].extfree =
2326                le32_to_cpu(iagp->extfreefwd);
2327
2328        iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2329    } else {
2330        /* if the iag has all free extents (newly allocated iag),
2331         * add the iag to the ag free extent list.
2332         */
2333        if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2334            if (fwd >= 0)
2335                aiagp->extfreeback = cpu_to_le32(iagno);
2336
2337            iagp->extfreefwd = cpu_to_le32(fwd);
2338            iagp->extfreeback = cpu_to_le32(-1);
2339            imap->im_agctl[agno].extfree = iagno;
2340        }
2341    }
2342
2343    /* if the iag has no free inodes, add the iag to the
2344     * ag free inode list.
2345     */
2346    if (iagp->nfreeinos == 0) {
2347        if (freei >= 0)
2348            ciagp->inofreeback = cpu_to_le32(iagno);
2349
2350        iagp->inofreefwd =
2351            cpu_to_le32(imap->im_agctl[agno].inofree);
2352        iagp->inofreeback = cpu_to_le32(-1);
2353        imap->im_agctl[agno].inofree = iagno;
2354    }
2355
2356    /* initialize the extent descriptor of the extent. */
2357    PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
2358    PXDaddress(&iagp->inoext[extno], blkno);
2359
2360    /* initialize the working and persistent map of the extent.
2361     * the working map will be initialized such that
2362     * it indicates the first inode of the extent is allocated.
2363     */
2364    iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
2365    iagp->pmap[extno] = 0;
2366
2367    /* update the free inode and free extent summary maps
2368     * for the extent to indicate the extent has free inodes
2369     * and no longer represents a free extent.
2370     */
2371    sword = extno >> L2EXTSPERSUM;
2372    mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
2373    iagp->extsmap[sword] |= cpu_to_le32(mask);
2374    iagp->inosmap[sword] &= cpu_to_le32(~mask);
2375
2376    /* update the free inode and free extent counts for the
2377     * iag.
2378     */
2379    le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
2380    le32_add_cpu(&iagp->nfreeexts, -1);
2381
2382    /* update the free and backed inode counts for the ag.
2383     */
2384    imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
2385    imap->im_agctl[agno].numinos += INOSPEREXT;
2386
2387    /* update the free and backed inode counts for the inode map.
2388     */
2389    atomic_add(INOSPEREXT - 1, &imap->im_numfree);
2390    atomic_add(INOSPEREXT, &imap->im_numinos);
2391
2392    /* write the iags.
2393     */
2394    if (amp)
2395        write_metapage(amp);
2396    if (bmp)
2397        write_metapage(bmp);
2398    if (cmp)
2399        write_metapage(cmp);
2400
2401    return (0);
2402
2403      error_out:
2404
2405    /* release the iags.
2406     */
2407    if (amp)
2408        release_metapage(amp);
2409    if (bmp)
2410        release_metapage(bmp);
2411    if (cmp)
2412        release_metapage(cmp);
2413
2414    return (rc);
2415}
2416
2417
/*
 * NAME:        diNewIAG(imap,iagnop,agno,mpp)
 *
 * FUNCTION:    allocate a new iag for an allocation group.
 *
 *              first tries to allocate the iag from the inode map
 *              iagfree list:
 *              if the list has free iags, the head of the list is removed
 *              and returned to satisfy the request.
 *              if the inode map's iag free list is empty, the inode map
 *              is extended to hold a new iag. this new iag is initialized,
 *              placed on the iagfree list, and then taken from it again
 *              by the common code at the end to satisfy the request.
 *
 * PARAMETERS:
 *      imap    - pointer to inode map control structure.
 *      iagnop  - pointer to an iag number set with the number of the
 *                newly allocated iag upon successful return.
 *      agno    - allocation group number (not referenced in the body).
 *      mpp     - metapage pointer to be filled in with the new IAG's
 *                buffer on successful return.
 *
 * RETURN VALUES:
 *      0       - success.
 *      -ENOSPC - insufficient disk resources.
 *      -EIO    - i/o error.
 *
 * serialization:
 *      AG lock held on entry/exit;
 *      the free iag lock (IAGFREE_LOCK) is held for the whole call;
 *      write lock on the map is held inside;
 *      read lock on the map is held on successful completion;
 *
 * note: new iag transaction:
 * . synchronously write iag;
 * . write log of xtree and inode of imap;
 * . commit;
 * . synchronous write of xtree (right to left, bottom to top);
 * . at start of logredo(): init in-memory imap with one additional iag page;
 * . at end of logredo(): re-read imap inode to determine
 *   new imap size;
 */
static int
diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
{
    int rc;
    int iagno, i, xlen;
    struct inode *ipimap;
    struct super_block *sb;
    struct jfs_sb_info *sbi;
    struct metapage *mp;
    struct iag *iagp;
    s64 xaddr = 0;
    s64 blkno;
    tid_t tid;
    struct inode *iplist[1];

    /* pick up pointers to the inode map and mount inodes */
    ipimap = imap->im_ipimap;
    sb = ipimap->i_sb;
    sbi = JFS_SBI(sb);

    /* acquire the free iag lock */
    IAGFREE_LOCK(imap);

    /* if there are any iags on the inode map free iag list,
     * allocate the iag from the head of the list.
     */
    if (imap->im_freeiag >= 0) {
        /* pick up the iag number at the head of the list */
        iagno = imap->im_freeiag;

        /* determine the logical block number of the iag */
        blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
    } else {
        /* no free iags. the inode map will have to be extended
         * to include a new iag.
         */

        /* acquire inode map lock */
        IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);

        /* sanity check: the map's size in pages must equal
         * im_nextiag + 1 before it is extended.
         */
        if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
            IWRITE_UNLOCK(ipimap);
            IAGFREE_UNLOCK(imap);
            jfs_error(imap->im_ipimap->i_sb,
                  "diNewIAG: ipimap->i_size is wrong");
            return -EIO;
        }


        /* get the next available iag number */
        iagno = imap->im_nextiag;

        /* make sure that we have not exceeded the maximum inode
         * number limit.
         */
        if (iagno > (MAXIAGS - 1)) {
            /* release the inode map lock */
            IWRITE_UNLOCK(ipimap);

            rc = -ENOSPC;
            goto out;
        }

        /*
         * synchronously append new iag page.
         */
        /* determine the logical address of iag page to append */
        blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);

        /* Allocate extent for new iag page */
        xlen = sbi->nbperpage;
        if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
            /* release the inode map lock */
            IWRITE_UNLOCK(ipimap);

            goto out;
        }

        /*
         * start transaction of update of the inode map
         * addressing structure pointing to the new iag page;
         */
        tid = txBegin(sb, COMMIT_FORCE);
        mutex_lock(&JFS_IP(ipimap)->commit_mutex);

        /* update the inode map addressing structure to point to it */
        if ((rc =
             xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
            txEnd(tid);
            mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
            /* Free the blocks allocated for the iag since it was
             * not successfully added to the inode map
             */
            dbFree(ipimap, xaddr, (s64) xlen);

            /* release the inode map lock */
            IWRITE_UNLOCK(ipimap);

            goto out;
        }

        /* update the inode map's inode to reflect the extension */
        ipimap->i_size += PSIZE;
        inode_add_bytes(ipimap, PSIZE);

        /* assign a buffer for the page */
        mp = get_metapage(ipimap, blkno, PSIZE, 0);
        if (!mp) {
            /*
             * This is very unlikely since we just created the
             * extent, but let's try to handle it correctly:
             * truncate the map back by one page and abort the
             * transaction.
             */
            xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
                   COMMIT_PWMAP);

            txAbort(tid, 0);
            txEnd(tid);
            mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

            /* release the inode map lock */
            IWRITE_UNLOCK(ipimap);

            rc = -EIO;
            goto out;
        }
        iagp = (struct iag *) mp->data;

        /* init the iag: zeroed, not on any list, no free inodes,
         * every extent free.
         */
        memset(iagp, 0, sizeof(struct iag));
        iagp->iagnum = cpu_to_le32(iagno);
        iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
        iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
        iagp->iagfree = cpu_to_le32(-1);
        iagp->nfreeinos = 0;
        iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);

        /* initialize the free inode summary map (free extent
         * summary map initialization handled by the memset above).
         */
        for (i = 0; i < SMAPSZ; i++)
            iagp->inosmap[i] = cpu_to_le32(ONES);

        /*
         * Write and sync the metapage
         */
        flush_metapage(mp);

        /*
         * txCommit(COMMIT_FORCE) will synchronously write address
         * index pages and inode after commit in careful update order
         * of address index pages (right to left, bottom up);
         */
        iplist[0] = ipimap;
        /* NOTE(review): this rc is not checked; it is overwritten by
         * the diIAGRead() call below.
         */
        rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);

        txEnd(tid);
        mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

        duplicateIXtree(sb, blkno, xlen, &xaddr);

        /* update the next available iag number */
        imap->im_nextiag += 1;

        /* Add the iag to the iag free list so we don't lose the iag
         * if a failure happens now.
         */
        imap->im_freeiag = iagno;

        /* Until we have logredo working, we want the imap inode &
         * control page to be up to date.
         */
        diSync(ipimap);

        /* release the inode map lock */
        IWRITE_UNLOCK(ipimap);
    }

    /* obtain read lock on map; it stays held on successful return */
    IREAD_LOCK(ipimap, RDWRLOCK_IMAP);

    /* read the iag */
    if ((rc = diIAGRead(imap, iagno, &mp))) {
        IREAD_UNLOCK(ipimap);
        rc = -EIO;
        goto out;
    }
    iagp = (struct iag *) mp->data;

    /* remove the iag from the iag free list */
    imap->im_freeiag = le32_to_cpu(iagp->iagfree);
    iagp->iagfree = cpu_to_le32(-1);

    /* set the return iag number and buffer pointer */
    *iagnop = iagno;
    *mpp = mp;

      out:
    /* release the iag free lock */
    IAGFREE_UNLOCK(imap);

    return (rc);
}
2659
2660/*
2661 * NAME: diIAGRead()
2662 *
2663 * FUNCTION: get the buffer for the specified iag within a fileset
2664 * or aggregate inode map.
2665 *
2666 * PARAMETERS:
2667 * imap - pointer to inode map control structure.
2668 * iagno - iag number.
2669 * bpp - point to buffer pointer to be filled in on successful
2670 * exit.
2671 *
2672 * SERIALIZATION:
2673 * must have read lock on imap inode
2674 * (When called by diExtendFS, the filesystem is quiesced, therefore
2675 * the read lock is unnecessary.)
2676 *
2677 * RETURN VALUES:
2678 * 0 - success.
2679 * -EIO - i/o error.
2680 */
2681static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2682{
2683    struct inode *ipimap = imap->im_ipimap;
2684    s64 blkno;
2685
2686    /* compute the logical block number of the iag. */
2687    blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);
2688
2689    /* read the iag. */
2690    *mpp = read_metapage(ipimap, blkno, PSIZE, 0);
2691    if (*mpp == NULL) {
2692        return -EIO;
2693    }
2694
2695    return (0);
2696}
2697
2698/*
2699 * NAME: diFindFree()
2700 *
2701 * FUNCTION: find the first free bit in a word starting at
2702 * the specified bit position.
2703 *
2704 * PARAMETERS:
2705 * word - word to be examined.
2706 * start - starting bit position.
2707 *
2708 * RETURN VALUES:
2709 * bit position of first free bit in the word or 32 if
2710 * no free bits were found.
2711 */
2712static int diFindFree(u32 word, int start)
2713{
2714    int bitno;
2715    assert(start < 32);
2716    /* scan the word for the first free bit. */
2717    for (word <<= start, bitno = start; bitno < 32;
2718         bitno++, word <<= 1) {
2719        if ((word & HIGHORDER) == 0)
2720            break;
2721    }
2722    return (bitno);
2723}
2724
2725/*
2726 * NAME: diUpdatePMap()
2727 *
2728 * FUNCTION: Update the persistent map in an IAG for the allocation or
2729 * freeing of the specified inode.
2730 *
2731 * PRE CONDITIONS: Working map has already been updated for allocate.
2732 *
2733 * PARAMETERS:
2734 * ipimap - Incore inode map inode
2735 * inum - Number of inode to mark in permanent map
2736 * is_free - If 'true' indicates inode should be marked freed, otherwise
2737 * indicates inode should be marked allocated.
2738 *
2739 * RETURN VALUES:
2740 * 0 for success
2741 */
2742int
2743diUpdatePMap(struct inode *ipimap,
2744         unsigned long inum, bool is_free, struct tblock * tblk)
2745{
2746    int rc;
2747    struct iag *iagp;
2748    struct metapage *mp;
2749    int iagno, ino, extno, bitno;
2750    struct inomap *imap;
2751    u32 mask;
2752    struct jfs_log *log;
2753    int lsn, difft, diffp;
2754    unsigned long flags;
2755
2756    imap = JFS_IP(ipimap)->i_imap;
2757    /* get the iag number containing the inode */
2758    iagno = INOTOIAG(inum);
2759    /* make sure that the iag is contained within the map */
2760    if (iagno >= imap->im_nextiag) {
2761        jfs_error(ipimap->i_sb,
2762              "diUpdatePMap: the iag is outside the map");
2763        return -EIO;
2764    }
2765    /* read the iag */
2766    IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2767    rc = diIAGRead(imap, iagno, &mp);
2768    IREAD_UNLOCK(ipimap);
2769    if (rc)
2770        return (rc);
2771    metapage_wait_for_io(mp);
2772    iagp = (struct iag *) mp->data;
2773    /* get the inode number and extent number of the inode within
2774     * the iag and the inode number within the extent.
2775     */
2776    ino = inum & (INOSPERIAG - 1);
2777    extno = ino >> L2INOSPEREXT;
2778    bitno = ino & (INOSPEREXT - 1);
2779    mask = HIGHORDER >> bitno;
2780    /*
2781     * mark the inode free in persistent map:
2782     */
2783    if (is_free) {
2784        /* The inode should have been allocated both in working
2785         * map and in persistent map;
2786         * the inode will be freed from working map at the release
2787         * of last reference release;
2788         */
2789        if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2790            jfs_error(ipimap->i_sb,
2791                  "diUpdatePMap: inode %ld not marked as "
2792                  "allocated in wmap!", inum);
2793        }
2794        if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
2795            jfs_error(ipimap->i_sb,
2796                  "diUpdatePMap: inode %ld not marked as "
2797                  "allocated in pmap!", inum);
2798        }
2799        /* update the bitmap for the extent of the freed inode */
2800        iagp->pmap[extno] &= cpu_to_le32(~mask);
2801    }
2802    /*
2803     * mark the inode allocated in persistent map:
2804     */
2805    else {
2806        /* The inode should be already allocated in the working map
2807         * and should be free in persistent map;
2808         */
2809        if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2810            release_metapage(mp);
2811            jfs_error(ipimap->i_sb,
2812                  "diUpdatePMap: the inode is not allocated in "
2813                  "the working map");
2814            return -EIO;
2815        }
2816        if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) {
2817            release_metapage(mp);
2818            jfs_error(ipimap->i_sb,
2819                  "diUpdatePMap: the inode is not free in the "
2820                  "persistent map");
2821            return -EIO;
2822        }
2823        /* update the bitmap for the extent of the allocated inode */
2824        iagp->pmap[extno] |= cpu_to_le32(mask);
2825    }
2826    /*
2827     * update iag lsn
2828     */
2829    lsn = tblk->lsn;
2830    log = JFS_SBI(tblk->sb)->log;
2831    LOGSYNC_LOCK(log, flags);
2832    if (mp->lsn != 0) {
2833        /* inherit older/smaller lsn */
2834        logdiff(difft, lsn, log);
2835        logdiff(diffp, mp->lsn, log);
2836        if (difft < diffp) {
2837            mp->lsn = lsn;
2838            /* move mp after tblock in logsync list */
2839            list_move(&mp->synclist, &tblk->synclist);
2840        }
2841        /* inherit younger/larger clsn */
2842        assert(mp->clsn);
2843        logdiff(difft, tblk->clsn, log);
2844        logdiff(diffp, mp->clsn, log);
2845        if (difft > diffp)
2846            mp->clsn = tblk->clsn;
2847    } else {
2848        mp->log = log;
2849        mp->lsn = lsn;
2850        /* insert mp after tblock in logsync list */
2851        log->count++;
2852        list_add(&mp->synclist, &tblk->synclist);
2853        mp->clsn = tblk->clsn;
2854    }
2855    LOGSYNC_UNLOCK(log, flags);
2856    write_metapage(mp);
2857    return (0);
2858}
2859
2860/*
2861 * diExtendFS()
2862 *
2863 * function: update imap for extendfs();
2864 *
2865 * note: AG size has been increased s.t. each k old contiguous AGs are
2866 * coalesced into a new AG;
2867 */
2868int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2869{
2870    int rc, rcx = 0;
2871    struct inomap *imap = JFS_IP(ipimap)->i_imap;
2872    struct iag *iagp = NULL, *hiagp = NULL;
2873    struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap;
2874    struct metapage *bp, *hbp;
2875    int i, n, head;
2876    int numinos, xnuminos = 0, xnumfree = 0;
2877    s64 agstart;
2878
2879    jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d",
2880           imap->im_nextiag, atomic_read(&imap->im_numinos),
2881           atomic_read(&imap->im_numfree));
2882
2883    /*
2884     * reconstruct imap
2885     *
2886     * coalesce contiguous k (newAGSize/oldAGSize) AGs;
2887     * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
2888     * note: new AG size = old AG size * (2**x).
2889     */
2890
2891    /* init per AG control information im_agctl[] */
2892    for (i = 0; i < MAXAG; i++) {
2893        imap->im_agctl[i].inofree = -1;
2894        imap->im_agctl[i].extfree = -1;
2895        imap->im_agctl[i].numinos = 0; /* number of backed inodes */
2896        imap->im_agctl[i].numfree = 0; /* number of free backed inodes */
2897    }
2898
2899    /*
2900     * process each iag page of the map.
2901     *
2902     * rebuild AG Free Inode List, AG Free Inode Extent List;
2903     */
2904    for (i = 0; i < imap->im_nextiag; i++) {
2905        if ((rc = diIAGRead(imap, i, &bp))) {
2906            rcx = rc;
2907            continue;
2908        }
2909        iagp = (struct iag *) bp->data;
2910        if (le32_to_cpu(iagp->iagnum) != i) {
2911            release_metapage(bp);
2912            jfs_error(ipimap->i_sb,
2913                  "diExtendFs: unexpected value of iagnum");
2914            return -EIO;
2915        }
2916
2917        /* leave free iag in the free iag list */
2918        if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2919            release_metapage(bp);
2920            continue;
2921        }
2922
2923        agstart = le64_to_cpu(iagp->agstart);
2924        n = agstart >> mp->db_agl2size;
2925        iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);
2926
2927        /* compute backed inodes */
2928        numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
2929            << L2INOSPEREXT;
2930        if (numinos > 0) {
2931            /* merge AG backed inodes */
2932            imap->im_agctl[n].numinos += numinos;
2933            xnuminos += numinos;
2934        }
2935
2936        /* if any backed free inodes, insert at AG free inode list */
2937        if ((int) le32_to_cpu(iagp->nfreeinos) > 0) {
2938            if ((head = imap->im_agctl[n].inofree) == -1) {
2939                iagp->inofreefwd = cpu_to_le32(-1);
2940                iagp->inofreeback = cpu_to_le32(-1);
2941            } else {
2942                if ((rc = diIAGRead(imap, head, &hbp))) {
2943                    rcx = rc;
2944                    goto nextiag;
2945                }
2946                hiagp = (struct iag *) hbp->data;
2947                hiagp->inofreeback = iagp->iagnum;
2948                iagp->inofreefwd = cpu_to_le32(head);
2949                iagp->inofreeback = cpu_to_le32(-1);
2950                write_metapage(hbp);
2951            }
2952
2953            imap->im_agctl[n].inofree =
2954                le32_to_cpu(iagp->iagnum);
2955
2956            /* merge AG backed free inodes */
2957            imap->im_agctl[n].numfree +=
2958                le32_to_cpu(iagp->nfreeinos);
2959            xnumfree += le32_to_cpu(iagp->nfreeinos);
2960        }
2961
2962        /* if any free extents, insert at AG free extent list */
2963        if (le32_to_cpu(iagp->nfreeexts) > 0) {
2964            if ((head = imap->im_agctl[n].extfree) == -1) {
2965                iagp->extfreefwd = cpu_to_le32(-1);
2966                iagp->extfreeback = cpu_to_le32(-1);
2967            } else {
2968                if ((rc = diIAGRead(imap, head, &hbp))) {
2969                    rcx = rc;
2970                    goto nextiag;
2971                }
2972                hiagp = (struct iag *) hbp->data;
2973                hiagp->extfreeback = iagp->iagnum;
2974                iagp->extfreefwd = cpu_to_le32(head);
2975                iagp->extfreeback = cpu_to_le32(-1);
2976                write_metapage(hbp);
2977            }
2978
2979            imap->im_agctl[n].extfree =
2980                le32_to_cpu(iagp->iagnum);
2981        }
2982
2983          nextiag:
2984        write_metapage(bp);
2985    }
2986
2987    if (xnuminos != atomic_read(&imap->im_numinos) ||
2988        xnumfree != atomic_read(&imap->im_numfree)) {
2989        jfs_error(ipimap->i_sb,
2990              "diExtendFs: numinos or numfree incorrect");
2991        return -EIO;
2992    }
2993
2994    return rcx;
2995}
2996
2997
2998/*
2999 * duplicateIXtree()
3000 *
3001 * serialization: IWRITE_LOCK held on entry/exit
3002 *
3003 * note: shadow page with regular inode (rel.2);
3004 */
3005static void duplicateIXtree(struct super_block *sb, s64 blkno,
3006                int xlen, s64 *xaddr)
3007{
3008    struct jfs_superblock *j_sb;
3009    struct buffer_head *bh;
3010    struct inode *ip;
3011    tid_t tid;
3012
3013    /* if AIT2 ipmap2 is bad, do not try to update it */
3014    if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */
3015        return;
3016    ip = diReadSpecial(sb, FILESYSTEM_I, 1);
3017    if (ip == NULL) {
3018        JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
3019        if (readSuper(sb, &bh))
3020            return;
3021        j_sb = (struct jfs_superblock *)bh->b_data;
3022        j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT);
3023
3024        mark_buffer_dirty(bh);
3025        sync_dirty_buffer(bh);
3026        brelse(bh);
3027        return;
3028    }
3029
3030    /* start transaction */
3031    tid = txBegin(sb, COMMIT_FORCE);
3032    /* update the inode map addressing structure to point to it */
3033    if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) {
3034        JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
3035        txAbort(tid, 1);
3036        goto cleanup;
3037
3038    }
3039    /* update the inode map's inode to reflect the extension */
3040    ip->i_size += PSIZE;
3041    inode_add_bytes(ip, PSIZE);
3042    txCommit(tid, 1, &ip, COMMIT_FORCE);
3043      cleanup:
3044    txEnd(tid);
3045    diFreeSpecial(ip);
3046}
3047
3048/*
3049 * NAME: copy_from_dinode()
3050 *
3051 * FUNCTION: Copies inode info from disk inode to in-memory inode
3052 *
3053 * RETURN VALUES:
3054 * 0 - success
3055 * -ENOMEM - insufficient memory
3056 */
3057static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3058{
3059    struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3060    struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3061
3062    jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3063    jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3064    jfs_set_inode_flags(ip);
3065
3066    ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
3067    if (sbi->umask != -1) {
3068        ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask);
3069        /* For directories, add x permission if r is allowed by umask */
3070        if (S_ISDIR(ip->i_mode)) {
3071            if (ip->i_mode & 0400)
3072                ip->i_mode |= 0100;
3073            if (ip->i_mode & 0040)
3074                ip->i_mode |= 0010;
3075            if (ip->i_mode & 0004)
3076                ip->i_mode |= 0001;
3077        }
3078    }
3079    ip->i_nlink = le32_to_cpu(dip->di_nlink);
3080
3081    jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
3082    if (sbi->uid == -1)
3083        ip->i_uid = jfs_ip->saved_uid;
3084    else {
3085        ip->i_uid = sbi->uid;
3086    }
3087
3088    jfs_ip->saved_gid = le32_to_cpu(dip->di_gid);
3089    if (sbi->gid == -1)
3090        ip->i_gid = jfs_ip->saved_gid;
3091    else {
3092        ip->i_gid = sbi->gid;
3093    }
3094
3095    ip->i_size = le64_to_cpu(dip->di_size);
3096    ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
3097    ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
3098    ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
3099    ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
3100    ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
3101    ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
3102    ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
3103    ip->i_generation = le32_to_cpu(dip->di_gen);
3104
3105    jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */
3106    jfs_ip->acl = dip->di_acl; /* as are dxd's */
3107    jfs_ip->ea = dip->di_ea;
3108    jfs_ip->next_index = le32_to_cpu(dip->di_next_index);
3109    jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec);
3110    jfs_ip->acltype = le32_to_cpu(dip->di_acltype);
3111
3112    if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) {
3113        jfs_ip->dev = le32_to_cpu(dip->di_rdev);
3114        ip->i_rdev = new_decode_dev(jfs_ip->dev);
3115    }
3116
3117    if (S_ISDIR(ip->i_mode)) {
3118        memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384);
3119    } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) {
3120        memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288);
3121    } else
3122        memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128);
3123
3124    /* Zero the in-memory-only stuff */
3125    jfs_ip->cflag = 0;
3126    jfs_ip->btindex = 0;
3127    jfs_ip->btorder = 0;
3128    jfs_ip->bxflag = 0;
3129    jfs_ip->blid = 0;
3130    jfs_ip->atlhead = 0;
3131    jfs_ip->atltail = 0;
3132    jfs_ip->xtlid = 0;
3133    return (0);
3134}
3135
3136/*
3137 * NAME: copy_to_dinode()
3138 *
3139 * FUNCTION: Copies inode info from in-memory inode to disk inode
3140 */
3141static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3142{
3143    struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3144    struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3145
3146    dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
3147    dip->di_inostamp = cpu_to_le32(sbi->inostamp);
3148    dip->di_number = cpu_to_le32(ip->i_ino);
3149    dip->di_gen = cpu_to_le32(ip->i_generation);
3150    dip->di_size = cpu_to_le64(ip->i_size);
3151    dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3152    dip->di_nlink = cpu_to_le32(ip->i_nlink);
3153    if (sbi->uid == -1)
3154        dip->di_uid = cpu_to_le32(ip->i_uid);
3155    else
3156        dip->di_uid = cpu_to_le32(jfs_ip->saved_uid);
3157    if (sbi->gid == -1)
3158        dip->di_gid = cpu_to_le32(ip->i_gid);
3159    else
3160        dip->di_gid = cpu_to_le32(jfs_ip->saved_gid);
3161    jfs_get_inode_flags(jfs_ip);
3162    /*
3163     * mode2 is only needed for storing the higher order bits.
3164     * Trust i_mode for the lower order ones
3165     */
3166    if (sbi->umask == -1)
3167        dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) |
3168                       ip->i_mode);
3169    else /* Leave the original permissions alone */
3170        dip->di_mode = cpu_to_le32(jfs_ip->mode2);
3171
3172    dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
3173    dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
3174    dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
3175    dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
3176    dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
3177    dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
3178    dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */
3179    dip->di_acl = jfs_ip->acl; /* as are dxd's */
3180    dip->di_ea = jfs_ip->ea;
3181    dip->di_next_index = cpu_to_le32(jfs_ip->next_index);
3182    dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime);
3183    dip->di_otime.tv_nsec = 0;
3184    dip->di_acltype = cpu_to_le32(jfs_ip->acltype);
3185    if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
3186        dip->di_rdev = cpu_to_le32(jfs_ip->dev);
3187}
3188

Archive Download this file



interactive