fs/jfs/jfs_logmgr.c

/*
 * Copyright (C) International Business Machines Corp., 2000-2004
 * Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * jfs_logmgr.c: log manager
 *
 * for related information, see transaction manager (jfs_txnmgr.c), and
 * recovery manager (jfs_logredo.c).
 *
 * note: for detail, RTFS.
 *
 * log buffer manager:
 * special purpose buffer manager supporting log i/o requirements.
 * per log serial pageout of logpage
 * queuing i/o requests and redriving i/o at iodone
 * maintain current logpage buffer
 * no caching since append only
 * appropriate jfs buffer cache buffers as needed
 *
 * group commit:
 * transactions which wrote COMMIT records in the same in-memory
 * log page during the pageout of previous/current log page(s) are
 * committed together by the pageout of the page.
 *
 * TBD lazy commit:
 * a transaction is committed asynchronously when the log page
 * containing its COMMIT record is paged out as the page becomes full;
 *
 * serialization:
 * . a per log lock serializes log write.
 * . a per log lock serializes group commit.
 * . a per log lock serializes log open/close;
 *
 * TBD log integrity:
 * careful-write (ping-pong) of last logpage to recover from crash
 * in overwrite.
 * detection of split (out-of-order) write of physical sectors
 * of last logpage via timestamp at end of each sector
 * (with its mirror data array at trailer).
 *
 * alternatives:
 * lsn - 64-bit monotonically increasing integer vs
 * 32-bit lspn and page eor.
 */
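
/*
 * note on lsn representation (see lmWriteRecord() below): in this
 * implementation an lsn is a 32-bit byte offset into the circular log,
 * composed as (log page number << L2LOGPSIZE) + byte offset of the
 * record's eor within that page.
 */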

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/buffer_head.h> /* for sync_blockdev() */
#include <linux/bio.h>
#include <linux/freezer.h>
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_superblock.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"


/*
 * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread)
 */
static struct lbuf *log_redrive_list;
static DEFINE_SPINLOCK(log_redrive_lock);


/*
 * log read/write serialization (per log)
 */
#define LOG_LOCK_INIT(log) mutex_init(&(log)->loglock)
#define LOG_LOCK(log) mutex_lock(&((log)->loglock))
#define LOG_UNLOCK(log) mutex_unlock(&((log)->loglock))


/*
 * log group commit serialization (per log)
 */

#define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock)
#define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock)
#define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock)
#define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait)

/*
 * log sync serialization (per log)
 */
#define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE)
#define LOGSYNC_BARRIER(logsize) ((logsize)/4)
/*
#define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE)
#define LOGSYNC_BARRIER(logsize) ((logsize)/2)
*/


/*
 * log buffer cache synchronization
 */
static DEFINE_SPINLOCK(jfsLCacheLock);

#define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags)
#define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags)

/*
 * See __SLEEP_COND in jfs_locks.h
 */
#define LCACHE_SLEEP_COND(wq, cond, flags) \
do { \
    if (cond) \
        break; \
    __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
} while (0)

#define LCACHE_WAKEUP(event) wake_up(event)
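
/*
 * usage note: LCACHE_SLEEP_COND() is entered with LCACHE_LOCK held;
 * if <cond> is not yet true, __SLEEP_COND() drops the lock, sleeps on
 * <wq> until a LCACHE_WAKEUP(), and retests <cond> with the lock
 * reacquired (see the wait for a free buffer in lbmAllocate() below).
 */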


/*
 * lbuf buffer cache (lCache) control
 */
/* log buffer manager pageout control (cumulative, inclusive) */
#define lbmREAD 0x0001
#define lbmWRITE 0x0002 /* enqueue at tail of write queue;
                 * init pageout if at head of queue;
                 */
#define lbmRELEASE 0x0004 /* remove from write queue
                 * at completion of pageout;
                 * do not free/recycle it yet:
                 * caller will free it;
                 */
#define lbmSYNC 0x0008 /* do not return to freelist
                 * when removed from write queue;
                 */
#define lbmFREE 0x0010 /* return to freelist
                 * at completion of pageout;
                 * the buffer may be recycled;
                 */
#define lbmDONE 0x0020
#define lbmERROR 0x0040
#define lbmGC 0x0080 /* lbmIODone to perform post-GC processing
                 * of log page
                 */
#define lbmDIRECT 0x0100

/*
 * Global list of active external journals
 */
static LIST_HEAD(jfs_external_logs);
static struct jfs_log *dummy_log = NULL;
static DEFINE_MUTEX(jfs_log_mutex);

/*
 * forward references
 */
static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
             struct lrd * lrd, struct tlock * tlck);

static int lmNextPage(struct jfs_log * log);
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
               int activate);

static int open_inline_log(struct super_block *sb);
static int open_dummy_log(struct super_block *sb);
static int lbmLogInit(struct jfs_log * log);
static void lbmLogShutdown(struct jfs_log * log);
static struct lbuf *lbmAllocate(struct jfs_log * log, int);
static void lbmFree(struct lbuf * bp);
static void lbmfree(struct lbuf * bp);
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
static int lbmIOWait(struct lbuf * bp, int flag);
static bio_end_io_t lbmIODone;
static void lbmStartIO(struct lbuf * bp);
static void lmGCwrite(struct jfs_log * log, int cant_block);
static int lmLogSync(struct jfs_log * log, int hard_sync);



/*
 * statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct lmStat {
    uint commit; /* # of commits */
    uint pagedone; /* # of pages written */
    uint submitted; /* # of pages submitted */
    uint full_page; /* # of full pages submitted */
    uint partial_page; /* # of partial pages submitted */
} lmStat;
#endif

static void write_special_inodes(struct jfs_log *log,
                 int (*writer)(struct address_space *))
{
    struct jfs_sb_info *sbi;

    list_for_each_entry(sbi, &log->sb_list, log_list) {
        writer(sbi->ipbmap->i_mapping);
        writer(sbi->ipimap->i_mapping);
        writer(sbi->direct_inode->i_mapping);
    }
}

/*
 * NAME: lmLog()
 *
 * FUNCTION: write a log record;
 *
 * PARAMETER:
 *
 * RETURN: lsn - offset to the next log record to write (end-of-log);
 * -1 - error;
 *
 * note: todo: log error handler
 */
int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
      struct tlock * tlck)
{
    int lsn;
    int diffp, difft;
    struct metapage *mp = NULL;
    unsigned long flags;

    jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
         log, tblk, lrd, tlck);

    LOG_LOCK(log);

    /* log by (out-of-transaction) JFS ? */
    if (tblk == NULL)
        goto writeRecord;

    /* log from page ? */
    if (tlck == NULL ||
        tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
        goto writeRecord;

    /*
     * initialize/update page/transaction recovery lsn
     */
    lsn = log->lsn;

    LOGSYNC_LOCK(log, flags);

    /*
     * initialize page lsn if first log write of the page
     */
    if (mp->lsn == 0) {
        mp->log = log;
        mp->lsn = lsn;
        log->count++;

        /* insert page at tail of logsynclist */
        list_add_tail(&mp->synclist, &log->synclist);
    }

    /*
     * initialize/update lsn of tblock of the page
     *
     * transaction inherits oldest lsn of pages associated
     * with allocation/deallocation of resources (their
     * log records are used to reconstruct allocation map
     * at recovery time: inode for inode allocation map,
     * B+-tree index of extent descriptors for block
     * allocation map);
     * allocation map pages inherit transaction lsn at
     * commit time to allow forwarding log syncpt past log
     * records associated with allocation/deallocation of
     * resources only after persistent map of these map pages
     * have been updated and propagated to home.
     */
    /*
     * initialize transaction lsn:
     */
    if (tblk->lsn == 0) {
        /* inherit lsn of its first page logged */
        tblk->lsn = mp->lsn;
        log->count++;

        /* insert tblock after the page on logsynclist */
        list_add(&tblk->synclist, &mp->synclist);
    }
    /*
     * update transaction lsn:
     */
    else {
        /* inherit oldest/smallest lsn of page */
        logdiff(diffp, mp->lsn, log);
        logdiff(difft, tblk->lsn, log);
        if (diffp < difft) {
            /* update tblock lsn with page lsn */
            tblk->lsn = mp->lsn;

            /* move tblock after page on logsynclist */
            list_move(&tblk->synclist, &mp->synclist);
        }
    }

    LOGSYNC_UNLOCK(log, flags);

    /*
     * write the log record
     */
      writeRecord:
    lsn = lmWriteRecord(log, tblk, lrd, tlck);

    /*
     * forward log syncpt if log reached next syncpt trigger
     */
    logdiff(diffp, lsn, log);
    if (diffp >= log->nextsync)
        lsn = lmLogSync(log, 0);

    /* update end-of-log lsn */
    log->lsn = lsn;

    LOG_UNLOCK(log);

    /* return end-of-log address */
    return lsn;
}
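
/*
 * note on logdiff() used above: a sketch, assuming the definition in
 * jfs_logmgr.h - it computes the circular distance, in bytes, from the
 * log's last sync point to <lsn>, wrapping modulo log->logsize; lmLog()
 * compares that distance against log->nextsync to decide when the next
 * sync point is due.
 */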

/*
 * NAME: lmWriteRecord()
 *
 * FUNCTION: move the log record to current log page
 *
 * PARAMETER: cd - commit descriptor
 *
 * RETURN: end-of-log address
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
          struct tlock * tlck)
{
    int lsn = 0; /* end-of-log address */
    struct lbuf *bp; /* dst log page buffer */
    struct logpage *lp; /* dst log page */
    caddr_t dst; /* destination address in log page */
    int dstoffset; /* end-of-log offset in log page */
    int freespace; /* free space in log page */
    caddr_t p; /* src meta-data page */
    caddr_t src;
    int srclen;
    int nbytes; /* number of bytes to move */
    int i;
    int len;
    struct linelock *linelock;
    struct lv *lv;
    struct lvd *lvd;
    int l2linesize;

    len = 0;

    /* retrieve destination log page to write */
    bp = (struct lbuf *) log->bp;
    lp = (struct logpage *) bp->l_ldata;
    dstoffset = log->eor;

    /* any log data to write ? */
    if (tlck == NULL)
        goto moveLrd;

    /*
     * move log record data
     */
    /* retrieve source meta-data page to log */
    if (tlck->flag & tlckPAGELOCK) {
        p = (caddr_t) (tlck->mp->data);
        linelock = (struct linelock *) & tlck->lock;
    }
    /* retrieve source in-memory inode to log */
    else if (tlck->flag & tlckINODELOCK) {
        if (tlck->type & tlckDTREE)
            p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
        else
            p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
        linelock = (struct linelock *) & tlck->lock;
    }
#ifdef _JFS_WIP
    else if (tlck->flag & tlckINLINELOCK) {

        inlinelock = (struct inlinelock *) & tlck;
        p = (caddr_t) & inlinelock->pxd;
        linelock = (struct linelock *) & tlck;
    }
#endif /* _JFS_WIP */
    else {
        jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
        return 0; /* Probably should trap */
    }
    l2linesize = linelock->l2linesize;

      moveData:
    ASSERT(linelock->index <= linelock->maxcnt);

    lv = linelock->lv;
    for (i = 0; i < linelock->index; i++, lv++) {
        if (lv->length == 0)
            continue;

        /* is page full ? */
        if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
            /* page becomes full: move on to next page */
            lmNextPage(log);

            bp = log->bp;
            lp = (struct logpage *) bp->l_ldata;
            dstoffset = LOGPHDRSIZE;
        }

        /*
         * move log vector data
         */
        src = (u8 *) p + (lv->offset << l2linesize);
        srclen = lv->length << l2linesize;
        len += srclen;
        while (srclen > 0) {
            freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
            nbytes = min(freespace, srclen);
            dst = (caddr_t) lp + dstoffset;
            memcpy(dst, src, nbytes);
            dstoffset += nbytes;

            /* is page not full ? */
            if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
                break;

            /* page becomes full: move on to next page */
            lmNextPage(log);

            bp = (struct lbuf *) log->bp;
            lp = (struct logpage *) bp->l_ldata;
            dstoffset = LOGPHDRSIZE;

            srclen -= nbytes;
            src += nbytes;
        }

        /*
         * move log vector descriptor
         */
        len += 4;
        lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
        lvd->offset = cpu_to_le16(lv->offset);
        lvd->length = cpu_to_le16(lv->length);
        dstoffset += 4;
        jfs_info("lmWriteRecord: lv offset:%d length:%d",
             lv->offset, lv->length);
    }

    if ((i = linelock->next)) {
        linelock = (struct linelock *) lid_to_tlock(i);
        goto moveData;
    }

    /*
     * move log record descriptor
     */
      moveLrd:
    lrd->length = cpu_to_le16(len);

    src = (caddr_t) lrd;
    srclen = LOGRDSIZE;

    while (srclen > 0) {
        freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
        nbytes = min(freespace, srclen);
        dst = (caddr_t) lp + dstoffset;
        memcpy(dst, src, nbytes);

        dstoffset += nbytes;
        srclen -= nbytes;

        /* is there more to move than free space on the page ? */
        if (srclen)
            goto pageFull;

        /*
         * end of log record descriptor
         */

        /* update last log record eor */
        log->eor = dstoffset;
        bp->l_eor = dstoffset;
        lsn = (log->page << L2LOGPSIZE) + dstoffset;

        if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
            tblk->clsn = lsn;
            jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
                 bp->l_eor);

            INCREMENT(lmStat.commit); /* # of commits */

            /*
             * enqueue tblock for group commit:
             *
             * enqueue tblock of non-trivial/synchronous COMMIT
             * at tail of group commit queue
             * (trivial/asynchronous COMMITs are ignored by
             * group commit.)
             */
            LOGGC_LOCK(log);

            /* init tblock gc state */
            tblk->flag = tblkGC_QUEUE;
            tblk->bp = log->bp;
            tblk->pn = log->page;
            tblk->eor = log->eor;

            /* enqueue transaction to commit queue */
            list_add_tail(&tblk->cqueue, &log->cqueue);

            LOGGC_UNLOCK(log);
        }

        jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
            le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);

        /* page not full ? */
        if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
            return lsn;

          pageFull:
        /* page becomes full: move on to next page */
        lmNextPage(log);

        bp = (struct lbuf *) log->bp;
        lp = (struct logpage *) bp->l_ldata;
        dstoffset = LOGPHDRSIZE;
        src += nbytes;
    }

    return lsn;
}
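
/*
 * log page layout assumed by lmWriteRecord() above: each LOGPSIZE page
 * carries a LOGPHDRSIZE header and a LOGPTLRSIZE trailer, so record
 * bytes occupy offsets [LOGPHDRSIZE, LOGPSIZE - LOGPTLRSIZE); dstoffset
 * walks that region and lmNextPage() is called whenever it is
 * exhausted, which is why a record may straddle log pages.
 */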


/*
 * NAME: lmNextPage()
 *
 * FUNCTION: write current page and allocate next page.
 *
 * PARAMETER: log
 *
 * RETURN: 0
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int lmNextPage(struct jfs_log * log)
{
    struct logpage *lp;
    int lspn; /* log sequence page number */
    int pn; /* current page number */
    struct lbuf *bp;
    struct lbuf *nextbp;
    struct tblock *tblk;

    /* get current log page number and log sequence page number */
    pn = log->page;
    bp = log->bp;
    lp = (struct logpage *) bp->l_ldata;
    lspn = le32_to_cpu(lp->h.page);

    LOGGC_LOCK(log);

    /*
     * write or queue the full page at the tail of write queue
     */
    /* get the tail tblk on commit queue */
    if (list_empty(&log->cqueue))
        tblk = NULL;
    else
        tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);

    /* every tblk that has a COMMIT record on the current page,
     * and has not been committed, must be on the commit queue,
     * since a tblk is queued on the commit queue at the time
     * of writing its COMMIT record on the page, before the
     * page becomes full (even though the thread that wrote
     * its COMMIT record may currently be suspended);
     */

    /* is page bound with outstanding tail tblk ? */
    if (tblk && tblk->pn == pn) {
        /* mark tblk for end-of-page */
        tblk->flag |= tblkGC_EOP;

        if (log->cflag & logGC_PAGEOUT) {
            /* if page is not already on write queue,
             * just enqueue (no lbmWRITE to prevent redrive)
             * buffer to wqueue to ensure correct serial order
             * of the pages since log pages will be added
             * continuously
             */
            if (bp->l_wqnext == NULL)
                lbmWrite(log, bp, 0, 0);
        } else {
            /*
             * No current GC leader, initiate group commit
             */
            log->cflag |= logGC_PAGEOUT;
            lmGCwrite(log, 0);
        }
    }
    /* page is not bound with outstanding tblk:
     * init write or mark it to be redriven (lbmWRITE)
     */
    else {
        /* finalize the page */
        bp->l_ceor = bp->l_eor;
        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
        lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
    }
    LOGGC_UNLOCK(log);

    /*
     * allocate/initialize next page
     */
    /* if log wraps, the first data page of log is 2
     * (0 never used, 1 is superblock).
     */
    log->page = (pn == log->size - 1) ? 2 : pn + 1;
    log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */

    /* allocate/initialize next log page buffer */
    nextbp = lbmAllocate(log, log->page);
    nextbp->l_eor = log->eor;
    log->bp = nextbp;

    /* initialize next log page */
    lp = (struct logpage *) nextbp->l_ldata;
    lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
    lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

    return 0;
}


/*
 * NAME: lmGroupCommit()
 *
 * FUNCTION: group commit
 * initiate pageout of the pages with COMMIT in the order of
 * page number - redrive pageout of the page at the head of
 * pageout queue until full page has been written.
 *
 * RETURN:
 *
 * NOTE:
 * LOGGC_LOCK serializes log group commit queue, and
 * transaction blocks on the commit queue.
 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
 */
int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
{
    int rc = 0;

    LOGGC_LOCK(log);

    /* group committed already ? */
    if (tblk->flag & tblkGC_COMMITTED) {
        if (tblk->flag & tblkGC_ERROR)
            rc = -EIO;

        LOGGC_UNLOCK(log);
        return rc;
    }
    jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);

    if (tblk->xflag & COMMIT_LAZY)
        tblk->flag |= tblkGC_LAZY;

    if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
        (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
         || jfs_tlocks_low)) {
        /*
         * No pageout in progress
         *
         * start group commit as its group leader.
         */
        log->cflag |= logGC_PAGEOUT;

        lmGCwrite(log, 0);
    }

    if (tblk->xflag & COMMIT_LAZY) {
        /*
         * Lazy transactions can leave now
         */
        LOGGC_UNLOCK(log);
        return 0;
    }

    /* lmGCwrite gives up LOGGC_LOCK, check again */

    if (tblk->flag & tblkGC_COMMITTED) {
        if (tblk->flag & tblkGC_ERROR)
            rc = -EIO;

        LOGGC_UNLOCK(log);
        return rc;
    }

    /* upcount transaction waiting for completion
     */
    log->gcrtc++;
    tblk->flag |= tblkGC_READY;

    __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
             LOGGC_LOCK(log), LOGGC_UNLOCK(log));

    /* removed from commit queue */
    if (tblk->flag & tblkGC_ERROR)
        rc = -EIO;

    LOGGC_UNLOCK(log);
    return rc;
}

/*
 * NAME: lmGCwrite()
 *
 * FUNCTION: group commit write
 * initiate write of log page, building a group of all transactions
 * with commit records on that page.
 *
 * RETURN: None
 *
 * NOTE:
 * LOGGC_LOCK must be held by caller.
 * N.B. LOG_LOCK is NOT held during lmGroupCommit().
 */
static void lmGCwrite(struct jfs_log * log, int cant_write)
{
    struct lbuf *bp;
    struct logpage *lp;
    int gcpn; /* group commit page number */
    struct tblock *tblk;
    struct tblock *xtblk = NULL;

    /*
     * build the commit group of a log page
     *
     * scan commit queue and make a commit group of all
     * transactions with COMMIT records on the same log page.
     */
    /* get the head tblk on the commit queue */
    gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;

    list_for_each_entry(tblk, &log->cqueue, cqueue) {
        if (tblk->pn != gcpn)
            break;

        xtblk = tblk;

        /* state transition: (QUEUE, READY) -> COMMIT */
        tblk->flag |= tblkGC_COMMIT;
    }
    tblk = xtblk; /* last tblk of the page */

    /*
     * pageout to commit transactions on the log page.
     */
    bp = (struct lbuf *) tblk->bp;
    lp = (struct logpage *) bp->l_ldata;
    /* is page already full ? */
    if (tblk->flag & tblkGC_EOP) {
        /* mark page to free at end of group commit of the page */
        tblk->flag &= ~tblkGC_EOP;
        tblk->flag |= tblkGC_FREE;
        bp->l_ceor = bp->l_eor;
        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
        lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
             cant_write);
        INCREMENT(lmStat.full_page);
    }
    /* page is not yet full */
    else {
        bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */
        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
        lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
        INCREMENT(lmStat.partial_page);
    }
}
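
/*
 * tblock group commit states, in transition order (see lmWriteRecord(),
 * lmGCwrite() and lmPostGC()):
 * tblkGC_QUEUE - COMMIT record written, tblk on the commit queue;
 * tblkGC_COMMIT - tblk's page selected for pageout by the group leader;
 * tblkGC_COMMITTED - pageout completed, waiter woken (or tblk handed
 * to the lazy commit thread).
 */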

/*
 * NAME: lmPostGC()
 *
 * FUNCTION: group commit post-processing
 * Processes transactions after their commit records have been written
 * to disk, redriving log I/O if necessary.
 *
 * RETURN: None
 *
 * NOTE:
 * This routine is called at interrupt time by lbmIODone
 */
static void lmPostGC(struct lbuf * bp)
{
    unsigned long flags;
    struct jfs_log *log = bp->l_log;
    struct logpage *lp;
    struct tblock *tblk, *temp;

    //LOGGC_LOCK(log);
    spin_lock_irqsave(&log->gclock, flags);
    /*
     * current pageout of group commit completed.
     *
     * remove/wakeup transactions from commit queue who were
     * group committed with the current log page
     */
    list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
        if (!(tblk->flag & tblkGC_COMMIT))
            break;
        /* if transaction was marked GC_COMMIT then
         * it has been shipped in the current pageout
         * and made it to disk - it is committed.
         */

        if (bp->l_flag & lbmERROR)
            tblk->flag |= tblkGC_ERROR;

        /* remove it from the commit queue */
        list_del(&tblk->cqueue);
        tblk->flag &= ~tblkGC_QUEUE;

        if (tblk == log->flush_tblk) {
            /* we can stop flushing the log now */
            clear_bit(log_FLUSH, &log->flag);
            log->flush_tblk = NULL;
        }

        jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
             tblk->flag);

        if (!(tblk->xflag & COMMIT_FORCE))
            /*
             * Hand tblk over to lazy commit thread
             */
            txLazyUnlock(tblk);
        else {
            /* state transition: COMMIT -> COMMITTED */
            tblk->flag |= tblkGC_COMMITTED;

            if (tblk->flag & tblkGC_READY)
                log->gcrtc--;

            LOGGC_WAKEUP(tblk);
        }

        /* was page full before pageout ?
         * (and this is the last tblk bound with the page)
         */
        if (tblk->flag & tblkGC_FREE)
            lbmFree(bp);
        /* did page become full after pageout ?
         * (and this is the last tblk bound with the page)
         */
        else if (tblk->flag & tblkGC_EOP) {
            /* finalize the page */
            lp = (struct logpage *) bp->l_ldata;
            bp->l_ceor = bp->l_eor;
            lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
            jfs_info("lmPostGC: calling lbmWrite");
            lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
                 1);
        }

    }

    /* are there any transactions that have entered lmGroupCommit()
     * (whose COMMITs are after that of the last log page written)?
     * They are waiting for new group commit (above at (SLEEP 1)),
     * or lazy transactions are on a full (queued) log page;
     * select the latest ready transaction as new group leader and
     * wake her up to lead her group.
     */
    if ((!list_empty(&log->cqueue)) &&
        ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
         test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
        /*
         * Call lmGCwrite with new group leader
         */
        lmGCwrite(log, 1);

    /* no transactions are ready yet (transactions are only just
     * queued (GC_QUEUE) and not entered for group commit yet).
     * the first transaction entering group commit
     * will elect herself as new group leader.
     */
    else
        log->cflag &= ~logGC_PAGEOUT;

    //LOGGC_UNLOCK(log);
    spin_unlock_irqrestore(&log->gclock, flags);
    return;
}

/*
 * NAME: lmLogSync()
 *
 * FUNCTION: write log SYNCPT record for specified log
 * if new sync address is available
 * (normally the case if sync() is executed by background
 * process).
 * calculate new value of log->nextsync which determines when
 * this code is called again.
 *
 * PARAMETERS: log - log structure
 * hard_sync - 1 to force all metadata to be written
 *
 * RETURN: lsn - new end-of-log address
 *
 * serialization: LOG_LOCK() held on entry/exit
 */
static int lmLogSync(struct jfs_log * log, int hard_sync)
{
    int logsize;
    int written; /* written since last syncpt */
    int free; /* free space left available */
    int delta; /* additional delta to write normally */
    int more; /* additional write granted */
    struct lrd lrd;
    int lsn;
    struct logsyncblk *lp;
    unsigned long flags;

    /* push dirty metapages out to disk */
    if (hard_sync)
        write_special_inodes(log, filemap_fdatawrite);
    else
        write_special_inodes(log, filemap_flush);

    /*
     * forward syncpt
     */
    /* if last sync is same as last syncpt,
     * invoke sync point forward processing to update sync.
     */

    if (log->sync == log->syncpt) {
        LOGSYNC_LOCK(log, flags);
        if (list_empty(&log->synclist))
            log->sync = log->lsn;
        else {
            lp = list_entry(log->synclist.next,
                    struct logsyncblk, synclist);
            log->sync = lp->lsn;
        }
        LOGSYNC_UNLOCK(log, flags);

    }

    /* if sync is different from last syncpt,
     * write a SYNCPT record with syncpt = sync.
     * reset syncpt = sync
     */
    if (log->sync != log->syncpt) {
        lrd.logtid = 0;
        lrd.backchain = 0;
        lrd.type = cpu_to_le16(LOG_SYNCPT);
        lrd.length = 0;
        lrd.log.syncpt.sync = cpu_to_le32(log->sync);
        lsn = lmWriteRecord(log, NULL, &lrd, NULL);

        log->syncpt = log->sync;
    } else
        lsn = log->lsn;

    /*
     * setup next syncpt trigger (SWAG)
     */
    logsize = log->logsize;

    logdiff(written, lsn, log);
    free = logsize - written;
    delta = LOGSYNC_DELTA(logsize);
    more = min(free / 2, delta);
    if (more < 2 * LOGPSIZE) {
        jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
        /*
         * log wrapping
         *
         * option 1 - panic ? No!
         * option 2 - shutdown file systems
         * associated with log ?
         * option 3 - extend log ?
         * option 4 - second chance
         *
         * mark log wrapped, and continue.
         * when all active transactions are completed,
         * mark log valid for recovery.
         * if crashed during invalid state, log state
         * implies invalid log, forcing fsck().
         */
        /* mark log state log wrap in log superblock */
        /* log->state = LOGWRAP; */

        /* reset sync point computation */
        log->syncpt = log->sync = lsn;
        log->nextsync = delta;
    } else
        /* next syncpt trigger = written + more */
        log->nextsync = written + more;

    /* if number of bytes written from last sync point is more
     * than 1/4 of the log size, stop new transactions from
     * starting until all current transactions are completed
     * by setting syncbarrier flag.
     */
    if (!test_bit(log_SYNCBARRIER, &log->flag) &&
        (written > LOGSYNC_BARRIER(logsize)) && log->active) {
        set_bit(log_SYNCBARRIER, &log->flag);
        jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
             log->syncpt);
        /*
         * We may have to initiate group commit
         */
        jfs_flush_journal(log, 0);
    }

    return lsn;
}
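
/*
 * illustrative numbers for the syncpt trigger arithmetic above,
 * assuming LOGPSIZE = 4K: for a 16MB log, LOGSYNC_DELTA() yields
 * min(16MB/8, 128 * 4K) = min(2MB, 512K) = 512K, so with ample free
 * space the next syncpt is triggered roughly 512K of log traffic after
 * this one; as free space shrinks, more = min(free/2, delta) shortens
 * that interval until the log-wrap path is taken.
 */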

/*
 * NAME: jfs_syncpt
 *
 * FUNCTION: write log SYNCPT record for specified log
 *
 * PARAMETERS: log - log structure
 * hard_sync - set to 1 to force metadata to be written
 */
void jfs_syncpt(struct jfs_log *log, int hard_sync)
{
    LOG_LOCK(log);
    lmLogSync(log, hard_sync);
    LOG_UNLOCK(log);
}

/*
 * NAME: lmLogOpen()
 *
 * FUNCTION: open the log on first open;
 * insert filesystem in the active list of the log.
 *
 * PARAMETER: sb - super block
 *
 * RETURN: 0 - success; errors from subroutines
 *
 * serialization:
 */
int lmLogOpen(struct super_block *sb)
{
    int rc;
    struct block_device *bdev;
    struct jfs_log *log;
    struct jfs_sb_info *sbi = JFS_SBI(sb);

    if (sbi->flag & JFS_NOINTEGRITY)
        return open_dummy_log(sb);

    if (sbi->mntflag & JFS_INLINELOG)
        return open_inline_log(sb);

    mutex_lock(&jfs_log_mutex);
    list_for_each_entry(log, &jfs_external_logs, journal_list) {
        if (log->bdev->bd_dev == sbi->logdev) {
            if (memcmp(log->uuid, sbi->loguuid,
                   sizeof(log->uuid))) {
                jfs_warn("wrong uuid on JFS journal\n");
                mutex_unlock(&jfs_log_mutex);
                return -EINVAL;
            }
            /*
             * add file system to log active file system list
             */
            if ((rc = lmLogFileSystem(log, sbi, 1))) {
                mutex_unlock(&jfs_log_mutex);
                return rc;
            }
            goto journal_found;
        }
    }

    if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
        mutex_unlock(&jfs_log_mutex);
        return -ENOMEM;
    }
    INIT_LIST_HEAD(&log->sb_list);
    init_waitqueue_head(&log->syncwait);

    /*
     * external log as separate logical volume
     *
     * file systems to log may have n-to-1 relationship;
     */

    bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
                 log);
    if (IS_ERR(bdev)) {
        rc = PTR_ERR(bdev);
        goto free;
    }

    log->bdev = bdev;
    memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));

    /*
     * initialize log:
     */
    if ((rc = lmLogInit(log)))
        goto close;

    list_add(&log->journal_list, &jfs_external_logs);

    /*
     * add file system to log active file system list
     */
    if ((rc = lmLogFileSystem(log, sbi, 1)))
        goto shutdown;

journal_found:
    LOG_LOCK(log);
    list_add(&sbi->log_list, &log->sb_list);
    sbi->log = log;
    LOG_UNLOCK(log);

    mutex_unlock(&jfs_log_mutex);
    return 0;

    /*
     * unwind on error
     */
      shutdown: /* unwind lbmLogInit() */
    list_del(&log->journal_list);
    lbmLogShutdown(log);

      close: /* close external log device */
    blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

      free: /* free log descriptor */
    mutex_unlock(&jfs_log_mutex);
    kfree(log);

    jfs_warn("lmLogOpen: exit(%d)", rc);
    return rc;
}

static int open_inline_log(struct super_block *sb)
{
    struct jfs_log *log;
    int rc;

    if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
        return -ENOMEM;
    INIT_LIST_HEAD(&log->sb_list);
    init_waitqueue_head(&log->syncwait);

    set_bit(log_INLINELOG, &log->flag);
    log->bdev = sb->s_bdev;
    log->base = addressPXD(&JFS_SBI(sb)->logpxd);
    log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
        (L2LOGPSIZE - sb->s_blocksize_bits);
    log->l2bsize = sb->s_blocksize_bits;
    ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);

    /*
     * initialize log.
     */
    if ((rc = lmLogInit(log))) {
        kfree(log);
        jfs_warn("lmLogOpen: exit(%d)", rc);
        return rc;
    }

    list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
    JFS_SBI(sb)->log = log;

    return rc;
}

static int open_dummy_log(struct super_block *sb)
{
    int rc;

    mutex_lock(&jfs_log_mutex);
    if (!dummy_log) {
        dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
        if (!dummy_log) {
            mutex_unlock(&jfs_log_mutex);
            return -ENOMEM;
        }
        INIT_LIST_HEAD(&dummy_log->sb_list);
        init_waitqueue_head(&dummy_log->syncwait);
        dummy_log->no_integrity = 1;
        /* Make up some stuff */
        dummy_log->base = 0;
        dummy_log->size = 1024;
        rc = lmLogInit(dummy_log);
        if (rc) {
            kfree(dummy_log);
            dummy_log = NULL;
            mutex_unlock(&jfs_log_mutex);
            return rc;
        }
    }

    LOG_LOCK(dummy_log);
    list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
    JFS_SBI(sb)->log = dummy_log;
    LOG_UNLOCK(dummy_log);
    mutex_unlock(&jfs_log_mutex);

    return 0;
}

/*
 * NAME: lmLogInit()
 *
 * FUNCTION: log initialization at first log open.
 *
 * logredo() (or logformat()) should have been run previously.
 * initialize the log from log superblock.
 * set the log state in the superblock to LOGMOUNT and
 * write SYNCPT log record.
 *
 * PARAMETER: log - log structure
 *
 * RETURN: 0 - if ok
 * -EINVAL - bad log magic number or superblock dirty
 * error returned from logwait()
 *
 * serialization: single first open thread
 */
int lmLogInit(struct jfs_log * log)
{
    int rc = 0;
    struct lrd lrd;
    struct logsuper *logsuper;
    struct lbuf *bpsuper;
    struct lbuf *bp;
    struct logpage *lp;
    int lsn = 0;

    jfs_info("lmLogInit: log:0x%p", log);

    /* initialize the group commit serialization lock */
    LOGGC_LOCK_INIT(log);

    /* allocate/initialize the log write serialization lock */
    LOG_LOCK_INIT(log);

    LOGSYNC_LOCK_INIT(log);

    INIT_LIST_HEAD(&log->synclist);

    INIT_LIST_HEAD(&log->cqueue);
    log->flush_tblk = NULL;

    log->count = 0;

    /*
     * initialize log i/o
     */
    if ((rc = lbmLogInit(log)))
        return rc;

    if (!test_bit(log_INLINELOG, &log->flag))
        log->l2bsize = L2LOGPSIZE;

    /* check for disabled journaling to disk */
    if (log->no_integrity) {
        /*
         * Journal pages will still be filled. When the time comes
         * to actually do the I/O, the write is not done, and the
         * endio routine is called directly.
         */
        bp = lbmAllocate(log, 0);
        log->bp = bp;
        bp->l_pn = bp->l_eor = 0;
    } else {
        /*
         * validate log superblock
         */
        if ((rc = lbmRead(log, 1, &bpsuper)))
            goto errout10;

        logsuper = (struct logsuper *) bpsuper->l_ldata;

        if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
            jfs_warn("*** Log Format Error ! ***");
            rc = -EINVAL;
            goto errout20;
        }

        /* logredo() should have been run successfully. */
        if (logsuper->state != cpu_to_le32(LOGREDONE)) {
            jfs_warn("*** Log Is Dirty ! ***");
            rc = -EINVAL;
            goto errout20;
        }

        /* initialize log from log superblock */
        if (test_bit(log_INLINELOG,&log->flag)) {
            if (log->size != le32_to_cpu(logsuper->size)) {
                rc = -EINVAL;
                goto errout20;
            }
            jfs_info("lmLogInit: inline log:0x%p base:0x%Lx "
                 "size:0x%x", log,
                 (unsigned long long) log->base, log->size);
        } else {
            if (memcmp(logsuper->uuid, log->uuid, 16)) {
                jfs_warn("wrong uuid on JFS log device");
                rc = -EINVAL;
                goto errout20;
            }
            log->size = le32_to_cpu(logsuper->size);
            log->l2bsize = le32_to_cpu(logsuper->l2bsize);
            jfs_info("lmLogInit: external log:0x%p base:0x%Lx "
                 "size:0x%x", log,
                 (unsigned long long) log->base, log->size);
        }

        log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
        log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);

        /*
         * initialize for log append write mode
         */
        /* establish current/end-of-log page/buffer */
        if ((rc = lbmRead(log, log->page, &bp)))
            goto errout20;

        lp = (struct logpage *) bp->l_ldata;

        jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
             le32_to_cpu(logsuper->end), log->page, log->eor,
             le16_to_cpu(lp->h.eor));

        log->bp = bp;
        bp->l_pn = log->page;
        bp->l_eor = log->eor;

        /* if current page is full, move on to next page */
        if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
            lmNextPage(log);

        /*
         * initialize log syncpoint
         */
        /*
         * write the first SYNCPT record with syncpoint = 0
         * (i.e., log redo up to HERE !);
         * remove current page from lbm write queue at end of pageout
         * (to write log superblock update), but do not release to
         * freelist;
         */
        lrd.logtid = 0;
        lrd.backchain = 0;
        lrd.type = cpu_to_le16(LOG_SYNCPT);
        lrd.length = 0;
        lrd.log.syncpt.sync = 0;
        lsn = lmWriteRecord(log, NULL, &lrd, NULL);
        bp = log->bp;
        bp->l_ceor = bp->l_eor;
        lp = (struct logpage *) bp->l_ldata;
        lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
        lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
        if ((rc = lbmIOWait(bp, 0)))
            goto errout30;

        /*
         * update/write superblock
         */
        logsuper->state = cpu_to_le32(LOGMOUNT);
        log->serial = le32_to_cpu(logsuper->serial) + 1;
        logsuper->serial = cpu_to_le32(log->serial);
        lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
        if ((rc = lbmIOWait(bpsuper, lbmFREE)))
            goto errout30;
    }

    /* initialize logsync parameters */
    log->logsize = (log->size - 2) << L2LOGPSIZE;
    log->lsn = lsn;
    log->syncpt = lsn;
    log->sync = log->syncpt;
    log->nextsync = LOGSYNC_DELTA(log->logsize);

    jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
         log->lsn, log->syncpt, log->sync);

    /*
     * initialize for lazy/group commit
     */
    log->clsn = lsn;

    return 0;

    /*
     * unwind on error
     */
      errout30: /* release log page */
    log->wqueue = NULL;
    bp->l_wqnext = NULL;
    lbmFree(bp);

      errout20: /* release log superblock */
    lbmFree(bpsuper);

      errout10: /* unwind lbmLogInit() */
    lbmLogShutdown(log);

    jfs_warn("lmLogInit: exit(%d)", rc);
    return rc;
}


/*
 * NAME: lmLogClose()
 *
 * FUNCTION: remove file system <ipmnt> from active list of log <iplog>
 * and close it on last close.
 *
 * PARAMETER: sb - superblock
 *
 * RETURN: errors from subroutines
 *
 * serialization:
 */
int lmLogClose(struct super_block *sb)
{
    struct jfs_sb_info *sbi = JFS_SBI(sb);
    struct jfs_log *log = sbi->log;
    struct block_device *bdev;
    int rc = 0;

    jfs_info("lmLogClose: log:0x%p", log);

    mutex_lock(&jfs_log_mutex);
    LOG_LOCK(log);
    list_del(&sbi->log_list);
    LOG_UNLOCK(log);
    sbi->log = NULL;

    /*
     * We need to make sure all of the "written" metapages
     * actually make it to disk
     */
    sync_blockdev(sb->s_bdev);

    if (test_bit(log_INLINELOG, &log->flag)) {
        /*
         * in-line log in host file system
         */
        rc = lmLogShutdown(log);
        kfree(log);
        goto out;
    }

    if (!log->no_integrity)
        lmLogFileSystem(log, sbi, 0);

    if (!list_empty(&log->sb_list))
        goto out;

    /*
     * TODO: ensure that the dummy_log is in a state to allow
     * lbmLogShutdown to deallocate all the buffers and call
     * kfree against dummy_log. For now, leave dummy_log & its
     * buffers in memory, and reuse if another no-integrity mount
     * is requested.
     */
    if (log->no_integrity)
        goto out;

    /*
     * external log as separate logical volume
     */
    list_del(&log->journal_list);
    bdev = log->bdev;
    rc = lmLogShutdown(log);

    blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

    kfree(log);

      out:
    mutex_unlock(&jfs_log_mutex);
    jfs_info("lmLogClose: exit(%d)", rc);
    return rc;
}


/*
 * NAME: jfs_flush_journal()
 *
 * FUNCTION: initiate write of any outstanding transactions to the journal
 * and optionally wait until they are all written to disk
 *
 * wait == 0 flush until latest txn is committed, don't wait
 * wait == 1 flush until latest txn is committed, wait
 * wait > 1 flush until all txn's are complete, wait
 */
void jfs_flush_journal(struct jfs_log *log, int wait)
{
    int i;
    struct tblock *target = NULL;

    /* jfs_write_inode may call us during read-only mount */
    if (!log)
        return;

    jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

    LOGGC_LOCK(log);

    if (!list_empty(&log->cqueue)) {
        /*
         * This ensures that we will keep writing to the journal as long
         * as there are unwritten commit records
         */
        target = list_entry(log->cqueue.prev, struct tblock, cqueue);

        if (test_bit(log_FLUSH, &log->flag)) {
            /*
             * We're already flushing.
             * if flush_tblk is NULL, we are flushing everything,
             * so leave it that way. Otherwise, update it to the
             * latest transaction
             */
            if (log->flush_tblk)
                log->flush_tblk = target;
        } else {
            /* Only flush until latest transaction is committed */
            log->flush_tblk = target;
            set_bit(log_FLUSH, &log->flag);

            /*
             * Initiate I/O on outstanding transactions
             */
            if (!(log->cflag & logGC_PAGEOUT)) {
                log->cflag |= logGC_PAGEOUT;
                lmGCwrite(log, 0);
            }
        }
    }
    if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
        /* Flush until all activity complete */
        set_bit(log_FLUSH, &log->flag);
        log->flush_tblk = NULL;
    }

    if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
        DECLARE_WAITQUEUE(__wait, current);

        add_wait_queue(&target->gcwait, &__wait);
        set_current_state(TASK_UNINTERRUPTIBLE);
        LOGGC_UNLOCK(log);
        schedule();
        __set_current_state(TASK_RUNNING);
        LOGGC_LOCK(log);
        remove_wait_queue(&target->gcwait, &__wait);
    }
    LOGGC_UNLOCK(log);

    if (wait < 2)
        return;

    write_special_inodes(log, filemap_fdatawrite);

    /*
     * If there was recent activity, we may need to wait
     * for the lazycommit thread to catch up
     */
    if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
        for (i = 0; i < 200; i++) { /* Too much? */
            msleep(250);
            write_special_inodes(log, filemap_fdatawrite);
            if (list_empty(&log->cqueue) &&
                list_empty(&log->synclist))
                break;
        }
    }
    assert(list_empty(&log->cqueue));

#ifdef CONFIG_JFS_DEBUG
    if (!list_empty(&log->synclist)) {
        struct logsyncblk *lp;

        printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
        list_for_each_entry(lp, &log->synclist, synclist) {
            if (lp->xflag & COMMIT_PAGE) {
                struct metapage *mp = (struct metapage *)lp;
                print_hex_dump(KERN_ERR, "metapage: ",
                           DUMP_PREFIX_ADDRESS, 16, 4,
                           mp, sizeof(struct metapage), 0);
                print_hex_dump(KERN_ERR, "page: ",
                           DUMP_PREFIX_ADDRESS, 16,
                           sizeof(long), mp->page,
                           sizeof(struct page), 0);
            } else
                print_hex_dump(KERN_ERR, "tblock:",
                           DUMP_PREFIX_ADDRESS, 16, 4,
                           lp, sizeof(struct tblock), 0);
        }
    }
#else
    WARN_ON(!list_empty(&log->synclist));
#endif
    clear_bit(log_FLUSH, &log->flag);
}

/*
 * NAME: lmLogShutdown()
 *
 * FUNCTION: log shutdown at last LogClose().
 *
 * write log syncpt record.
 * update super block to set redone flag to 0.
 *
 * PARAMETER: log - log inode
 *
 * RETURN: 0 - success
 *
 * serialization: single last close thread
 */
int lmLogShutdown(struct jfs_log * log)
{
    int rc;
    struct lrd lrd;
    int lsn;
    struct logsuper *logsuper;
    struct lbuf *bpsuper;
    struct lbuf *bp;
    struct logpage *lp;

    jfs_info("lmLogShutdown: log:0x%p", log);

    jfs_flush_journal(log, 2);

    /*
     * write the last SYNCPT record with syncpoint = 0
     * (i.e., log redo up to HERE !)
     */
    lrd.logtid = 0;
    lrd.backchain = 0;
    lrd.type = cpu_to_le16(LOG_SYNCPT);
    lrd.length = 0;
    lrd.log.syncpt.sync = 0;

    lsn = lmWriteRecord(log, NULL, &lrd, NULL);
    bp = log->bp;
    lp = (struct logpage *) bp->l_ldata;
    lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
    lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
    lbmIOWait(log->bp, lbmFREE);
    log->bp = NULL;

    /*
     * synchronous update log superblock
     * mark log state as shutdown cleanly
     * (i.e., Log does not need to be replayed).
     */
    if ((rc = lbmRead(log, 1, &bpsuper)))
        goto out;

    logsuper = (struct logsuper *) bpsuper->l_ldata;
    logsuper->state = cpu_to_le32(LOGREDONE);
    logsuper->end = cpu_to_le32(lsn);
    lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
    rc = lbmIOWait(bpsuper, lbmFREE);

    jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
         lsn, log->page, log->eor);

      out:
    /*
     * shutdown per log i/o
     */
    lbmLogShutdown(log);

    if (rc) {
        jfs_warn("lmLogShutdown: exit(%d)", rc);
    }
    return rc;
}


/*
 * NAME: lmLogFileSystem()
 *
 * FUNCTION: insert (<activate> = true)/remove (<activate> = false)
 * file system into/from log active file system list.
 *
 * PARAMETER: log - pointer to the log
 * sbi - jfs_sb_info of the file system
 * activate - insert/remove device from active list.
 *
 * RETURN: 0 - success
 * errors returned by lbmIOWait().
 */
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
               int activate)
{
    int rc = 0;
    int i;
    struct logsuper *logsuper;
    struct lbuf *bpsuper;
    char *uuid = sbi->uuid;

    /*
     * insert/remove file system device to log active file system list.
     */
    if ((rc = lbmRead(log, 1, &bpsuper)))
        return rc;

    logsuper = (struct logsuper *) bpsuper->l_ldata;
    if (activate) {
        for (i = 0; i < MAX_ACTIVE; i++)
            if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) {
                memcpy(logsuper->active[i].uuid, uuid, 16);
                sbi->aggregate = i;
                break;
            }
        if (i == MAX_ACTIVE) {
            jfs_warn("Too many file systems sharing journal!");
            lbmFree(bpsuper);
            return -EMFILE; /* Is there a better rc? */
        }
    } else {
        for (i = 0; i < MAX_ACTIVE; i++)
            if (!memcmp(logsuper->active[i].uuid, uuid, 16)) {
                memcpy(logsuper->active[i].uuid, NULL_UUID, 16);
                break;
            }
        if (i == MAX_ACTIVE) {
            jfs_warn("Somebody stomped on the journal!");
            lbmFree(bpsuper);
            return -EIO;
        }

    }

    /*
     * synchronous write log superblock:
     *
     * write sidestream bypassing write queue:
     * at file system mount, log super block is updated for
     * activation of the file system before any log record
     * (MOUNT record) of the file system, and at file system
     * unmount, all meta data for the file system has been
     * flushed before log super block is updated for deactivation
     * of the file system.
     */
    lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
    rc = lbmIOWait(bpsuper, lbmFREE);

    return rc;
}

/*
 * log buffer manager (lbm)
 * ------------------------
 *
 * special purpose buffer manager supporting log i/o requirements.
 *
 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wqueue points to the tail, and buffers are linked via
 * bp->l_wqnext field), and
 * maintains log pages in pageout or waiting for pageout in serial pageout.
 */
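
/*
 * a minimal sketch of the write queue shape assumed here: log->wqueue
 * points at the tail lbuf and each buffer's l_wqnext links toward the
 * head, with the tail's l_wqnext closing the circle back to the head;
 * an empty queue is simply log->wqueue == NULL (and a buffer not on the
 * queue has l_wqnext == NULL, which lmNextPage() tests above).
 */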
1802
1803/*
1804 * lbmLogInit()
1805 *
1806 * initialize per log I/O setup at lmLogInit()
1807 */
1808static int lbmLogInit(struct jfs_log * log)
1809{ /* log inode */
1810    int i;
1811    struct lbuf *lbuf;
1812
1813    jfs_info("lbmLogInit: log:0x%p", log);
1814
1815    /* initialize current buffer cursor */
1816    log->bp = NULL;
1817
1818    /* initialize log device write queue */
1819    log->wqueue = NULL;
1820
1821    /*
1822     * Each log has its own buffer pages allocated to it. These are
1823     * not managed by the page cache. This ensures that a transaction
1824     * writing to the log does not block trying to allocate a page from
1825     * the page cache (for the log). This would be bad, since page
1826     * allocation waits on the kswapd thread that may be committing inodes
1827     * which would cause log activity. Was that clear? I'm trying to
1828     * avoid deadlock here.
1829     */
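    /*
     * editor's note (illustrative): each get_zeroed_page() below
     * returns one PAGE_SIZE page that is carved into PAGE_SIZE/LOGPSIZE
     * lbufs. With 4K pages and LOGPSIZE = 4096 the inner loop runs once
     * per page; a 64K-page machine would carve 16 lbufs per page. Every
     * lbuf after the first takes an extra page reference (get_page), so
     * the __free_page() per lbuf in lbmLogShutdown() releases the page
     * only when its last lbuf goes away.
     */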
1830    init_waitqueue_head(&log->free_wait);
1831
1832    log->lbuf_free = NULL;
1833
1834    for (i = 0; i < LOGPAGES;) {
1835        char *buffer;
1836        uint offset;
1837        struct page *page;
1838
1839        buffer = (char *) get_zeroed_page(GFP_KERNEL);
1840        if (buffer == NULL)
1841            goto error;
1842        page = virt_to_page(buffer);
1843        for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1844            lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1845            if (lbuf == NULL) {
1846                if (offset == 0)
1847                    free_page((unsigned long) buffer);
1848                goto error;
1849            }
1850            if (offset) /* we already have one reference */
1851                get_page(page);
1852            lbuf->l_offset = offset;
1853            lbuf->l_ldata = buffer + offset;
1854            lbuf->l_page = page;
1855            lbuf->l_log = log;
1856            init_waitqueue_head(&lbuf->l_ioevent);
1857
1858            lbuf->l_freelist = log->lbuf_free;
1859            log->lbuf_free = lbuf;
1860            i++;
1861        }
1862    }
1863
1864    return (0);
1865
1866      error:
1867    lbmLogShutdown(log);
1868    return -ENOMEM;
1869}
1870
1871
1872/*
1873 * lbmLogShutdown()
1874 *
1875 * finalize per log I/O setup at lmLogShutdown()
1876 */
1877static void lbmLogShutdown(struct jfs_log * log)
1878{
1879    struct lbuf *lbuf;
1880
1881    jfs_info("lbmLogShutdown: log:0x%p", log);
1882
1883    lbuf = log->lbuf_free;
1884    while (lbuf) {
1885        struct lbuf *next = lbuf->l_freelist;
1886        __free_page(lbuf->l_page);
1887        kfree(lbuf);
1888        lbuf = next;
1889    }
1890}
1891
1892
1893/*
1894 * lbmAllocate()
1895 *
1896 * allocate an empty log buffer
1897 */
1898static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1899{
1900    struct lbuf *bp;
1901    unsigned long flags;
1902
1903    /*
1904     * recycle from log buffer freelist if any
1905     */
1906    LCACHE_LOCK(flags);
1907    LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1908    log->lbuf_free = bp->l_freelist;
1909    LCACHE_UNLOCK(flags);
1910
1911    bp->l_flag = 0;
1912
1913    bp->l_wqnext = NULL;
1914    bp->l_freelist = NULL;
1915
1916    bp->l_pn = pn;
1917    bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
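    /*
     * editor's note (illustrative): log->base is the log origin in
     * device blocks and a log page spans 2^(L2LOGPSIZE - l2bsize)
     * blocks. E.g., with LOGPSIZE = 4096 (L2LOGPSIZE = 12) on a device
     * using 512-byte blocks (l2bsize = 9), page pn = 3 starts at
     * base + (3 << 3) = base + 24.
     */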
1918    bp->l_ceor = 0;
1919
1920    return bp;
1921}
1922
1923
1924/*
1925 * lbmFree()
1926 *
1927 * release a log buffer to freelist
1928 */
1929static void lbmFree(struct lbuf * bp)
1930{
1931    unsigned long flags;
1932
1933    LCACHE_LOCK(flags);
1934
1935    lbmfree(bp);
1936
1937    LCACHE_UNLOCK(flags);
1938}
1939
1940static void lbmfree(struct lbuf * bp)
1941{
1942    struct jfs_log *log = bp->l_log;
1943
1944    assert(bp->l_wqnext == NULL);
1945
1946    /*
1947     * return the buffer to head of freelist
1948     */
1949    bp->l_freelist = log->lbuf_free;
1950    log->lbuf_free = bp;
1951
1952    wake_up(&log->free_wait);
1953    return;
1954}
1955
1956
1957/*
1958 * NAME: lbmRedrive
1959 *
1960 * FUNCTION: add a log buffer to the log redrive list
1961 *
1962 * PARAMETER:
1963 * bp - log buffer
1964 *
1965 * NOTES:
1966 * Takes log_redrive_lock.
1967 */
1968static inline void lbmRedrive(struct lbuf *bp)
1969{
1970    unsigned long flags;
1971
1972    spin_lock_irqsave(&log_redrive_lock, flags);
1973    bp->l_redrive_next = log_redrive_list;
1974    log_redrive_list = bp;
1975    spin_unlock_irqrestore(&log_redrive_lock, flags);
1976
1977    wake_up_process(jfsIOthread);
1978}
1979
1980
1981/*
1982 * lbmRead()
1983 */
1984static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1985{
1986    struct bio *bio;
1987    struct lbuf *bp;
1988
1989    /*
1990     * allocate a log buffer
1991     */
1992    *bpp = bp = lbmAllocate(log, pn);
1993    jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1994
1995    bp->l_flag |= lbmREAD;
1996
1997    bio = bio_alloc(GFP_NOFS, 1);
1998
1999    bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
2000    bio->bi_bdev = log->bdev;
2001    bio->bi_io_vec[0].bv_page = bp->l_page;
2002    bio->bi_io_vec[0].bv_len = LOGPSIZE;
2003    bio->bi_io_vec[0].bv_offset = bp->l_offset;
2004
2005    bio->bi_vcnt = 1;
2006    bio->bi_idx = 0;
2007    bio->bi_size = LOGPSIZE;
2008
2009    bio->bi_end_io = lbmIODone;
2010    bio->bi_private = bp;
2011    submit_bio(READ_SYNC, bio);
2012
2013    wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2014
2015    return 0;
2016}
2017
2018
2019/*
2020 * lbmWrite()
2021 *
2022 * a buffer at the head of the pageout queue stays there after a
2023 * partial-page pageout completes, and is redriven by explicit
2024 * initiation of pageout by the caller, until full-page pageout is
2025 * completed and the buffer is released.
2026 *
2027 * device driver i/o done redrives pageout of new buffer at
2028 * head of pageout queue when current buffer at head of pageout
2029 * queue is released at the completion of its full-page pageout.
2030 *
2031 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2032 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2033 */
2034static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2035             int cant_block)
2036{
2037    struct lbuf *tail;
2038    unsigned long flags;
2039
2040    jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2041
2042    /* map the logical block address to physical block address */
2043    bp->l_blkno =
2044        log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2045
2046    LCACHE_LOCK(flags); /* disable+lock */
2047
2048    /*
2049     * initialize buffer for device driver
2050     */
2051    bp->l_flag = flag;
2052
2053    /*
2054     * insert bp at tail of write queue associated with log
2055     *
2056     * (request is either for bp already/currently at head of queue
2057     * or new bp to be inserted at tail)
2058     */
2059    tail = log->wqueue;
2060
2061    /* is buffer not already on write queue ? */
2062    if (bp->l_wqnext == NULL) {
2063        /* insert at tail of wqueue */
2064        if (tail == NULL) {
2065            log->wqueue = bp;
2066            bp->l_wqnext = bp;
2067        } else {
2068            log->wqueue = bp;
2069            bp->l_wqnext = tail->l_wqnext;
2070            tail->l_wqnext = bp;
2071        }
2072
2073        tail = bp;
2074    }
2075
2076    /* is buffer at head of wqueue and for write ? */
2077    if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
2078        LCACHE_UNLOCK(flags); /* unlock+enable */
2079        return;
2080    }
2081
2082    LCACHE_UNLOCK(flags); /* unlock+enable */
2083
2084    if (cant_block)
2085        lbmRedrive(bp);
2086    else if (flag & lbmSYNC)
2087        lbmStartIO(bp);
2088    else {
2089        LOGGC_UNLOCK(log);
2090        lbmStartIO(bp);
2091        LOGGC_LOCK(log);
2092    }
2093}
2094
2095
2096/*
2097 * lbmDirectWrite()
2098 *
2099 * initiate pageout bypassing write queue for sidestream
2100 * (e.g., log superblock) write;
2101 */
2102static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2103{
2104    jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2105         bp, flag, bp->l_pn);
2106
2107    /*
2108     * initialize buffer for device driver
2109     */
2110    bp->l_flag = flag | lbmDIRECT;
2111
2112    /* map the logical block address to physical block address */
2113    bp->l_blkno =
2114        log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2115
2116    /*
2117     * initiate pageout of the page
2118     */
2119    lbmStartIO(bp);
2120}
2121
2122
2123/*
2124 * NAME: lbmStartIO()
2125 *
2126 * FUNCTION: Interface to DD strategy routine
2127 *
2128 * RETURN: none
2129 *
2130 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2131 */
2132static void lbmStartIO(struct lbuf * bp)
2133{
2134    struct bio *bio;
2135    struct jfs_log *log = bp->l_log;
2136
2137    jfs_info("lbmStartIO\n");
2138
2139    bio = bio_alloc(GFP_NOFS, 1);
2140    bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
2141    bio->bi_bdev = log->bdev;
2142    bio->bi_io_vec[0].bv_page = bp->l_page;
2143    bio->bi_io_vec[0].bv_len = LOGPSIZE;
2144    bio->bi_io_vec[0].bv_offset = bp->l_offset;
2145
2146    bio->bi_vcnt = 1;
2147    bio->bi_idx = 0;
2148    bio->bi_size = LOGPSIZE;
2149
2150    bio->bi_end_io = lbmIODone;
2151    bio->bi_private = bp;
2152
2153    /* check if journaling to disk has been disabled */
2154    if (log->no_integrity) {
2155        bio->bi_size = 0;
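        /* editor's note: with bi_size zeroed, calling the completion
         * handler by hand makes the write appear to finish instantly,
         * so the write-queue and group-commit machinery runs unchanged
         * even though nothing was sent to the device.
         */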
2156        lbmIODone(bio, 0);
2157    } else {
2158        submit_bio(WRITE_SYNC, bio);
2159        INCREMENT(lmStat.submitted);
2160    }
2161}
2162
2163
2164/*
2165 * lbmIOWait()
2166 */
2167static int lbmIOWait(struct lbuf * bp, int flag)
2168{
2169    unsigned long flags;
2170    int rc = 0;
2171
2172    jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2173
2174    LCACHE_LOCK(flags); /* disable+lock */
2175
2176    LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2177
2178    rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2179
2180    if (flag & lbmFREE)
2181        lbmfree(bp);
2182
2183    LCACHE_UNLOCK(flags); /* unlock+enable */
2184
2185    jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2186    return rc;
2187}
2188
2189/*
2190 * lbmIODone()
2191 *
2192 * executed at INTIODONE level
2193 */
2194static void lbmIODone(struct bio *bio, int error)
2195{
2196    struct lbuf *bp = bio->bi_private;
2197    struct lbuf *nextbp, *tail;
2198    struct jfs_log *log;
2199    unsigned long flags;
2200
2201    /*
2202     * get back jfs buffer bound to the i/o buffer
2203     */
2204    jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);
2205
2206    LCACHE_LOCK(flags); /* disable+lock */
2207
2208    bp->l_flag |= lbmDONE;
2209
2210    if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
2211        bp->l_flag |= lbmERROR;
2212
2213        jfs_err("lbmIODone: I/O error in JFS log");
2214    }
2215
2216    bio_put(bio);
2217
2218    /*
2219     * pagein completion
2220     */
2221    if (bp->l_flag & lbmREAD) {
2222        bp->l_flag &= ~lbmREAD;
2223
2224        LCACHE_UNLOCK(flags); /* unlock+enable */
2225
2226        /* wakeup I/O initiator */
2227        LCACHE_WAKEUP(&bp->l_ioevent);
2228
2229        return;
2230    }
2231
2232    /*
2233     * pageout completion
2234     *
2235     * the bp at the head of write queue has completed pageout.
2236     *
2237     * if single-commit/full-page pageout, remove the current buffer
2238     * from head of pageout queue, and redrive pageout with
2239     * the new buffer at head of pageout queue;
2240     * otherwise, the partial-page pageout buffer stays at
2241     * the head of pageout queue to be redriven for pageout
2242     * by lmGroupCommit() until full-page pageout is completed.
2243     */
2244    bp->l_flag &= ~lbmWRITE;
2245    INCREMENT(lmStat.pagedone);
2246
2247    /* update committed lsn */
2248    log = bp->l_log;
2249    log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
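    /*
     * editor's note (illustrative): an lsn is a byte offset into the
     * circular log: page number times LOGPSIZE plus the offset of the
     * last committed record within that page. E.g., pn = 5 and
     * ceor = 0x58 give clsn = 5 * 4096 + 0x58 = 0x5058.
     */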
2250
2251    if (bp->l_flag & lbmDIRECT) {
2252        LCACHE_WAKEUP(&bp->l_ioevent);
2253        LCACHE_UNLOCK(flags);
2254        return;
2255    }
2256
2257    tail = log->wqueue;
2258
2259    /* single element queue */
2260    if (bp == tail) {
2261        /* remove head buffer of full-page pageout
2262         * from log device write queue
2263         */
2264        if (bp->l_flag & lbmRELEASE) {
2265            log->wqueue = NULL;
2266            bp->l_wqnext = NULL;
2267        }
2268    }
2269    /* multi element queue */
2270    else {
2271        /* remove head buffer of full-page pageout
2272         * from log device write queue
2273         */
2274        if (bp->l_flag & lbmRELEASE) {
2275            nextbp = tail->l_wqnext = bp->l_wqnext;
2276            bp->l_wqnext = NULL;
2277
2278            /*
2279             * redrive pageout of next page at head of write queue:
2280             * redrive next page without any bound tblk
2281             * (i.e., page w/o any COMMIT records), or
2282             * first page of new group commit which has been
2283             * queued after current page (subsequent pageout
2284             * is performed synchronously, except page without
2285             * any COMMITs) by lmGroupCommit() as indicated
2286             * by lbmWRITE flag;
2287             */
2288            if (nextbp->l_flag & lbmWRITE) {
2289                /*
2290                 * We can't do the I/O at interrupt time.
2291                 * The jfsIO thread can do it
2292                 */
2293                lbmRedrive(nextbp);
2294            }
2295        }
2296    }
2297
2298    /*
2299     * synchronous pageout:
2300     *
2301     * buffer has not necessarily been removed from write queue
2302     * (e.g., synchronous write of partial-page with COMMIT):
2303     * leave buffer for i/o initiator to dispose
2304     */
2305    if (bp->l_flag & lbmSYNC) {
2306        LCACHE_UNLOCK(flags); /* unlock+enable */
2307
2308        /* wakeup I/O initiator */
2309        LCACHE_WAKEUP(&bp->l_ioevent);
2310    }
2311
2312    /*
2313     * Group Commit pageout:
2314     */
2315    else if (bp->l_flag & lbmGC) {
2316        LCACHE_UNLOCK(flags);
2317        lmPostGC(bp);
2318    }
2319
2320    /*
2321     * asynchronous pageout:
2322     *
2323     * buffer must have been removed from write queue:
2324     * insert buffer at head of freelist where it can be recycled
2325     */
2326    else {
2327        assert(bp->l_flag & lbmRELEASE);
2328        assert(bp->l_flag & lbmFREE);
2329        lbmfree(bp);
2330
2331        LCACHE_UNLOCK(flags); /* unlock+enable */
2332    }
2333}
2334
2335int jfsIOWait(void *arg)
2336{
2337    struct lbuf *bp;
2338
2339    do {
2340        spin_lock_irq(&log_redrive_lock);
2341        while ((bp = log_redrive_list)) {
2342            log_redrive_list = bp->l_redrive_next;
2343            bp->l_redrive_next = NULL;
2344            spin_unlock_irq(&log_redrive_lock);
2345            lbmStartIO(bp);
2346            spin_lock_irq(&log_redrive_lock);
2347        }
2348
2349        if (freezing(current)) {
2350            spin_unlock_irq(&log_redrive_lock);
2351            refrigerator();
2352        } else {
2353            set_current_state(TASK_INTERRUPTIBLE);
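            /* editor's note: going TASK_INTERRUPTIBLE while still
             * holding log_redrive_lock closes the race with
             * lbmRedrive(): a wake_up_process() issued between the
             * unlock and schedule() just leaves this thread runnable,
             * so the new buffer is seen on the next loop iteration.
             */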
2354            spin_unlock_irq(&log_redrive_lock);
2355            schedule();
2356            __set_current_state(TASK_RUNNING);
2357        }
2358    } while (!kthread_should_stop());
2359
2360    jfs_info("jfsIOWait being killed!");
2361    return 0;
2362}
2363
2364/*
2365 * NAME: lmLogFormat()/jfs_logform()
2366 *
2367 * FUNCTION: format file system log
2368 *
2369 * PARAMETERS:
2370 * log - volume log
2371 * logAddress - start address of log space in FS block
2372 * logSize - length of log space in FS block;
2373 *
2374 * RETURN: 0 - success
2375 * -EIO - i/o error
2376 *
2377 * XXX: We're synchronously writing one page at a time. This needs to
2378 * be improved by writing multiple pages at once.
2379 */
2380int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2381{
2382    int rc = -EIO;
2383    struct jfs_sb_info *sbi;
2384    struct logsuper *logsuper;
2385    struct logpage *lp;
2386    int lspn; /* log sequence page number */
2387    struct lrd *lrd_ptr;
2388    int npages = 0;
2389    struct lbuf *bp;
2390
2391    jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
2392         (long long)logAddress, logSize);
2393
2394    sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);
2395
2396    /* allocate a log buffer */
2397    bp = lbmAllocate(log, 1);
2398
2399    npages = logSize >> sbi->l2nbperpage;
2400
2401    /*
2402     * log space:
2403     *
2404     * page 0 - reserved;
2405     * page 1 - log superblock;
2406     * page 2 - log data page: A SYNC log record is written
2407     * into this page at logform time;
2408     * pages 3-N - log data page: set to empty log data pages;
2409     */
2410    /*
2411     * init log superblock: log page 1
2412     */
2413    logsuper = (struct logsuper *) bp->l_ldata;
2414
2415    logsuper->magic = cpu_to_le32(LOGMAGIC);
2416    logsuper->version = cpu_to_le32(LOGVERSION);
2417    logsuper->state = cpu_to_le32(LOGREDONE);
2418    logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */
2419    logsuper->size = cpu_to_le32(npages);
2420    logsuper->bsize = cpu_to_le32(sbi->bsize);
2421    logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
2422    logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
2423
2424    bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2425    bp->l_blkno = logAddress + sbi->nbperpage;
2426    lbmStartIO(bp);
2427    if ((rc = lbmIOWait(bp, 0)))
2428        goto exit;
2429
2430    /*
2431     * init pages 2 to npages-1 as log data pages:
2432     *
2433     * log page sequence number (lpsn) initialization:
2434     *
2435     * pn:    0     1     2     3                 n-1
2436     *        +-----+-----+=====+=====+===.....===+=====+
2437     * lspn:              N-1   0     1           N-2
2438     *               <--- N page circular file ---->
2439     *
2440     * the N (= npages-2) data pages of the log are maintained as
2441     * a circular file for the log records;
2442     * lpsn grows by 1 monotonically as each log page is written
2443     * to the circular file of the log;
2444     * and setLogpage() will not reset the page number even if
2445     * the eor is equal to LOGPHDRSIZE. In order for binary search
2446     * to still work in the find-log-end process, we have to simulate
2447     * the log wrap situation at log format time.
2448     * The 1st log page written will have the highest lpsn. Then
2449     * the succeeding log pages will have the lpsn in ascending order
2450     * starting from 0, ..., (N-2).
2451     */
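    /*
     * editor's worked example (illustrative): for npages = 16, the
     * N = 14 data pages occupy pn 2..15. The page written first
     * (pn = 2) gets lpsn = N - 1 = 13; pages pn 3..15 then get
     * lpsn 0..12. The single point where lpsn drops (13 -> 0) looks
     * like a log wrap, which is what the find-log-end binary search
     * in recovery expects to find.
     */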
2452    lp = (struct logpage *) bp->l_ldata;
2453    /*
2454     * initialize the 1st log page to be written: lpsn = N - 1;
2455     * a SYNCPT log record is written into this page
2456     */
2457    lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
2458    lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
2459
2460    lrd_ptr = (struct lrd *) &lp->data;
2461    lrd_ptr->logtid = 0;
2462    lrd_ptr->backchain = 0;
2463    lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
2464    lrd_ptr->length = 0;
2465    lrd_ptr->log.syncpt.sync = 0;
2466
2467    bp->l_blkno += sbi->nbperpage;
2468    bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2469    lbmStartIO(bp);
2470    if ((rc = lbmIOWait(bp, 0)))
2471        goto exit;
2472
2473    /*
2474     * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2475     */
2476    for (lspn = 0; lspn < npages - 3; lspn++) {
2477        lp->h.page = lp->t.page = cpu_to_le32(lspn);
2478        lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
2479
2480        bp->l_blkno += sbi->nbperpage;
2481        bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
2482        lbmStartIO(bp);
2483        if ((rc = lbmIOWait(bp, 0)))
2484            goto exit;
2485    }
2486
2487    rc = 0;
2488exit:
2489    /*
2490     * finalize log
2491     */
2492    /* release the buffer */
2493    lbmFree(bp);
2494
2495    return rc;
2496}
2497
2498#ifdef CONFIG_JFS_STATISTICS
2499static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2500{
2501    seq_printf(m,
2502               "JFS Logmgr stats\n"
2503               "================\n"
2504               "commits = %d\n"
2505               "writes submitted = %d\n"
2506               "writes completed = %d\n"
2507               "full pages submitted = %d\n"
2508               "partial pages submitted = %d\n",
2509               lmStat.commit,
2510               lmStat.submitted,
2511               lmStat.pagedone,
2512               lmStat.full_page,
2513               lmStat.partial_page);
2514    return 0;
2515}
2516
2517static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
2518{
2519    return single_open(file, jfs_lmstats_proc_show, NULL);
2520}
2521
2522const struct file_operations jfs_lmstats_proc_fops = {
2523    .owner = THIS_MODULE,
2524    .open = jfs_lmstats_proc_open,
2525    .read = seq_read,
2526    .llseek = seq_lseek,
2527    .release = single_release,
2528};
2529#endif /* CONFIG_JFS_STATISTICS */
2530
