/*
 * linux/fs/block_dev.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/smp_lock.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/kmemleak.h>
#include <asm/uaccess.h>
#include "internal.h"

struct bdev_inode {
    struct block_device bdev;
    struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
    return container_of(inode, struct bdev_inode, vfs_inode);
}

inline struct block_device *I_BDEV(struct inode *inode)
{
    return &BDEV_I(inode)->bdev;
}

EXPORT_SYMBOL(I_BDEV);

static sector_t max_block(struct block_device *bdev)
{
    sector_t retval = ~((sector_t)0);
    loff_t sz = i_size_read(bdev->bd_inode);

    if (sz) {
        unsigned int size = block_size(bdev);
        unsigned int sizebits = blksize_bits(size);
        retval = (sz >> sizebits);
    }
    return retval;
}

/* Kill _all_ buffers and pagecache, dirty or not. */
static void kill_bdev(struct block_device *bdev)
{
    if (bdev->bd_inode->i_mapping->nrpages == 0)
        return;
    invalidate_bh_lrus();
    truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
}

int set_blocksize(struct block_device *bdev, int size)
{
    /* Size must be a power of two, and between 512 and PAGE_SIZE */
    if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
        return -EINVAL;

    /* Size cannot be smaller than the size supported by the device */
    if (size < bdev_logical_block_size(bdev))
        return -EINVAL;

    /* Don't change the size if it is the same as the current one */
    if (bdev->bd_block_size != size) {
        sync_blockdev(bdev);
        bdev->bd_block_size = size;
        bdev->bd_inode->i_blkbits = blksize_bits(size);
        kill_bdev(bdev);
    }
    return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
    if (set_blocksize(sb->s_bdev, size))
        return 0;
    /* If we get here, we know size is a power of two
     * and its value is between 512 and PAGE_SIZE */
    sb->s_blocksize = size;
    sb->s_blocksize_bits = blksize_bits(size);
    return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
    int minsize = bdev_logical_block_size(sb->s_bdev);
    if (size < minsize)
        size = minsize;
    return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);
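
/*
 * Usage sketch (illustrative, not code from this file): a filesystem's
 * fill_super path typically picks its block size this way. The call
 * clamps the request up to the device's logical block size and, like
 * sb_set_blocksize(), returns 0 on failure:
 *
 *    blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
 *    if (!blocksize)
 *        return -EINVAL;
 */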

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
        struct buffer_head *bh, int create)
{
    if (iblock >= max_block(I_BDEV(inode))) {
        if (create)
            return -EIO;

        /*
         * For reads, we're just trying to fill a partial page.
         * Return a hole; the caller will have to call get_block
         * again before it can fill the page, and will get -EIO
         * at that time.
         */
        return 0;
    }
    bh->b_bdev = I_BDEV(inode);
    bh->b_blocknr = iblock;
    set_buffer_mapped(bh);
    return 0;
}

static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
        struct buffer_head *bh, int create)
{
    sector_t end_block = max_block(I_BDEV(inode));
    unsigned long max_blocks = bh->b_size >> inode->i_blkbits;

    if ((iblock + max_blocks) > end_block) {
        max_blocks = end_block - iblock;
        if ((long)max_blocks <= 0) {
            if (create)
                return -EIO; /* write fully beyond EOF */
            /*
             * It is a read which is fully beyond EOF. We return
             * a !buffer_mapped buffer.
             */
            max_blocks = 0;
        }
    }

    bh->b_bdev = I_BDEV(inode);
    bh->b_blocknr = iblock;
    bh->b_size = max_blocks << inode->i_blkbits;
    if (max_blocks)
        set_buffer_mapped(bh);
    return 0;
}

static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
            loff_t offset, unsigned long nr_segs)
{
    struct file *file = iocb->ki_filp;
    struct inode *inode = file->f_mapping->host;

    return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
                iov, offset, nr_segs, blkdev_get_blocks, NULL);
}

int __sync_blockdev(struct block_device *bdev, int wait)
{
    if (!bdev)
        return 0;
    if (!wait)
        return filemap_flush(bdev->bd_inode->i_mapping);
    return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping. Does not take the superblock lock.
 */
int sync_blockdev(struct block_device *bdev)
{
    return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

/*
 * Write out and wait upon all dirty data associated with this
 * device: filesystem data as well as the underlying block
 * device. Takes the superblock lock.
 */
int fsync_bdev(struct block_device *bdev)
{
    struct super_block *sb = get_super(bdev);
    if (sb) {
        int res = sync_filesystem(sb);
        drop_super(sb);
        return res;
    }
    return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

/**
 * freeze_bdev -- lock a filesystem and force it into a consistent state
 * @bdev: blockdevice to lock
 *
 * This takes the block device bd_mount_sem to make sure no new mounts
 * happen on bdev until thaw_bdev() is called.
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can actually unfreeze the frozen filesystem when
 * multiple freeze requests arrive simultaneously. It counts up in
 * freeze_bdev() and counts down in thaw_bdev(). When it becomes 0,
 * thaw_bdev() actually unfreezes the filesystem.
 */
struct super_block *freeze_bdev(struct block_device *bdev)
{
    struct super_block *sb;
    int error = 0;

    mutex_lock(&bdev->bd_fsfreeze_mutex);
    if (bdev->bd_fsfreeze_count > 0) {
        bdev->bd_fsfreeze_count++;
        sb = get_super(bdev);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        return sb;
    }
    bdev->bd_fsfreeze_count++;

    down(&bdev->bd_mount_sem);
    sb = get_super(bdev);
    if (sb && !(sb->s_flags & MS_RDONLY)) {
        sb->s_frozen = SB_FREEZE_WRITE;
        smp_wmb();

        sync_filesystem(sb);

        sb->s_frozen = SB_FREEZE_TRANS;
        smp_wmb();

        sync_blockdev(sb->s_bdev);

        if (sb->s_op->freeze_fs) {
            error = sb->s_op->freeze_fs(sb);
            if (error) {
                printk(KERN_ERR
                    "VFS: Filesystem freeze failed\n");
                sb->s_frozen = SB_UNFROZEN;
                drop_super(sb);
                up(&bdev->bd_mount_sem);
                bdev->bd_fsfreeze_count--;
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
                return ERR_PTR(error);
            }
        }
    }

    sync_blockdev(bdev);
    mutex_unlock(&bdev->bd_fsfreeze_mutex);

    return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */
}
EXPORT_SYMBOL(freeze_bdev);

/**
 * thaw_bdev -- unlock filesystem
 * @bdev: blockdevice to unlock
 * @sb: associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
    int error = 0;

    mutex_lock(&bdev->bd_fsfreeze_mutex);
    if (!bdev->bd_fsfreeze_count) {
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        return -EINVAL;
    }

    bdev->bd_fsfreeze_count--;
    if (bdev->bd_fsfreeze_count > 0) {
        if (sb)
            drop_super(sb);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        return 0;
    }

    if (sb) {
        BUG_ON(sb->s_bdev != bdev);
        if (!(sb->s_flags & MS_RDONLY)) {
            if (sb->s_op->unfreeze_fs) {
                error = sb->s_op->unfreeze_fs(sb);
                if (error) {
                    printk(KERN_ERR
                        "VFS: Filesystem thaw failed\n");
                    sb->s_frozen = SB_FREEZE_TRANS;
                    bdev->bd_fsfreeze_count++;
                    mutex_unlock(&bdev->bd_fsfreeze_mutex);
                    return error;
                }
            }
            sb->s_frozen = SB_UNFROZEN;
            smp_wmb();
            wake_up(&sb->s_wait_unfrozen);
        }
        drop_super(sb);
    }

    up(&bdev->bd_mount_sem);
    mutex_unlock(&bdev->bd_fsfreeze_mutex);
    return 0;
}
EXPORT_SYMBOL(thaw_bdev);
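
/*
 * A minimal usage sketch (assuming a snapshot-style caller; this is
 * not code from this file): freeze the device, do the work, then
 * thaw it with the superblock that freeze_bdev() returned.
 *
 *    sb = freeze_bdev(bdev);
 *    if (IS_ERR(sb))
 *        return PTR_ERR(sb);
 *    ... create the snapshot ...
 *    thaw_bdev(bdev, sb);
 */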

static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
    return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file *file, struct page *page)
{
    return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
            loff_t pos, unsigned len, unsigned flags,
            struct page **pagep, void **fsdata)
{
    *pagep = NULL;
    return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
                blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
            loff_t pos, unsigned len, unsigned copied,
            struct page *page, void *fsdata)
{
    int ret;
    ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

    unlock_page(page);
    page_cache_release(page);

    return ret;
}

/*
 * private llseek:
 * for a block special file, file->f_path.dentry->d_inode->i_size is zero,
 * so we compute the size by hand (just as in block_read/write above)
 */
static loff_t block_llseek(struct file *file, loff_t offset, int origin)
{
    struct inode *bd_inode = file->f_mapping->host;
    loff_t size;
    loff_t retval;

    mutex_lock(&bd_inode->i_mutex);
    size = i_size_read(bd_inode);

    switch (origin) {
        case 2:
            offset += size;
            break;
        case 1:
            offset += file->f_pos;
    }
    retval = -EINVAL;
    if (offset >= 0 && offset <= size) {
        if (offset != file->f_pos) {
            file->f_pos = offset;
        }
        retval = offset;
    }
    mutex_unlock(&bd_inode->i_mutex);
    return retval;
}

/*
 * Filp is never NULL; the only case when ->fsync() is called with
 * a NULL first argument is nfsd_sync_dir(), and that's not a directory.
 */

static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
    return sync_blockdev(I_BDEV(filp->f_mapping->host));
}

/*
 * pseudo-fs
 */

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache *bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
    struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
    if (!ei)
        return NULL;
    return &ei->vfs_inode;
}

static void bdev_destroy_inode(struct inode *inode)
{
    struct bdev_inode *bdi = BDEV_I(inode);

    bdi->bdev.bd_inode_backing_dev_info = NULL;
    kmem_cache_free(bdev_cachep, bdi);
}

static void init_once(void *foo)
{
    struct bdev_inode *ei = (struct bdev_inode *) foo;
    struct block_device *bdev = &ei->bdev;

    memset(bdev, 0, sizeof(*bdev));
    mutex_init(&bdev->bd_mutex);
    sema_init(&bdev->bd_mount_sem, 1);
    INIT_LIST_HEAD(&bdev->bd_inodes);
    INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
    INIT_LIST_HEAD(&bdev->bd_holder_list);
#endif
    inode_init_once(&ei->vfs_inode);
    /* Initialize mutex for freeze. */
    mutex_init(&bdev->bd_fsfreeze_mutex);
}

static inline void __bd_forget(struct inode *inode)
{
    list_del_init(&inode->i_devices);
    inode->i_bdev = NULL;
    inode->i_mapping = &inode->i_data;
}

static void bdev_clear_inode(struct inode *inode)
{
    struct block_device *bdev = &BDEV_I(inode)->bdev;
    struct list_head *p;
    spin_lock(&bdev_lock);
    while ((p = bdev->bd_inodes.next) != &bdev->bd_inodes) {
        __bd_forget(list_entry(p, struct inode, i_devices));
    }
    list_del_init(&bdev->bd_list);
    spin_unlock(&bdev_lock);
}

static const struct super_operations bdev_sops = {
    .statfs = simple_statfs,
    .alloc_inode = bdev_alloc_inode,
    .destroy_inode = bdev_destroy_inode,
    .drop_inode = generic_delete_inode,
    .clear_inode = bdev_clear_inode,
};

static int bd_get_sb(struct file_system_type *fs_type,
    int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
    return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
}

static struct file_system_type bd_type = {
    .name = "bdev",
    .get_sb = bd_get_sb,
    .kill_sb = kill_anon_super,
};

struct super_block *blockdev_superblock __read_mostly;

void __init bdev_cache_init(void)
{
    int err;
    struct vfsmount *bd_mnt;

    bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
            0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
                SLAB_MEM_SPREAD|SLAB_PANIC),
            init_once);
    err = register_filesystem(&bd_type);
    if (err)
        panic("Cannot register bdev pseudo-fs");
    bd_mnt = kern_mount(&bd_type);
    if (IS_ERR(bd_mnt))
        panic("Cannot create bdev pseudo-fs");
    /*
     * This vfsmount structure is only used to obtain the
     * blockdev_superblock, so tell kmemleak not to report it.
     */
    kmemleak_not_leak(bd_mnt);
    blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
}

/*
 * Most likely a _very_ bad one - but then it's hardly critical for a small
 * /dev, and it can be fixed when somebody needs a really large one.
 * Keep in mind that it will be fed through the icache hash function too.
 */
static inline unsigned long hash(dev_t dev)
{
    return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
    return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
    BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
    return 0;
}

static LIST_HEAD(all_bdevs);

struct block_device *bdget(dev_t dev)
{
    struct block_device *bdev;
    struct inode *inode;

    inode = iget5_locked(blockdev_superblock, hash(dev),
            bdev_test, bdev_set, &dev);

    if (!inode)
        return NULL;

    bdev = &BDEV_I(inode)->bdev;

    if (inode->i_state & I_NEW) {
        bdev->bd_contains = NULL;
        bdev->bd_inode = inode;
        bdev->bd_block_size = (1 << inode->i_blkbits);
        bdev->bd_part_count = 0;
        bdev->bd_invalidated = 0;
        inode->i_mode = S_IFBLK;
        inode->i_rdev = dev;
        inode->i_bdev = bdev;
        inode->i_data.a_ops = &def_blk_aops;
        mapping_set_gfp_mask(&inode->i_data, GFP_USER);
        inode->i_data.backing_dev_info = &default_backing_dev_info;
        spin_lock(&bdev_lock);
        list_add(&bdev->bd_list, &all_bdevs);
        spin_unlock(&bdev_lock);
        unlock_new_inode(inode);
    }
    return bdev;
}

EXPORT_SYMBOL(bdget);
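
/*
 * Reference-discipline sketch (illustrative only): every successful
 * bdget() must be balanced by a bdput() once the caller is done with
 * the block_device, since bdget() pins the backing bdev inode.
 *
 *    bdev = bdget(dev);
 *    if (!bdev)
 *        return -ENOMEM;
 *    ... use bdev ...
 *    bdput(bdev);
 */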

/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev: Block device to grab a reference to.
 */
struct block_device *bdgrab(struct block_device *bdev)
{
    atomic_inc(&bdev->bd_inode->i_count);
    return bdev;
}

long nr_blockdev_pages(void)
{
    struct block_device *bdev;
    long ret = 0;
    spin_lock(&bdev_lock);
    list_for_each_entry(bdev, &all_bdevs, bd_list) {
        ret += bdev->bd_inode->i_mapping->nrpages;
    }
    spin_unlock(&bdev_lock);
    return ret;
}

void bdput(struct block_device *bdev)
{
    iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
    struct block_device *bdev;

    spin_lock(&bdev_lock);
    bdev = inode->i_bdev;
    if (bdev) {
        atomic_inc(&bdev->bd_inode->i_count);
        spin_unlock(&bdev_lock);
        return bdev;
    }
    spin_unlock(&bdev_lock);

    bdev = bdget(inode->i_rdev);
    if (bdev) {
        spin_lock(&bdev_lock);
        if (!inode->i_bdev) {
            /*
             * We take an additional reference to bd_inode->i_count
             * for this inode; it is released in clear_inode() of
             * the inode. So we can always access it via ->i_mapping
             * without igrab().
             */
            atomic_inc(&bdev->bd_inode->i_count);
            inode->i_bdev = bdev;
            inode->i_mapping = bdev->bd_inode->i_mapping;
            list_add(&inode->i_devices, &bdev->bd_inodes);
        }
        spin_unlock(&bdev_lock);
    }
    return bdev;
}

/* Call when you free an inode. */

void bd_forget(struct inode *inode)
{
    struct block_device *bdev = NULL;

    spin_lock(&bdev_lock);
    if (inode->i_bdev) {
        if (!sb_is_blkdev_sb(inode->i_sb))
            bdev = inode->i_bdev;
        __bd_forget(inode);
    }
    spin_unlock(&bdev_lock);

    if (bdev)
        iput(bdev->bd_inode);
}

int bd_claim(struct block_device *bdev, void *holder)
{
    int res;
    spin_lock(&bdev_lock);

    /* first decide result */
    if (bdev->bd_holder == holder)
        res = 0; /* already a holder */
    else if (bdev->bd_holder != NULL)
        res = -EBUSY; /* held by someone else */
    else if (bdev->bd_contains == bdev)
        res = 0; /* is a whole device which isn't held */

    else if (bdev->bd_contains->bd_holder == bd_claim)
        res = 0; /* is a partition of a device that is being partitioned */
    else if (bdev->bd_contains->bd_holder != NULL)
        res = -EBUSY; /* is a partition of a held device */
    else
        res = 0; /* is a partition of an un-held device */

    /* now impose change */
    if (res == 0) {
        /* note that for a whole device bd_holders
         * will be incremented twice, and bd_holder will
         * be set to bd_claim before being set to holder
         */
        bdev->bd_contains->bd_holders++;
        bdev->bd_contains->bd_holder = bd_claim;
        bdev->bd_holders++;
        bdev->bd_holder = holder;
    }
    spin_unlock(&bdev_lock);
    return res;
}

EXPORT_SYMBOL(bd_claim);

void bd_release(struct block_device *bdev)
{
    spin_lock(&bdev_lock);
    if (!--bdev->bd_contains->bd_holders)
        bdev->bd_contains->bd_holder = NULL;
    if (!--bdev->bd_holders)
        bdev->bd_holder = NULL;
    spin_unlock(&bdev_lock);
}

EXPORT_SYMBOL(bd_release);
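
/*
 * Claiming sketch (hypothetical caller; my_driver_cookie is an
 * invented name): a driver wanting exclusive use of a device claims
 * it with its own cookie and releases it when finished.
 *
 *    if (bd_claim(bdev, my_driver_cookie) < 0)
 *        goto busy;
 *    ... exclusive access ...
 *    bd_release(bdev);
 */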

#ifdef CONFIG_SYSFS
/*
 * Functions for bd_claim_by_kobject / bd_release_from_kobject
 *
 * If a kobject is passed to bd_claim_by_kobject()
 * and the kobject has a parent directory,
 * the following symlinks are created:
 * o from the kobject to the claimed bdev
 * o from the "holders" directory of the bdev to the parent of the kobject
 * bd_release_from_kobject() removes these symlinks.
 *
 * Example:
 * If /dev/dm-0 maps to /dev/sda and the kobject corresponding to
 * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
 * /sys/block/dm-0/slaves/sda --> /sys/block/sda
 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 */

static int add_symlink(struct kobject *from, struct kobject *to)
{
    if (!from || !to)
        return 0;
    return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
    if (!from || !to)
        return;
    sysfs_remove_link(from, kobject_name(to));
}

/*
 * 'struct bd_holder' contains pointers to kobjects symlinked by
 * bd_claim_by_kobject.
 * It's connected to bd_holder_list, which is protected by bdev->bd_mutex.
 */
struct bd_holder {
    struct list_head list; /* chain of holders of the bdev */
    int count; /* references from the holder */
    struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */
    struct kobject *hdev; /* e.g. "/block/dm-0" */
    struct kobject *hdir; /* e.g. "/block/sda/holders" */
    struct kobject *sdev; /* e.g. "/block/sda" */
};

/*
 * Get references of related kobjects at once.
 * Returns 1 on success, 0 on failure.
 *
 * Should call bd_holder_release_dirs() after successful use.
 */
static int bd_holder_grab_dirs(struct block_device *bdev,
            struct bd_holder *bo)
{
    if (!bdev || !bo)
        return 0;

    bo->sdir = kobject_get(bo->sdir);
    if (!bo->sdir)
        return 0;

    bo->hdev = kobject_get(bo->sdir->parent);
    if (!bo->hdev)
        goto fail_put_sdir;

    bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
    if (!bo->sdev)
        goto fail_put_hdev;

    bo->hdir = kobject_get(bdev->bd_part->holder_dir);
    if (!bo->hdir)
        goto fail_put_sdev;

    return 1;

fail_put_sdev:
    kobject_put(bo->sdev);
fail_put_hdev:
    kobject_put(bo->hdev);
fail_put_sdir:
    kobject_put(bo->sdir);

    return 0;
}

/* Put references of related kobjects at once. */
static void bd_holder_release_dirs(struct bd_holder *bo)
{
    kobject_put(bo->hdir);
    kobject_put(bo->sdev);
    kobject_put(bo->hdev);
    kobject_put(bo->sdir);
}

static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
{
    struct bd_holder *bo;

    bo = kzalloc(sizeof(*bo), GFP_KERNEL);
    if (!bo)
        return NULL;

    bo->count = 1;
    bo->sdir = kobj;

    return bo;
}

static void free_bd_holder(struct bd_holder *bo)
{
    kfree(bo);
}

/**
 * find_bd_holder - find matching struct bd_holder from the block device
 *
 * @bdev: struct block device to be searched
 * @bo: target struct bd_holder
 *
 * Searches @bdev->bd_holder_list for an entry matching @bo.
 * If found, increments the reference count and returns the pointer.
 * If not found, returns NULL.
 */
static struct bd_holder *find_bd_holder(struct block_device *bdev,
                    struct bd_holder *bo)
{
    struct bd_holder *tmp;

    list_for_each_entry(tmp, &bdev->bd_holder_list, list)
        if (tmp->sdir == bo->sdir) {
            tmp->count++;
            return tmp;
        }

    return NULL;
}

/**
 * add_bd_holder - create sysfs symlinks for bd_claim() relationship
 *
 * @bdev: block device to be bd_claimed
 * @bo: preallocated and initialized by alloc_bd_holder()
 *
 * Add @bo to @bdev->bd_holder_list and create the symlinks.
 *
 * Returns 0 if the symlinks are created.
 * Returns -ve if something fails.
 */
static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
{
    int err;

    if (!bo)
        return -EINVAL;

    if (!bd_holder_grab_dirs(bdev, bo))
        return -EBUSY;

    err = add_symlink(bo->sdir, bo->sdev);
    if (err)
        return err;

    err = add_symlink(bo->hdir, bo->hdev);
    if (err) {
        del_symlink(bo->sdir, bo->sdev);
        return err;
    }

    list_add_tail(&bo->list, &bdev->bd_holder_list);
    return 0;
}

/**
 * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
 *
 * @bdev: block device to be bd_claimed
 * @kobj: holder's kobject
 *
 * If there is a matching entry with @kobj in @bdev->bd_holder_list
 * and no other bd_claim() from the same kobject,
 * remove the struct bd_holder from the list and delete its symlinks.
 *
 * Returns a pointer to the struct bd_holder when it's removed from the list
 * and ready to be freed.
 * Returns NULL if no matching claim is found or if there is another
 * bd_claim() by the same kobject.
 */
static struct bd_holder *del_bd_holder(struct block_device *bdev,
                    struct kobject *kobj)
{
    struct bd_holder *bo;

    list_for_each_entry(bo, &bdev->bd_holder_list, list) {
        if (bo->sdir == kobj) {
            bo->count--;
            BUG_ON(bo->count < 0);
            if (!bo->count) {
                list_del(&bo->list);
                del_symlink(bo->sdir, bo->sdev);
                del_symlink(bo->hdir, bo->hdev);
                bd_holder_release_dirs(bo);
                return bo;
            }
            break;
        }
    }

    return NULL;
}

/**
 * bd_claim_by_kobject - bd_claim() with additional kobject signature
 *
 * @bdev: block device to be claimed
 * @holder: holder's signature
 * @kobj: holder's kobject
 *
 * Do bd_claim() and if it succeeds, create sysfs symlinks between
 * the bdev and the holder's kobject.
 * Use bd_release_from_kobject() when releasing the claimed bdev.
 *
 * Returns 0 on success. (same as bd_claim())
 * Returns errno on failure.
 */
static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
                struct kobject *kobj)
{
    int err;
    struct bd_holder *bo, *found;

    if (!kobj)
        return -EINVAL;

    bo = alloc_bd_holder(kobj);
    if (!bo)
        return -ENOMEM;

    mutex_lock(&bdev->bd_mutex);

    err = bd_claim(bdev, holder);
    if (err)
        goto fail;

    found = find_bd_holder(bdev, bo);
    if (found)
        goto fail;

    err = add_bd_holder(bdev, bo);
    if (err)
        bd_release(bdev);
    else
        bo = NULL;
fail:
    mutex_unlock(&bdev->bd_mutex);
    free_bd_holder(bo);
    return err;
}

/**
 * bd_release_from_kobject - bd_release() with additional kobject signature
 *
 * @bdev: block device to be released
 * @kobj: holder's kobject
 *
 * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
 */
static void bd_release_from_kobject(struct block_device *bdev,
                    struct kobject *kobj)
{
    if (!kobj)
        return;

    mutex_lock(&bdev->bd_mutex);
    bd_release(bdev);
    free_bd_holder(del_bd_holder(bdev, kobj));
    mutex_unlock(&bdev->bd_mutex);
}

/**
 * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
 *
 * @bdev: block device to be claimed
 * @holder: holder's signature
 * @disk: holder's gendisk
 *
 * Calls bd_claim_by_kobject() after taking a reference on @disk->slave_dir.
 */
int bd_claim_by_disk(struct block_device *bdev, void *holder,
            struct gendisk *disk)
{
    return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
}
EXPORT_SYMBOL_GPL(bd_claim_by_disk);

/**
 * bd_release_from_disk - wrapper function for bd_release_from_kobject()
 *
 * @bdev: block device to be released
 * @disk: holder's gendisk
 *
 * Calls bd_release_from_kobject() and puts @disk->slave_dir.
 */
void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
{
    bd_release_from_kobject(bdev, disk->slave_dir);
    kobject_put(disk->slave_dir);
}
EXPORT_SYMBOL_GPL(bd_release_from_disk);
#endif

/*
 * Tries to open a block device by device number. Use it ONLY if you
 * really do not have anything better - i.e. when you are behind a
 * truly sucky interface and all you are given is a device number. _Never_
 * to be used for internal purposes. If you ever need it - reconsider
 * your API.
 */
struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
{
    struct block_device *bdev = bdget(dev);
    int err = -ENOMEM;
    if (bdev)
        err = blkdev_get(bdev, mode);
    return err ? ERR_PTR(err) : bdev;
}

EXPORT_SYMBOL(open_by_devnum);

/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev: struct block device to be flushed
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or an online
 * resize.
 */
static void flush_disk(struct block_device *bdev)
{
    if (__invalidate_device(bdev)) {
        char name[BDEVNAME_SIZE] = "";

        if (bdev->bd_disk)
            disk_name(bdev->bd_disk, 0, name);
        printk(KERN_WARNING "VFS: busy inodes on changed media or "
               "resized disk %s\n", name);
    }

    if (!bdev->bd_disk)
        return;
    if (disk_partitionable(bdev->bd_disk))
        bdev->bd_invalidated = 1;
}

/**
 * check_disk_size_change - checks for disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 *
 * This routine checks whether the bdev size matches the disk size and
 * adjusts it if it differs.
 */
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
{
    loff_t disk_size, bdev_size;

    disk_size = (loff_t)get_capacity(disk) << 9;
    bdev_size = i_size_read(bdev->bd_inode);
    if (disk_size != bdev_size) {
        char name[BDEVNAME_SIZE];

        disk_name(disk, 0, name);
        printk(KERN_INFO
               "%s: detected capacity change from %lld to %lld\n",
               name, bdev_size, disk_size);
        i_size_write(bdev->bd_inode, disk_size);
        flush_disk(bdev);
    }
}
EXPORT_SYMBOL(check_disk_size_change);

/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level drivers' revalidate_disk
 * call-backs. It is used to do common pre- and post-operations needed
 * for all revalidate_disk operations.
 */
int revalidate_disk(struct gendisk *disk)
{
    struct block_device *bdev;
    int ret = 0;

    if (disk->fops->revalidate_disk)
        ret = disk->fops->revalidate_disk(disk);

    bdev = bdget_disk(disk, 0);
    if (!bdev)
        return ret;

    mutex_lock(&bdev->bd_mutex);
    check_disk_size_change(disk, bdev);
    mutex_unlock(&bdev->bd_mutex);
    bdput(bdev);
    return ret;
}
EXPORT_SYMBOL(revalidate_disk);

/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */
int check_disk_change(struct block_device *bdev)
{
    struct gendisk *disk = bdev->bd_disk;
    struct block_device_operations *bdops = disk->fops;

    if (!bdops->media_changed)
        return 0;
    if (!bdops->media_changed(bdev->bd_disk))
        return 0;

    flush_disk(bdev);
    if (bdops->revalidate_disk)
        bdops->revalidate_disk(bdev->bd_disk);
    return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
    unsigned bsize = bdev_logical_block_size(bdev);

    bdev->bd_inode->i_size = size;
    while (bsize < PAGE_CACHE_SIZE) {
        if (size & bsize)
            break;
        bsize <<= 1;
    }
    bdev->bd_block_size = bsize;
    bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);
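
/*
 * Worked example (illustrative): for a 1 GiB device (size = 2^30)
 * with 512-byte logical blocks, the loop above keeps doubling bsize
 * while it still divides the size evenly (size & bsize stays 0),
 * stopping at PAGE_CACHE_SIZE, so bd_block_size ends up as 4096 on
 * a machine with 4 KiB pages.
 */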

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

/*
 * bd_mutex locking:
 *
 * mutex_lock(part->bd_mutex)
 * mutex_lock_nested(whole->bd_mutex, 1)
 */

static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
    struct gendisk *disk;
    int ret;
    int partno;
    int perm = 0;

    if (mode & FMODE_READ)
        perm |= MAY_READ;
    if (mode & FMODE_WRITE)
        perm |= MAY_WRITE;
    /*
     * hooks: /n/, see "layering violations".
     */
    ret = devcgroup_inode_permission(bdev->bd_inode, perm);
    if (ret != 0) {
        bdput(bdev);
        return ret;
    }

    lock_kernel();
 restart:

    ret = -ENXIO;
    disk = get_gendisk(bdev->bd_dev, &partno);
    if (!disk)
        goto out_unlock_kernel;

    mutex_lock_nested(&bdev->bd_mutex, for_part);
    if (!bdev->bd_openers) {
        bdev->bd_disk = disk;
        bdev->bd_contains = bdev;
        if (!partno) {
            struct backing_dev_info *bdi;

            ret = -ENXIO;
            bdev->bd_part = disk_get_part(disk, partno);
            if (!bdev->bd_part)
                goto out_clear;

            if (disk->fops->open) {
                ret = disk->fops->open(bdev, mode);
                if (ret == -ERESTARTSYS) {
                    /* Lost a race with 'disk' being
                     * deleted, try again.
                     * See md.c
                     */
                    disk_put_part(bdev->bd_part);
                    bdev->bd_part = NULL;
                    module_put(disk->fops->owner);
                    put_disk(disk);
                    bdev->bd_disk = NULL;
                    mutex_unlock(&bdev->bd_mutex);
                    goto restart;
                }
                if (ret)
                    goto out_clear;
            }
            if (!bdev->bd_openers) {
                bd_set_size(bdev, (loff_t)get_capacity(disk) << 9);
                bdi = blk_get_backing_dev_info(bdev);
                if (bdi == NULL)
                    bdi = &default_backing_dev_info;
                bdev->bd_inode->i_data.backing_dev_info = bdi;
            }
            if (bdev->bd_invalidated)
                rescan_partitions(disk, bdev);
        } else {
            struct block_device *whole;
            whole = bdget_disk(disk, 0);
            ret = -ENOMEM;
            if (!whole)
                goto out_clear;
            BUG_ON(for_part);
            ret = __blkdev_get(whole, mode, 1);
            if (ret)
                goto out_clear;
            bdev->bd_contains = whole;
            bdev->bd_inode->i_data.backing_dev_info =
               whole->bd_inode->i_data.backing_dev_info;
            bdev->bd_part = disk_get_part(disk, partno);
            if (!(disk->flags & GENHD_FL_UP) ||
                !bdev->bd_part || !bdev->bd_part->nr_sects) {
                ret = -ENXIO;
                goto out_clear;
            }
            bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
        }
    } else {
        put_disk(disk);
        module_put(disk->fops->owner);
        disk = NULL;
        if (bdev->bd_contains == bdev) {
            if (bdev->bd_disk->fops->open) {
                ret = bdev->bd_disk->fops->open(bdev, mode);
                if (ret)
                    goto out_unlock_bdev;
            }
            if (bdev->bd_invalidated)
                rescan_partitions(bdev->bd_disk, bdev);
        }
    }
    bdev->bd_openers++;
    if (for_part)
        bdev->bd_part_count++;
    mutex_unlock(&bdev->bd_mutex);
    unlock_kernel();
    return 0;

 out_clear:
    disk_put_part(bdev->bd_part);
    bdev->bd_disk = NULL;
    bdev->bd_part = NULL;
    bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
    if (bdev != bdev->bd_contains)
        __blkdev_put(bdev->bd_contains, mode, 1);
    bdev->bd_contains = NULL;
 out_unlock_bdev:
    mutex_unlock(&bdev->bd_mutex);
 out_unlock_kernel:
    unlock_kernel();

    if (disk)
        module_put(disk->fops->owner);
    put_disk(disk);
    bdput(bdev);

    return ret;
}

int blkdev_get(struct block_device *bdev, fmode_t mode)
{
    return __blkdev_get(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_get);

static int blkdev_open(struct inode *inode, struct file *filp)
{
    struct block_device *bdev;
    int res;

    /*
     * Preserve backwards compatibility and allow large file access
     * even if userspace doesn't ask for it explicitly. Some mkfs
     * binary needs it. We might want to drop this workaround
     * during an unstable branch.
     */
    filp->f_flags |= O_LARGEFILE;

    if (filp->f_flags & O_NDELAY)
        filp->f_mode |= FMODE_NDELAY;
    if (filp->f_flags & O_EXCL)
        filp->f_mode |= FMODE_EXCL;
    if ((filp->f_flags & O_ACCMODE) == 3)
        filp->f_mode |= FMODE_WRITE_IOCTL;

    bdev = bd_acquire(inode);
    if (bdev == NULL)
        return -ENOMEM;

    filp->f_mapping = bdev->bd_inode->i_mapping;

    res = blkdev_get(bdev, filp->f_mode);
    if (res)
        return res;

    if (filp->f_mode & FMODE_EXCL) {
        res = bd_claim(bdev, filp);
        if (res)
            goto out_blkdev_put;
    }

    return 0;

 out_blkdev_put:
    blkdev_put(bdev, filp->f_mode);
    return res;
}

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
    int ret = 0;
    struct gendisk *disk = bdev->bd_disk;
    struct block_device *victim = NULL;

    mutex_lock_nested(&bdev->bd_mutex, for_part);
    lock_kernel();
    if (for_part)
        bdev->bd_part_count--;

    if (!--bdev->bd_openers) {
        sync_blockdev(bdev);
        kill_bdev(bdev);
    }
    if (bdev->bd_contains == bdev) {
        if (disk->fops->release)
            ret = disk->fops->release(disk, mode);
    }
    if (!bdev->bd_openers) {
        struct module *owner = disk->fops->owner;

        put_disk(disk);
        module_put(owner);
        disk_put_part(bdev->bd_part);
        bdev->bd_part = NULL;
        bdev->bd_disk = NULL;
        bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
        if (bdev != bdev->bd_contains)
            victim = bdev->bd_contains;
        bdev->bd_contains = NULL;
    }
    unlock_kernel();
    mutex_unlock(&bdev->bd_mutex);
    bdput(bdev);
    if (victim)
        __blkdev_put(victim, mode, 1);
    return ret;
}

int blkdev_put(struct block_device *bdev, fmode_t mode)
{
    return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode *inode, struct file *filp)
{
    struct block_device *bdev = I_BDEV(filp->f_mapping->host);
    if (bdev->bd_holder == filp)
        bd_release(bdev);
    return blkdev_put(bdev, filp->f_mode);
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
    struct block_device *bdev = I_BDEV(file->f_mapping->host);
    fmode_t mode = file->f_mode;

    /*
     * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
     * to update it before every ioctl.
     */
    if (file->f_flags & O_NDELAY)
        mode |= FMODE_NDELAY;
    else
        mode &= ~FMODE_NDELAY;

    return blkdev_ioctl(bdev, mode, cmd, arg);
}

/*
 * Try to release a page associated with a block device when the system
 * is under memory pressure.
 */
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
    struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

    if (super && super->s_op->bdev_try_to_free_page)
        return super->s_op->bdev_try_to_free_page(super, page, wait);

    return try_to_free_buffers(page);
}

static const struct address_space_operations def_blk_aops = {
    .readpage = blkdev_readpage,
    .writepage = blkdev_writepage,
    .sync_page = block_sync_page,
    .write_begin = blkdev_write_begin,
    .write_end = blkdev_write_end,
    .writepages = generic_writepages,
    .releasepage = blkdev_releasepage,
    .direct_IO = blkdev_direct_IO,
};

const struct file_operations def_blk_fops = {
    .open = blkdev_open,
    .release = blkdev_close,
    .llseek = block_llseek,
    .read = do_sync_read,
    .write = do_sync_write,
    .aio_read = generic_file_aio_read,
    .aio_write = generic_file_aio_write_nolock,
    .mmap = generic_file_mmap,
    .fsync = block_fsync,
    .unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT
    .compat_ioctl = compat_blkdev_ioctl,
#endif
    .splice_read = generic_file_splice_read,
    .splice_write = generic_file_splice_write,
};

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
    int res;
    mm_segment_t old_fs = get_fs();
    set_fs(KERNEL_DS);
    res = blkdev_ioctl(bdev, 0, cmd, arg);
    set_fs(old_fs);
    return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);

/**
 * lookup_bdev - lookup a struct block_device by name
 * @pathname: special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it. Return ERR_PTR(error)
 * otherwise.
 */
struct block_device *lookup_bdev(const char *pathname)
{
    struct block_device *bdev;
    struct inode *inode;
    struct path path;
    int error;

    if (!pathname || !*pathname)
        return ERR_PTR(-EINVAL);

    error = kern_path(pathname, LOOKUP_FOLLOW, &path);
    if (error)
        return ERR_PTR(error);

    inode = path.dentry->d_inode;
    error = -ENOTBLK;
    if (!S_ISBLK(inode->i_mode))
        goto fail;
    error = -EACCES;
    if (path.mnt->mnt_flags & MNT_NODEV)
        goto fail;
    error = -ENOMEM;
    bdev = bd_acquire(inode);
    if (!bdev)
        goto fail;
out:
    path_put(&path);
    return bdev;
fail:
    bdev = ERR_PTR(error);
    goto out;
}
EXPORT_SYMBOL(lookup_bdev);
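
/*
 * Usage sketch (hypothetical caller; the path is illustrative):
 * resolve a device-node path to its block_device. The reference
 * obtained here is dropped with bdput() when the caller is done.
 *
 *    bdev = lookup_bdev("/dev/sda1");
 *    if (IS_ERR(bdev))
 *        return PTR_ERR(bdev);
 *    ... inspect bdev ...
 *    bdput(bdev);
 */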

/**
 * open_bdev_exclusive - open a block device by name and set it up for use
 *
 * @path: special file representing the block device
 * @mode: FMODE_... combination to be used
 * @holder: owner for exclusion
 *
 * Open the blockdevice described by the special file at @path and claim it
 * for the @holder.
 */
struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
{
    struct block_device *bdev;
    int error = 0;

    bdev = lookup_bdev(path);
    if (IS_ERR(bdev))
        return bdev;

    error = blkdev_get(bdev, mode);
    if (error)
        return ERR_PTR(error);
    error = -EACCES;
    if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
        goto blkdev_put;
    error = bd_claim(bdev, holder);
    if (error)
        goto blkdev_put;

    return bdev;

blkdev_put:
    blkdev_put(bdev, mode);
    return ERR_PTR(error);
}

EXPORT_SYMBOL(open_bdev_exclusive);

/**
 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
 *
 * @bdev: blockdevice to close
 * @mode: mode, must match that used to open.
 *
 * This is the counterpart to open_bdev_exclusive().
 */
void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
{
    bd_release(bdev);
    blkdev_put(bdev, mode);
}

EXPORT_SYMBOL(close_bdev_exclusive);
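
/*
 * Typical mount-path usage (a sketch, not taken from a real
 * filesystem): open the backing device exclusively, keep it for the
 * lifetime of the mount, and close it with the same mode.
 *
 *    bdev = open_bdev_exclusive(dev_name, FMODE_READ|FMODE_WRITE, sb);
 *    if (IS_ERR(bdev))
 *        return PTR_ERR(bdev);
 *    ...
 *    close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
 */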

int __invalidate_device(struct block_device *bdev)
{
    struct super_block *sb = get_super(bdev);
    int res = 0;

    if (sb) {
        /*
         * No need to lock the super; get_super holds the
         * read mutex, so the filesystem cannot go away
         * under us (->put_super runs with the write lock
         * held).
         */
        shrink_dcache_sb(sb);
        res = invalidate_inodes(sb);
        drop_super(sb);
    }
    invalidate_bdev(bdev);
    return res;
}
EXPORT_SYMBOL(__invalidate_device);