Root/fs/open.c

/*
 * linux/fs/open.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */
6
7#include <linux/string.h>
8#include <linux/mm.h>
9#include <linux/file.h>
10#include <linux/fdtable.h>
11#include <linux/quotaops.h>
12#include <linux/fsnotify.h>
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/tty.h>
16#include <linux/namei.h>
17#include <linux/backing-dev.h>
18#include <linux/capability.h>
19#include <linux/securebits.h>
20#include <linux/security.h>
21#include <linux/mount.h>
22#include <linux/vfs.h>
23#include <linux/fcntl.h>
24#include <asm/uaccess.h>
25#include <linux/fs.h>
26#include <linux/personality.h>
27#include <linux/pagemap.h>
28#include <linux/syscalls.h>
29#include <linux/rcupdate.h>
30#include <linux/audit.h>
31#include <linux/falloc.h>
32#include <linux/fs_struct.h>
33
34int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
35{
36    int retval = -ENODEV;
37
38    if (dentry) {
39        retval = -ENOSYS;
40        if (dentry->d_sb->s_op->statfs) {
41            memset(buf, 0, sizeof(*buf));
42            retval = security_sb_statfs(dentry);
43            if (retval)
44                return retval;
45            retval = dentry->d_sb->s_op->statfs(dentry, buf);
46            if (retval == 0 && buf->f_frsize == 0)
47                buf->f_frsize = buf->f_bsize;
48        }
49    }
50    return retval;
51}
52
53EXPORT_SYMBOL(vfs_statfs);
54
55static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
56{
57    struct kstatfs st;
58    int retval;
59
60    retval = vfs_statfs(dentry, &st);
61    if (retval)
62        return retval;
63
64    if (sizeof(*buf) == sizeof(st))
65        memcpy(buf, &st, sizeof(st));
66    else {
67        if (sizeof buf->f_blocks == 4) {
68            if ((st.f_blocks | st.f_bfree | st.f_bavail |
69                 st.f_bsize | st.f_frsize) &
70                0xffffffff00000000ULL)
71                return -EOVERFLOW;
72            /*
73             * f_files and f_ffree may be -1; it's okay to stuff
74             * that into 32 bits
75             */
76            if (st.f_files != -1 &&
77                (st.f_files & 0xffffffff00000000ULL))
78                return -EOVERFLOW;
79            if (st.f_ffree != -1 &&
80                (st.f_ffree & 0xffffffff00000000ULL))
81                return -EOVERFLOW;
82        }
83
84        buf->f_type = st.f_type;
85        buf->f_bsize = st.f_bsize;
86        buf->f_blocks = st.f_blocks;
87        buf->f_bfree = st.f_bfree;
88        buf->f_bavail = st.f_bavail;
89        buf->f_files = st.f_files;
90        buf->f_ffree = st.f_ffree;
91        buf->f_fsid = st.f_fsid;
92        buf->f_namelen = st.f_namelen;
93        buf->f_frsize = st.f_frsize;
94        memset(buf->f_spare, 0, sizeof(buf->f_spare));
95    }
96    return 0;
97}
98
99static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
100{
101    struct kstatfs st;
102    int retval;
103
104    retval = vfs_statfs(dentry, &st);
105    if (retval)
106        return retval;
107
108    if (sizeof(*buf) == sizeof(st))
109        memcpy(buf, &st, sizeof(st));
110    else {
111        buf->f_type = st.f_type;
112        buf->f_bsize = st.f_bsize;
113        buf->f_blocks = st.f_blocks;
114        buf->f_bfree = st.f_bfree;
115        buf->f_bavail = st.f_bavail;
116        buf->f_files = st.f_files;
117        buf->f_ffree = st.f_ffree;
118        buf->f_fsid = st.f_fsid;
119        buf->f_namelen = st.f_namelen;
120        buf->f_frsize = st.f_frsize;
121        memset(buf->f_spare, 0, sizeof(buf->f_spare));
122    }
123    return 0;
124}
125
126SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
127{
128    struct path path;
129    int error;
130
131    error = user_path(pathname, &path);
132    if (!error) {
133        struct statfs tmp;
134        error = vfs_statfs_native(path.dentry, &tmp);
135        if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
136            error = -EFAULT;
137        path_put(&path);
138    }
139    return error;
140}
141
142SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
143{
144    struct path path;
145    long error;
146
147    if (sz != sizeof(*buf))
148        return -EINVAL;
149    error = user_path(pathname, &path);
150    if (!error) {
151        struct statfs64 tmp;
152        error = vfs_statfs64(path.dentry, &tmp);
153        if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
154            error = -EFAULT;
155        path_put(&path);
156    }
157    return error;
158}
159
160SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
161{
162    struct file * file;
163    struct statfs tmp;
164    int error;
165
166    error = -EBADF;
167    file = fget(fd);
168    if (!file)
169        goto out;
170    error = vfs_statfs_native(file->f_path.dentry, &tmp);
171    if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
172        error = -EFAULT;
173    fput(file);
174out:
175    return error;
176}
177
178SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
179{
180    struct file * file;
181    struct statfs64 tmp;
182    int error;
183
184    if (sz != sizeof(*buf))
185        return -EINVAL;
186
187    error = -EBADF;
188    file = fget(fd);
189    if (!file)
190        goto out;
191    error = vfs_statfs64(file->f_path.dentry, &tmp);
192    if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
193        error = -EFAULT;
194    fput(file);
195out:
196    return error;
197}
198
199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
200    struct file *filp)
201{
202    int err;
203    struct iattr newattrs;
204
205    /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
206    if (length < 0)
207        return -EINVAL;
208
209    newattrs.ia_size = length;
210    newattrs.ia_valid = ATTR_SIZE | time_attrs;
211    if (filp) {
212        newattrs.ia_file = filp;
213        newattrs.ia_valid |= ATTR_FILE;
214    }
215
216    /* Remove suid/sgid on truncate too */
217    newattrs.ia_valid |= should_remove_suid(dentry);
218
219    mutex_lock(&dentry->d_inode->i_mutex);
220    err = notify_change(dentry, &newattrs);
221    mutex_unlock(&dentry->d_inode->i_mutex);
222    return err;
223}
224
225static long do_sys_truncate(const char __user *pathname, loff_t length)
226{
227    struct path path;
228    struct inode *inode;
229    int error;
230
231    error = -EINVAL;
232    if (length < 0) /* sorry, but loff_t says... */
233        goto out;
234
235    error = user_path(pathname, &path);
236    if (error)
237        goto out;
238    inode = path.dentry->d_inode;
239
240    /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
241    error = -EISDIR;
242    if (S_ISDIR(inode->i_mode))
243        goto dput_and_out;
244
245    error = -EINVAL;
246    if (!S_ISREG(inode->i_mode))
247        goto dput_and_out;
248
249    error = mnt_want_write(path.mnt);
250    if (error)
251        goto dput_and_out;
252
253    error = inode_permission(inode, MAY_WRITE);
254    if (error)
255        goto mnt_drop_write_and_out;
256
257    error = -EPERM;
258    if (IS_APPEND(inode))
259        goto mnt_drop_write_and_out;
260
261    error = get_write_access(inode);
262    if (error)
263        goto mnt_drop_write_and_out;
264
265    /*
266     * Make sure that there are no leases. get_write_access() protects
267     * against the truncate racing with a lease-granting setlease().
268     */
269    error = break_lease(inode, FMODE_WRITE);
270    if (error)
271        goto put_write_and_out;
272
273    error = locks_verify_truncate(inode, NULL, length);
274    if (!error)
275        error = security_path_truncate(&path, length, 0);
276    if (!error) {
277        vfs_dq_init(inode);
278        error = do_truncate(path.dentry, length, 0, NULL);
279    }
280
281put_write_and_out:
282    put_write_access(inode);
283mnt_drop_write_and_out:
284    mnt_drop_write(path.mnt);
285dput_and_out:
286    path_put(&path);
287out:
288    return error;
289}
290
291SYSCALL_DEFINE2(truncate, const char __user *, path, unsigned long, length)
292{
293    /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */
294    return do_sys_truncate(path, (long)length);
295}
296
297static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
298{
299    struct inode * inode;
300    struct dentry *dentry;
301    struct file * file;
302    int error;
303
304    error = -EINVAL;
305    if (length < 0)
306        goto out;
307    error = -EBADF;
308    file = fget(fd);
309    if (!file)
310        goto out;
311
312    /* explicitly opened as large or we are on 64-bit box */
313    if (file->f_flags & O_LARGEFILE)
314        small = 0;
315
316    dentry = file->f_path.dentry;
317    inode = dentry->d_inode;
318    error = -EINVAL;
319    if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
320        goto out_putf;
321
322    error = -EINVAL;
323    /* Cannot ftruncate over 2^31 bytes without large file support */
324    if (small && length > MAX_NON_LFS)
325        goto out_putf;
326
327    error = -EPERM;
328    if (IS_APPEND(inode))
329        goto out_putf;
330
331    error = locks_verify_truncate(inode, file, length);
332    if (!error)
333        error = security_path_truncate(&file->f_path, length,
334                           ATTR_MTIME|ATTR_CTIME);
335    if (!error)
336        error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
337out_putf:
338    fput(file);
339out:
340    return error;
341}
342
343SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
344{
345    long ret = do_sys_ftruncate(fd, length, 1);
346    /* avoid REGPARM breakage on x86: */
347    asmlinkage_protect(2, ret, fd, length);
348    return ret;
349}
350
/* LFS versions of truncate are only needed on 32 bit machines */
#if BITS_PER_LONG == 32
/* truncate64(2): path-based truncate taking a full loff_t length. */
SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length)
{
    long rc = do_sys_truncate(path, length);

    return rc;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
/* Wrapper entry point: converts the long-typed argument back to a pointer. */
asmlinkage long SyS_truncate64(long path, loff_t length)
{
    const char __user *upath = (const char __user *) path;

    return SYSC_truncate64(upath, length);
}
SYSCALL_ALIAS(sys_truncate64, SyS_truncate64);
#endif

/*
 * ftruncate64(2): fd-based truncate taking a full loff_t length; passes
 * small == 0 so the MAX_NON_LFS limit is not applied.
 */
SYSCALL_DEFINE(ftruncate64)(unsigned int fd, loff_t length)
{
    long rc;

    rc = do_sys_ftruncate(fd, length, 0);
    /* avoid REGPARM breakage on x86: */
    asmlinkage_protect(2, rc, fd, length);
    return rc;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
/* Wrapper entry point: narrows the long-typed argument to unsigned int. */
asmlinkage long SyS_ftruncate64(long fd, loff_t length)
{
    unsigned int ufd = (unsigned int) fd;

    return SYSC_ftruncate64(ufd, length);
}
SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
#endif
#endif /* BITS_PER_LONG == 32 */
380
381
382int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
383{
384    struct inode *inode = file->f_path.dentry->d_inode;
385    long ret;
386
387    if (offset < 0 || len <= 0)
388        return -EINVAL;
389
390    /* Return error if mode is not supported */
391    if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
392        return -EOPNOTSUPP;
393
394    if (!(file->f_mode & FMODE_WRITE))
395        return -EBADF;
396    /*
397     * Revalidate the write permissions, in case security policy has
398     * changed since the files were opened.
399     */
400    ret = security_file_permission(file, MAY_WRITE);
401    if (ret)
402        return ret;
403
404    if (S_ISFIFO(inode->i_mode))
405        return -ESPIPE;
406
407    /*
408     * Let individual file system decide if it supports preallocation
409     * for directories or not.
410     */
411    if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
412        return -ENODEV;
413
414    /* Check for wrap through zero too */
415    if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
416        return -EFBIG;
417
418    if (!inode->i_op->fallocate)
419        return -EOPNOTSUPP;
420
421    return inode->i_op->fallocate(inode, mode, offset, len);
422}
423
424SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
425{
426    struct file *file;
427    int error = -EBADF;
428
429    file = fget(fd);
430    if (file) {
431        error = do_fallocate(file, mode, offset, len);
432        fput(file);
433    }
434
435    return error;
436}
437
438#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
439asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
440{
441    return SYSC_fallocate((int)fd, (int)mode, offset, len);
442}
443SYSCALL_ALIAS(sys_fallocate, SyS_fallocate);
444#endif
445
446/*
447 * access() needs to use the real uid/gid, not the effective uid/gid.
448 * We do this by temporarily clearing all FS-related capabilities and
449 * switching the fsuid/fsgid around to the real ones.
450 */
451SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
452{
453    const struct cred *old_cred;
454    struct cred *override_cred;
455    struct path path;
456    struct inode *inode;
457    int res;
458
459    if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
460        return -EINVAL;
461
462    override_cred = prepare_creds();
463    if (!override_cred)
464        return -ENOMEM;
465
466    override_cred->fsuid = override_cred->uid;
467    override_cred->fsgid = override_cred->gid;
468
469    if (!issecure(SECURE_NO_SETUID_FIXUP)) {
470        /* Clear the capabilities if we switch to a non-root user */
471        if (override_cred->uid)
472            cap_clear(override_cred->cap_effective);
473        else
474            override_cred->cap_effective =
475                override_cred->cap_permitted;
476    }
477
478    old_cred = override_creds(override_cred);
479
480    res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
481    if (res)
482        goto out;
483
484    inode = path.dentry->d_inode;
485
486    if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
487        /*
488         * MAY_EXEC on regular files is denied if the fs is mounted
489         * with the "noexec" flag.
490         */
491        res = -EACCES;
492        if (path.mnt->mnt_flags & MNT_NOEXEC)
493            goto out_path_release;
494    }
495
496    res = inode_permission(inode, mode | MAY_ACCESS);
497    /* SuS v2 requires we report a read only fs too */
498    if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
499        goto out_path_release;
500    /*
501     * This is a rare case where using __mnt_is_readonly()
502     * is OK without a mnt_want/drop_write() pair. Since
503     * no actual write to the fs is performed here, we do
504     * not need to telegraph to that to anyone.
505     *
506     * By doing this, we accept that this access is
507     * inherently racy and know that the fs may change
508     * state before we even see this result.
509     */
510    if (__mnt_is_readonly(path.mnt))
511        res = -EROFS;
512
513out_path_release:
514    path_put(&path);
515out:
516    revert_creds(old_cred);
517    put_cred(override_cred);
518    return res;
519}
520
521SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
522{
523    return sys_faccessat(AT_FDCWD, filename, mode);
524}
525
526SYSCALL_DEFINE1(chdir, const char __user *, filename)
527{
528    struct path path;
529    int error;
530
531    error = user_path_dir(filename, &path);
532    if (error)
533        goto out;
534
535    error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
536    if (error)
537        goto dput_and_out;
538
539    set_fs_pwd(current->fs, &path);
540
541dput_and_out:
542    path_put(&path);
543out:
544    return error;
545}
546
547SYSCALL_DEFINE1(fchdir, unsigned int, fd)
548{
549    struct file *file;
550    struct inode *inode;
551    int error;
552
553    error = -EBADF;
554    file = fget(fd);
555    if (!file)
556        goto out;
557
558    inode = file->f_path.dentry->d_inode;
559
560    error = -ENOTDIR;
561    if (!S_ISDIR(inode->i_mode))
562        goto out_putf;
563
564    error = inode_permission(inode, MAY_EXEC | MAY_ACCESS);
565    if (!error)
566        set_fs_pwd(current->fs, &file->f_path);
567out_putf:
568    fput(file);
569out:
570    return error;
571}
572
573SYSCALL_DEFINE1(chroot, const char __user *, filename)
574{
575    struct path path;
576    int error;
577
578    error = user_path_dir(filename, &path);
579    if (error)
580        goto out;
581
582    error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
583    if (error)
584        goto dput_and_out;
585
586    error = -EPERM;
587    if (!capable(CAP_SYS_CHROOT))
588        goto dput_and_out;
589
590    set_fs_root(current->fs, &path);
591    error = 0;
592dput_and_out:
593    path_put(&path);
594out:
595    return error;
596}
597
598SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
599{
600    struct inode * inode;
601    struct dentry * dentry;
602    struct file * file;
603    int err = -EBADF;
604    struct iattr newattrs;
605
606    file = fget(fd);
607    if (!file)
608        goto out;
609
610    dentry = file->f_path.dentry;
611    inode = dentry->d_inode;
612
613    audit_inode(NULL, dentry);
614
615    err = mnt_want_write_file(file);
616    if (err)
617        goto out_putf;
618    mutex_lock(&inode->i_mutex);
619    if (mode == (mode_t) -1)
620        mode = inode->i_mode;
621    newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
622    newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
623    err = notify_change(dentry, &newattrs);
624    mutex_unlock(&inode->i_mutex);
625    mnt_drop_write(file->f_path.mnt);
626out_putf:
627    fput(file);
628out:
629    return err;
630}
631
632SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
633{
634    struct path path;
635    struct inode *inode;
636    int error;
637    struct iattr newattrs;
638
639    error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
640    if (error)
641        goto out;
642    inode = path.dentry->d_inode;
643
644    error = mnt_want_write(path.mnt);
645    if (error)
646        goto dput_and_out;
647    mutex_lock(&inode->i_mutex);
648    if (mode == (mode_t) -1)
649        mode = inode->i_mode;
650    newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
651    newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
652    error = notify_change(path.dentry, &newattrs);
653    mutex_unlock(&inode->i_mutex);
654    mnt_drop_write(path.mnt);
655dput_and_out:
656    path_put(&path);
657out:
658    return error;
659}
660
661SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode)
662{
663    return sys_fchmodat(AT_FDCWD, filename, mode);
664}
665
666static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
667{
668    struct inode *inode = dentry->d_inode;
669    int error;
670    struct iattr newattrs;
671
672    newattrs.ia_valid = ATTR_CTIME;
673    if (user != (uid_t) -1) {
674        newattrs.ia_valid |= ATTR_UID;
675        newattrs.ia_uid = user;
676    }
677    if (group != (gid_t) -1) {
678        newattrs.ia_valid |= ATTR_GID;
679        newattrs.ia_gid = group;
680    }
681    if (!S_ISDIR(inode->i_mode))
682        newattrs.ia_valid |=
683            ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
684    mutex_lock(&inode->i_mutex);
685    error = notify_change(dentry, &newattrs);
686    mutex_unlock(&inode->i_mutex);
687
688    return error;
689}
690
691SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
692{
693    struct path path;
694    int error;
695
696    error = user_path(filename, &path);
697    if (error)
698        goto out;
699    error = mnt_want_write(path.mnt);
700    if (error)
701        goto out_release;
702    error = chown_common(path.dentry, user, group);
703    mnt_drop_write(path.mnt);
704out_release:
705    path_put(&path);
706out:
707    return error;
708}
709
710SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
711        gid_t, group, int, flag)
712{
713    struct path path;
714    int error = -EINVAL;
715    int follow;
716
717    if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
718        goto out;
719
720    follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
721    error = user_path_at(dfd, filename, follow, &path);
722    if (error)
723        goto out;
724    error = mnt_want_write(path.mnt);
725    if (error)
726        goto out_release;
727    error = chown_common(path.dentry, user, group);
728    mnt_drop_write(path.mnt);
729out_release:
730    path_put(&path);
731out:
732    return error;
733}
734
735SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
736{
737    struct path path;
738    int error;
739
740    error = user_lpath(filename, &path);
741    if (error)
742        goto out;
743    error = mnt_want_write(path.mnt);
744    if (error)
745        goto out_release;
746    error = chown_common(path.dentry, user, group);
747    mnt_drop_write(path.mnt);
748out_release:
749    path_put(&path);
750out:
751    return error;
752}
753
754SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
755{
756    struct file * file;
757    int error = -EBADF;
758    struct dentry * dentry;
759
760    file = fget(fd);
761    if (!file)
762        goto out;
763
764    error = mnt_want_write_file(file);
765    if (error)
766        goto out_fput;
767    dentry = file->f_path.dentry;
768    audit_inode(NULL, dentry);
769    error = chown_common(dentry, user, group);
770    mnt_drop_write(file->f_path.mnt);
771out_fput:
772    fput(file);
773out:
774    return error;
775}
776
777/*
778 * You have to be very careful that these write
779 * counts get cleaned up in error cases and
780 * upon __fput(). This should probably never
781 * be called outside of __dentry_open().
782 */
783static inline int __get_file_write_access(struct inode *inode,
784                      struct vfsmount *mnt)
785{
786    int error;
787    error = get_write_access(inode);
788    if (error)
789        return error;
790    /*
791     * Do not take mount writer counts on
792     * special files since no writes to
793     * the mount itself will occur.
794     */
795    if (!special_file(inode->i_mode)) {
796        /*
797         * Balanced in __fput()
798         */
799        error = mnt_want_write(mnt);
800        if (error)
801            put_write_access(inode);
802    }
803    return error;
804}
805
806static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
807                    int flags, struct file *f,
808                    int (*open)(struct inode *, struct file *),
809                    const struct cred *cred)
810{
811    struct inode *inode;
812    int error;
813
814    f->f_flags = flags;
815    f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
816                FMODE_PREAD | FMODE_PWRITE;
817    inode = dentry->d_inode;
818    if (f->f_mode & FMODE_WRITE) {
819        error = __get_file_write_access(inode, mnt);
820        if (error)
821            goto cleanup_file;
822        if (!special_file(inode->i_mode))
823            file_take_write(f);
824    }
825
826    f->f_mapping = inode->i_mapping;
827    f->f_path.dentry = dentry;
828    f->f_path.mnt = mnt;
829    f->f_pos = 0;
830    f->f_op = fops_get(inode->i_fop);
831    file_move(f, &inode->i_sb->s_files);
832
833    error = security_dentry_open(f, cred);
834    if (error)
835        goto cleanup_all;
836
837    if (!open && f->f_op)
838        open = f->f_op->open;
839    if (open) {
840        error = open(inode, f);
841        if (error)
842            goto cleanup_all;
843    }
844
845    f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
846
847    file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
848
849    /* NB: we're sure to have correct a_ops only after f_op->open */
850    if (f->f_flags & O_DIRECT) {
851        if (!f->f_mapping->a_ops ||
852            ((!f->f_mapping->a_ops->direct_IO) &&
853            (!f->f_mapping->a_ops->get_xip_mem))) {
854            fput(f);
855            f = ERR_PTR(-EINVAL);
856        }
857    }
858
859    return f;
860
861cleanup_all:
862    fops_put(f->f_op);
863    if (f->f_mode & FMODE_WRITE) {
864        put_write_access(inode);
865        if (!special_file(inode->i_mode)) {
866            /*
867             * We don't consider this a real
868             * mnt_want/drop_write() pair
869             * because it all happenend right
870             * here, so just reset the state.
871             */
872            file_reset_write(f);
873            mnt_drop_write(mnt);
874        }
875    }
876    file_kill(f);
877    f->f_path.dentry = NULL;
878    f->f_path.mnt = NULL;
879cleanup_file:
880    put_filp(f);
881    dput(dentry);
882    mntput(mnt);
883    return ERR_PTR(error);
884}
885
886/**
887 * lookup_instantiate_filp - instantiates the open intent filp
888 * @nd: pointer to nameidata
889 * @dentry: pointer to dentry
890 * @open: open callback
891 *
892 * Helper for filesystems that want to use lookup open intents and pass back
893 * a fully instantiated struct file to the caller.
894 * This function is meant to be called from within a filesystem's
895 * lookup method.
896 * Beware of calling it for non-regular files! Those ->open methods might block
897 * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
898 * leading to a deadlock, as nobody can open that fifo anymore, because
899 * another process to open fifo will block on locked parent when doing lookup).
900 * Note that in case of error, nd->intent.open.file is destroyed, but the
901 * path information remains valid.
902 * If the open callback is set to NULL, then the standard f_op->open()
903 * filesystem callback is substituted.
904 */
905struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
906        int (*open)(struct inode *, struct file *))
907{
908    const struct cred *cred = current_cred();
909
910    if (IS_ERR(nd->intent.open.file))
911        goto out;
912    if (IS_ERR(dentry))
913        goto out_err;
914    nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
915                         nd->intent.open.flags - 1,
916                         nd->intent.open.file,
917                         open, cred);
918out:
919    return nd->intent.open.file;
920out_err:
921    release_open_intent(nd);
922    nd->intent.open.file = (struct file *)dentry;
923    goto out;
924}
925EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
926
927/**
928 * nameidata_to_filp - convert a nameidata to an open filp.
929 * @nd: pointer to nameidata
930 * @flags: open flags
931 *
932 * Note that this function destroys the original nameidata
933 */
934struct file *nameidata_to_filp(struct nameidata *nd, int flags)
935{
936    const struct cred *cred = current_cred();
937    struct file *filp;
938
939    /* Pick up the filp from the open intent */
940    filp = nd->intent.open.file;
941    /* Has the filesystem initialised the file for us? */
942    if (filp->f_path.dentry == NULL)
943        filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
944                     NULL, cred);
945    else
946        path_put(&nd->path);
947    return filp;
948}
949
950/*
951 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
952 * error.
953 */
954struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
955             const struct cred *cred)
956{
957    int error;
958    struct file *f;
959
960    /*
961     * We must always pass in a valid mount pointer. Historically
962     * callers got away with not passing it, but we must enforce this at
963     * the earliest possible point now to avoid strange problems deep in the
964     * filesystem stack.
965     */
966    if (!mnt) {
967        printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__);
968        dump_stack();
969        return ERR_PTR(-EINVAL);
970    }
971
972    error = -ENFILE;
973    f = get_empty_filp();
974    if (f == NULL) {
975        dput(dentry);
976        mntput(mnt);
977        return ERR_PTR(error);
978    }
979
980    return __dentry_open(dentry, mnt, flags, f, NULL, cred);
981}
982EXPORT_SYMBOL(dentry_open);
983
984static void __put_unused_fd(struct files_struct *files, unsigned int fd)
985{
986    struct fdtable *fdt = files_fdtable(files);
987    __FD_CLR(fd, fdt->open_fds);
988    if (fd < files->next_fd)
989        files->next_fd = fd;
990}
991
992void put_unused_fd(unsigned int fd)
993{
994    struct files_struct *files = current->files;
995    spin_lock(&files->file_lock);
996    __put_unused_fd(files, fd);
997    spin_unlock(&files->file_lock);
998}
999
1000EXPORT_SYMBOL(put_unused_fd);
1001
1002/*
1003 * Install a file pointer in the fd array.
1004 *
1005 * The VFS is full of places where we drop the files lock between
1006 * setting the open_fds bitmap and installing the file in the file
1007 * array. At any such point, we are vulnerable to a dup2() race
1008 * installing a file in the array before us. We need to detect this and
1009 * fput() the struct file we are about to overwrite in this case.
1010 *
1011 * It should never happen - if we allow dup2() do it, _really_ bad things
1012 * will follow.
1013 */
1014
1015void fd_install(unsigned int fd, struct file *file)
1016{
1017    struct files_struct *files = current->files;
1018    struct fdtable *fdt;
1019    spin_lock(&files->file_lock);
1020    fdt = files_fdtable(files);
1021    BUG_ON(fdt->fd[fd] != NULL);
1022    rcu_assign_pointer(fdt->fd[fd], file);
1023    spin_unlock(&files->file_lock);
1024}
1025
1026EXPORT_SYMBOL(fd_install);
1027
1028long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
1029{
1030    char *tmp = getname(filename);
1031    int fd = PTR_ERR(tmp);
1032
1033    if (!IS_ERR(tmp)) {
1034        fd = get_unused_fd_flags(flags);
1035        if (fd >= 0) {
1036            struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
1037            if (IS_ERR(f)) {
1038                put_unused_fd(fd);
1039                fd = PTR_ERR(f);
1040            } else {
1041                fsnotify_open(f->f_path.dentry);
1042                fd_install(fd, f);
1043            }
1044        }
1045        putname(tmp);
1046    }
1047    return fd;
1048}
1049
1050SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
1051{
1052    long ret;
1053
1054    if (force_o_largefile())
1055        flags |= O_LARGEFILE;
1056
1057    ret = do_sys_open(AT_FDCWD, filename, flags, mode);
1058    /* avoid REGPARM breakage on x86: */
1059    asmlinkage_protect(3, ret, filename, flags, mode);
1060    return ret;
1061}
1062
1063SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
1064        int, mode)
1065{
1066    long ret;
1067
1068    if (force_o_largefile())
1069        flags |= O_LARGEFILE;
1070
1071    ret = do_sys_open(dfd, filename, flags, mode);
1072    /* avoid REGPARM breakage on x86: */
1073    asmlinkage_protect(4, ret, dfd, filename, flags, mode);
1074    return ret;
1075}
1076
1077#ifndef __alpha__
1078
1079/*
1080 * For backward compatibility? Maybe this should be moved
1081 * into arch/i386 instead?
1082 */
1083SYSCALL_DEFINE2(creat, const char __user *, pathname, int, mode)
1084{
1085    return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
1086}
1087
1088#endif
1089
1090/*
1091 * "id" is the POSIX thread ID. We use the
1092 * files pointer for this..
1093 */
1094int filp_close(struct file *filp, fl_owner_t id)
1095{
1096    int retval = 0;
1097
1098    if (!file_count(filp)) {
1099        printk(KERN_ERR "VFS: Close: file count is 0\n");
1100        return 0;
1101    }
1102
1103    if (filp->f_op && filp->f_op->flush)
1104        retval = filp->f_op->flush(filp, id);
1105
1106    dnotify_flush(filp, id);
1107    locks_remove_posix(filp, id);
1108    fput(filp);
1109    return retval;
1110}
1111
1112EXPORT_SYMBOL(filp_close);
1113
1114/*
1115 * Careful here! We test whether the file pointer is NULL before
1116 * releasing the fd. This ensures that one clone task can't release
1117 * an fd while another clone is opening it.
1118 */
1119SYSCALL_DEFINE1(close, unsigned int, fd)
1120{
1121    struct file * filp;
1122    struct files_struct *files = current->files;
1123    struct fdtable *fdt;
1124    int retval;
1125
1126    spin_lock(&files->file_lock);
1127    fdt = files_fdtable(files);
1128    if (fd >= fdt->max_fds)
1129        goto out_unlock;
1130    filp = fdt->fd[fd];
1131    if (!filp)
1132        goto out_unlock;
1133    rcu_assign_pointer(fdt->fd[fd], NULL);
1134    FD_CLR(fd, fdt->close_on_exec);
1135    __put_unused_fd(files, fd);
1136    spin_unlock(&files->file_lock);
1137    retval = filp_close(filp, files);
1138
1139    /* can't restart close syscall because file table entry was cleared */
1140    if (unlikely(retval == -ERESTARTSYS ||
1141             retval == -ERESTARTNOINTR ||
1142             retval == -ERESTARTNOHAND ||
1143             retval == -ERESTART_RESTARTBLOCK))
1144        retval = -EINTR;
1145
1146    return retval;
1147
1148out_unlock:
1149    spin_unlock(&files->file_lock);
1150    return -EBADF;
1151}
1152EXPORT_SYMBOL(sys_close);
1153
1154/*
1155 * This routine simulates a hangup on the tty, to arrange that users
1156 * are given clean terminals at login time.
1157 */
1158SYSCALL_DEFINE0(vhangup)
1159{
1160    if (capable(CAP_SYS_TTY_CONFIG)) {
1161        tty_vhangup_self();
1162        return 0;
1163    }
1164    return -EPERM;
1165}
1166
1167/*
1168 * Called when an inode is about to be open.
1169 * We use this to disallow opening large files on 32bit systems if
1170 * the caller didn't specify O_LARGEFILE. On 64bit systems we force
1171 * on this flag in sys_open.
1172 */
1173int generic_file_open(struct inode * inode, struct file * filp)
1174{
1175    if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1176        return -EOVERFLOW;
1177    return 0;
1178}
1179
1180EXPORT_SYMBOL(generic_file_open);
1181
1182/*
1183 * This is used by subsystems that don't want seekable
1184 * file descriptors
1185 */
1186int nonseekable_open(struct inode *inode, struct file *filp)
1187{
1188    filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1189    return 0;
1190}
1191
1192EXPORT_SYMBOL(nonseekable_open);
1193

Archive Download this file



interactive