Root/fs/read_write.c

/*
 * linux/fs/read_write.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */
6
7#include <linux/slab.h>
8#include <linux/stat.h>
9#include <linux/fcntl.h>
10#include <linux/file.h>
11#include <linux/uio.h>
12#include <linux/fsnotify.h>
13#include <linux/security.h>
14#include <linux/module.h>
15#include <linux/syscalls.h>
16#include <linux/pagemap.h>
17#include <linux/splice.h>
18#include "read_write.h"
19
20#include <asm/uaccess.h>
21#include <asm/unistd.h>
22
23const struct file_operations generic_ro_fops = {
24    .llseek = generic_file_llseek,
25    .read = do_sync_read,
26    .aio_read = generic_file_aio_read,
27    .mmap = generic_file_readonly_mmap,
28    .splice_read = generic_file_splice_read,
29};
30
31EXPORT_SYMBOL(generic_ro_fops);
32
33static inline int unsigned_offsets(struct file *file)
34{
35    return file->f_mode & FMODE_UNSIGNED_OFFSET;
36}
37
38/**
39 * generic_file_llseek_unlocked - lockless generic llseek implementation
40 * @file: file structure to seek on
41 * @offset: file offset to seek to
42 * @origin: type of seek
43 *
44 * Updates the file offset to the value specified by @offset and @origin.
45 * Locking must be provided by the caller.
46 */
47loff_t
48generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
49{
50    struct inode *inode = file->f_mapping->host;
51
52    switch (origin) {
53    case SEEK_END:
54        offset += inode->i_size;
55        break;
56    case SEEK_CUR:
57        /*
58         * Here we special-case the lseek(fd, 0, SEEK_CUR)
59         * position-querying operation. Avoid rewriting the "same"
60         * f_pos value back to the file because a concurrent read(),
61         * write() or lseek() might have altered it
62         */
63        if (offset == 0)
64            return file->f_pos;
65        offset += file->f_pos;
66        break;
67    }
68
69    if (offset < 0 && !unsigned_offsets(file))
70        return -EINVAL;
71    if (offset > inode->i_sb->s_maxbytes)
72        return -EINVAL;
73
74    /* Special lock needed here? */
75    if (offset != file->f_pos) {
76        file->f_pos = offset;
77        file->f_version = 0;
78    }
79
80    return offset;
81}
82EXPORT_SYMBOL(generic_file_llseek_unlocked);
83
84/**
85 * generic_file_llseek - generic llseek implementation for regular files
86 * @file: file structure to seek on
87 * @offset: file offset to seek to
88 * @origin: type of seek
89 *
90 * This is a generic implemenation of ->llseek useable for all normal local
91 * filesystems. It just updates the file offset to the value specified by
92 * @offset and @origin under i_mutex.
93 */
94loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
95{
96    loff_t rval;
97
98    mutex_lock(&file->f_dentry->d_inode->i_mutex);
99    rval = generic_file_llseek_unlocked(file, offset, origin);
100    mutex_unlock(&file->f_dentry->d_inode->i_mutex);
101
102    return rval;
103}
104EXPORT_SYMBOL(generic_file_llseek);
105
106/**
107 * noop_llseek - No Operation Performed llseek implementation
108 * @file: file structure to seek on
109 * @offset: file offset to seek to
110 * @origin: type of seek
111 *
112 * This is an implementation of ->llseek useable for the rare special case when
113 * userspace expects the seek to succeed but the (device) file is actually not
114 * able to perform the seek. In this case you use noop_llseek() instead of
115 * falling back to the default implementation of ->llseek.
116 */
117loff_t noop_llseek(struct file *file, loff_t offset, int origin)
118{
119    return file->f_pos;
120}
121EXPORT_SYMBOL(noop_llseek);
122
123loff_t no_llseek(struct file *file, loff_t offset, int origin)
124{
125    return -ESPIPE;
126}
127EXPORT_SYMBOL(no_llseek);
128
129loff_t default_llseek(struct file *file, loff_t offset, int origin)
130{
131    loff_t retval;
132
133    mutex_lock(&file->f_dentry->d_inode->i_mutex);
134    switch (origin) {
135        case SEEK_END:
136            offset += i_size_read(file->f_path.dentry->d_inode);
137            break;
138        case SEEK_CUR:
139            if (offset == 0) {
140                retval = file->f_pos;
141                goto out;
142            }
143            offset += file->f_pos;
144    }
145    retval = -EINVAL;
146    if (offset >= 0 || unsigned_offsets(file)) {
147        if (offset != file->f_pos) {
148            file->f_pos = offset;
149            file->f_version = 0;
150        }
151        retval = offset;
152    }
153out:
154    mutex_unlock(&file->f_dentry->d_inode->i_mutex);
155    return retval;
156}
157EXPORT_SYMBOL(default_llseek);
158
159loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
160{
161    loff_t (*fn)(struct file *, loff_t, int);
162
163    fn = no_llseek;
164    if (file->f_mode & FMODE_LSEEK) {
165        if (file->f_op && file->f_op->llseek)
166            fn = file->f_op->llseek;
167    }
168    return fn(file, offset, origin);
169}
170EXPORT_SYMBOL(vfs_llseek);
171
172SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
173{
174    off_t retval;
175    struct file * file;
176    int fput_needed;
177
178    retval = -EBADF;
179    file = fget_light(fd, &fput_needed);
180    if (!file)
181        goto bad;
182
183    retval = -EINVAL;
184    if (origin <= SEEK_MAX) {
185        loff_t res = vfs_llseek(file, offset, origin);
186        retval = res;
187        if (res != (loff_t)retval)
188            retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
189    }
190    fput_light(file, fput_needed);
191bad:
192    return retval;
193}
194
#ifdef __ARCH_WANT_SYS_LLSEEK
/*
 * llseek: seek using a 64-bit offset assembled from @offset_high and
 * @offset_low, and copy the resulting position to the user pointer @result.
 * Returns 0 on success, the (negative) vfs_llseek() result on seek failure,
 * -EFAULT if the copy to @result fails, -EINVAL for an origin above
 * SEEK_MAX and -EBADF for a bad descriptor.
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
        unsigned long, offset_low, loff_t __user *, result,
        unsigned int, origin)
{
    struct file *file;
    loff_t offset;
    int retval;
    int fput_needed;

    file = fget_light(fd, &fput_needed);
    if (!file)
        return -EBADF;

    if (origin > SEEK_MAX) {
        retval = -EINVAL;
        goto out_putf;
    }

    offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
            origin);

    if (offset < 0)
        retval = (int)offset;
    else if (copy_to_user(result, &offset, sizeof(offset)))
        retval = -EFAULT;
    else
        retval = 0;

out_putf:
    fput_light(file, fput_needed);
    return retval;
}
#endif
229
230
231/*
232 * rw_verify_area doesn't like huge counts. We limit
233 * them to something that fits in "int" so that others
234 * won't have to do range checks all the time.
235 */
236int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
237{
238    struct inode *inode;
239    loff_t pos;
240    int retval = -EINVAL;
241
242    inode = file->f_path.dentry->d_inode;
243    if (unlikely((ssize_t) count < 0))
244        return retval;
245    pos = *ppos;
246    if (unlikely(pos < 0)) {
247        if (!unsigned_offsets(file))
248            return retval;
249        if (count >= -pos) /* both values are in 0..LLONG_MAX */
250            return -EOVERFLOW;
251    } else if (unlikely((loff_t) (pos + count) < 0)) {
252        if (!unsigned_offsets(file))
253            return retval;
254    }
255
256    if (unlikely(inode->i_flock && mandatory_lock(inode))) {
257        retval = locks_mandatory_area(
258            read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
259            inode, file, pos, count);
260        if (retval < 0)
261            return retval;
262    }
263    retval = security_file_permission(file,
264                read_write == READ ? MAY_READ : MAY_WRITE);
265    if (retval)
266        return retval;
267    return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
268}
269
270static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
271{
272    set_current_state(TASK_UNINTERRUPTIBLE);
273    if (!kiocbIsKicked(iocb))
274        schedule();
275    else
276        kiocbClearKicked(iocb);
277    __set_current_state(TASK_RUNNING);
278}
279
280ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
281{
282    struct iovec iov = { .iov_base = buf, .iov_len = len };
283    struct kiocb kiocb;
284    ssize_t ret;
285
286    init_sync_kiocb(&kiocb, filp);
287    kiocb.ki_pos = *ppos;
288    kiocb.ki_left = len;
289    kiocb.ki_nbytes = len;
290
291    for (;;) {
292        ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
293        if (ret != -EIOCBRETRY)
294            break;
295        wait_on_retry_sync_kiocb(&kiocb);
296    }
297
298    if (-EIOCBQUEUED == ret)
299        ret = wait_on_sync_kiocb(&kiocb);
300    *ppos = kiocb.ki_pos;
301    return ret;
302}
303
304EXPORT_SYMBOL(do_sync_read);
305
306ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
307{
308    ssize_t ret;
309
310    if (!(file->f_mode & FMODE_READ))
311        return -EBADF;
312    if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
313        return -EINVAL;
314    if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
315        return -EFAULT;
316
317    ret = rw_verify_area(READ, file, pos, count);
318    if (ret >= 0) {
319        count = ret;
320        if (file->f_op->read)
321            ret = file->f_op->read(file, buf, count, pos);
322        else
323            ret = do_sync_read(file, buf, count, pos);
324        if (ret > 0) {
325            fsnotify_access(file);
326            add_rchar(current, ret);
327        }
328        inc_syscr(current);
329    }
330
331    return ret;
332}
333
334EXPORT_SYMBOL(vfs_read);
335
336ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
337{
338    struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
339    struct kiocb kiocb;
340    ssize_t ret;
341
342    init_sync_kiocb(&kiocb, filp);
343    kiocb.ki_pos = *ppos;
344    kiocb.ki_left = len;
345    kiocb.ki_nbytes = len;
346
347    for (;;) {
348        ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
349        if (ret != -EIOCBRETRY)
350            break;
351        wait_on_retry_sync_kiocb(&kiocb);
352    }
353
354    if (-EIOCBQUEUED == ret)
355        ret = wait_on_sync_kiocb(&kiocb);
356    *ppos = kiocb.ki_pos;
357    return ret;
358}
359
360EXPORT_SYMBOL(do_sync_write);
361
362ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
363{
364    ssize_t ret;
365
366    if (!(file->f_mode & FMODE_WRITE))
367        return -EBADF;
368    if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
369        return -EINVAL;
370    if (unlikely(!access_ok(VERIFY_READ, buf, count)))
371        return -EFAULT;
372
373    ret = rw_verify_area(WRITE, file, pos, count);
374    if (ret >= 0) {
375        count = ret;
376        if (file->f_op->write)
377            ret = file->f_op->write(file, buf, count, pos);
378        else
379            ret = do_sync_write(file, buf, count, pos);
380        if (ret > 0) {
381            fsnotify_modify(file);
382            add_wchar(current, ret);
383        }
384        inc_syscw(current);
385    }
386
387    return ret;
388}
389
390EXPORT_SYMBOL(vfs_write);
391
392static inline loff_t file_pos_read(struct file *file)
393{
394    return file->f_pos;
395}
396
397static inline void file_pos_write(struct file *file, loff_t pos)
398{
399    file->f_pos = pos;
400}
401
402SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
403{
404    struct file *file;
405    ssize_t ret = -EBADF;
406    int fput_needed;
407
408    file = fget_light(fd, &fput_needed);
409    if (file) {
410        loff_t pos = file_pos_read(file);
411        ret = vfs_read(file, buf, count, &pos);
412        file_pos_write(file, pos);
413        fput_light(file, fput_needed);
414    }
415
416    return ret;
417}
418
419SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
420        size_t, count)
421{
422    struct file *file;
423    ssize_t ret = -EBADF;
424    int fput_needed;
425
426    file = fget_light(fd, &fput_needed);
427    if (file) {
428        loff_t pos = file_pos_read(file);
429        ret = vfs_write(file, buf, count, &pos);
430        file_pos_write(file, pos);
431        fput_light(file, fput_needed);
432    }
433
434    return ret;
435}
436
437SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
438            size_t count, loff_t pos)
439{
440    struct file *file;
441    ssize_t ret = -EBADF;
442    int fput_needed;
443
444    if (pos < 0)
445        return -EINVAL;
446
447    file = fget_light(fd, &fput_needed);
448    if (file) {
449        ret = -ESPIPE;
450        if (file->f_mode & FMODE_PREAD)
451            ret = vfs_read(file, buf, count, &pos);
452        fput_light(file, fput_needed);
453    }
454
455    return ret;
456}
457#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
458asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
459{
460    return SYSC_pread64((unsigned int) fd, (char __user *) buf,
461                (size_t) count, pos);
462}
463SYSCALL_ALIAS(sys_pread64, SyS_pread64);
464#endif
465
466SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
467             size_t count, loff_t pos)
468{
469    struct file *file;
470    ssize_t ret = -EBADF;
471    int fput_needed;
472
473    if (pos < 0)
474        return -EINVAL;
475
476    file = fget_light(fd, &fput_needed);
477    if (file) {
478        ret = -ESPIPE;
479        if (file->f_mode & FMODE_PWRITE)
480            ret = vfs_write(file, buf, count, &pos);
481        fput_light(file, fput_needed);
482    }
483
484    return ret;
485}
486#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
487asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
488{
489    return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
490                 (size_t) count, pos);
491}
492SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
493#endif
494
/*
 * Trim an iovec array in place so that it describes at most @to bytes.
 * The segment in which the running total reaches @to has its length cut
 * down to the remainder. Returns the number of segments still in use
 * (@nr_segs when the array is shorter than @to).
 */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
{
    unsigned long used;
    size_t consumed = 0;

    for (used = 0; used < nr_segs; used++) {
        struct iovec *cur = &iov[used];

        if (consumed + cur->iov_len >= to) {
            /* this segment reaches the limit: keep only the remainder */
            cur->iov_len = to - consumed;
            return used + 1;
        }
        consumed += cur->iov_len;
    }
    return used;
}
514EXPORT_SYMBOL(iov_shorten);
515
516ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
517        unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
518{
519    struct kiocb kiocb;
520    ssize_t ret;
521
522    init_sync_kiocb(&kiocb, filp);
523    kiocb.ki_pos = *ppos;
524    kiocb.ki_left = len;
525    kiocb.ki_nbytes = len;
526
527    for (;;) {
528        ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
529        if (ret != -EIOCBRETRY)
530            break;
531        wait_on_retry_sync_kiocb(&kiocb);
532    }
533
534    if (ret == -EIOCBQUEUED)
535        ret = wait_on_sync_kiocb(&kiocb);
536    *ppos = kiocb.ki_pos;
537    return ret;
538}
539
540/* Do it by hand, with file-ops */
541ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
542        unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
543{
544    struct iovec *vector = iov;
545    ssize_t ret = 0;
546
547    while (nr_segs > 0) {
548        void __user *base;
549        size_t len;
550        ssize_t nr;
551
552        base = vector->iov_base;
553        len = vector->iov_len;
554        vector++;
555        nr_segs--;
556
557        nr = fn(filp, base, len, ppos);
558
559        if (nr < 0) {
560            if (!ret)
561                ret = nr;
562            break;
563        }
564        ret += nr;
565        if (nr != len)
566            break;
567    }
568
569    return ret;
570}
571
572/* A write operation does a read from user space and vice versa */
573#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
574
575ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
576                  unsigned long nr_segs, unsigned long fast_segs,
577                  struct iovec *fast_pointer,
578                  struct iovec **ret_pointer)
579{
580    unsigned long seg;
581    ssize_t ret;
582    struct iovec *iov = fast_pointer;
583
584    /*
585     * SuS says "The readv() function *may* fail if the iovcnt argument
586     * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
587     * traditionally returned zero for zero segments, so...
588     */
589    if (nr_segs == 0) {
590        ret = 0;
591        goto out;
592    }
593
594    /*
595     * First get the "struct iovec" from user memory and
596     * verify all the pointers
597     */
598    if (nr_segs > UIO_MAXIOV) {
599        ret = -EINVAL;
600        goto out;
601    }
602    if (nr_segs > fast_segs) {
603        iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
604        if (iov == NULL) {
605            ret = -ENOMEM;
606            goto out;
607        }
608    }
609    if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
610        ret = -EFAULT;
611        goto out;
612    }
613
614    /*
615     * According to the Single Unix Specification we should return EINVAL
616     * if an element length is < 0 when cast to ssize_t or if the
617     * total length would overflow the ssize_t return value of the
618     * system call.
619     *
620     * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
621     * overflow case.
622     */
623    ret = 0;
624    for (seg = 0; seg < nr_segs; seg++) {
625        void __user *buf = iov[seg].iov_base;
626        ssize_t len = (ssize_t)iov[seg].iov_len;
627
628        /* see if we we're about to use an invalid len or if
629         * it's about to overflow ssize_t */
630        if (len < 0) {
631            ret = -EINVAL;
632            goto out;
633        }
634        if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
635            ret = -EFAULT;
636            goto out;
637        }
638        if (len > MAX_RW_COUNT - ret) {
639            len = MAX_RW_COUNT - ret;
640            iov[seg].iov_len = len;
641        }
642        ret += len;
643    }
644out:
645    *ret_pointer = iov;
646    return ret;
647}
648
649static ssize_t do_readv_writev(int type, struct file *file,
650                   const struct iovec __user * uvector,
651                   unsigned long nr_segs, loff_t *pos)
652{
653    size_t tot_len;
654    struct iovec iovstack[UIO_FASTIOV];
655    struct iovec *iov = iovstack;
656    ssize_t ret;
657    io_fn_t fn;
658    iov_fn_t fnv;
659
660    if (!file->f_op) {
661        ret = -EINVAL;
662        goto out;
663    }
664
665    ret = rw_copy_check_uvector(type, uvector, nr_segs,
666            ARRAY_SIZE(iovstack), iovstack, &iov);
667    if (ret <= 0)
668        goto out;
669
670    tot_len = ret;
671    ret = rw_verify_area(type, file, pos, tot_len);
672    if (ret < 0)
673        goto out;
674
675    fnv = NULL;
676    if (type == READ) {
677        fn = file->f_op->read;
678        fnv = file->f_op->aio_read;
679    } else {
680        fn = (io_fn_t)file->f_op->write;
681        fnv = file->f_op->aio_write;
682    }
683
684    if (fnv)
685        ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
686                        pos, fnv);
687    else
688        ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
689
690out:
691    if (iov != iovstack)
692        kfree(iov);
693    if ((ret + (type == READ)) > 0) {
694        if (type == READ)
695            fsnotify_access(file);
696        else
697            fsnotify_modify(file);
698    }
699    return ret;
700}
701
702ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
703          unsigned long vlen, loff_t *pos)
704{
705    if (!(file->f_mode & FMODE_READ))
706        return -EBADF;
707    if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
708        return -EINVAL;
709
710    return do_readv_writev(READ, file, vec, vlen, pos);
711}
712
713EXPORT_SYMBOL(vfs_readv);
714
715ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
716           unsigned long vlen, loff_t *pos)
717{
718    if (!(file->f_mode & FMODE_WRITE))
719        return -EBADF;
720    if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
721        return -EINVAL;
722
723    return do_readv_writev(WRITE, file, vec, vlen, pos);
724}
725
726EXPORT_SYMBOL(vfs_writev);
727
728SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
729        unsigned long, vlen)
730{
731    struct file *file;
732    ssize_t ret = -EBADF;
733    int fput_needed;
734
735    file = fget_light(fd, &fput_needed);
736    if (file) {
737        loff_t pos = file_pos_read(file);
738        ret = vfs_readv(file, vec, vlen, &pos);
739        file_pos_write(file, pos);
740        fput_light(file, fput_needed);
741    }
742
743    if (ret > 0)
744        add_rchar(current, ret);
745    inc_syscr(current);
746    return ret;
747}
748
749SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
750        unsigned long, vlen)
751{
752    struct file *file;
753    ssize_t ret = -EBADF;
754    int fput_needed;
755
756    file = fget_light(fd, &fput_needed);
757    if (file) {
758        loff_t pos = file_pos_read(file);
759        ret = vfs_writev(file, vec, vlen, &pos);
760        file_pos_write(file, pos);
761        fput_light(file, fput_needed);
762    }
763
764    if (ret > 0)
765        add_wchar(current, ret);
766    inc_syscw(current);
767    return ret;
768}
769
770static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
771{
772#define HALF_LONG_BITS (BITS_PER_LONG / 2)
773    return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
774}
775
776SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
777        unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
778{
779    loff_t pos = pos_from_hilo(pos_h, pos_l);
780    struct file *file;
781    ssize_t ret = -EBADF;
782    int fput_needed;
783
784    if (pos < 0)
785        return -EINVAL;
786
787    file = fget_light(fd, &fput_needed);
788    if (file) {
789        ret = -ESPIPE;
790        if (file->f_mode & FMODE_PREAD)
791            ret = vfs_readv(file, vec, vlen, &pos);
792        fput_light(file, fput_needed);
793    }
794
795    if (ret > 0)
796        add_rchar(current, ret);
797    inc_syscr(current);
798    return ret;
799}
800
801SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
802        unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
803{
804    loff_t pos = pos_from_hilo(pos_h, pos_l);
805    struct file *file;
806    ssize_t ret = -EBADF;
807    int fput_needed;
808
809    if (pos < 0)
810        return -EINVAL;
811
812    file = fget_light(fd, &fput_needed);
813    if (file) {
814        ret = -ESPIPE;
815        if (file->f_mode & FMODE_PWRITE)
816            ret = vfs_writev(file, vec, vlen, &pos);
817        fput_light(file, fput_needed);
818    }
819
820    if (ret > 0)
821        add_wchar(current, ret);
822    inc_syscw(current);
823    return ret;
824}
825
826static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
827               size_t count, loff_t max)
828{
829    struct file * in_file, * out_file;
830    struct inode * in_inode, * out_inode;
831    loff_t pos;
832    ssize_t retval;
833    int fput_needed_in, fput_needed_out, fl;
834
835    /*
836     * Get input file, and verify that it is ok..
837     */
838    retval = -EBADF;
839    in_file = fget_light(in_fd, &fput_needed_in);
840    if (!in_file)
841        goto out;
842    if (!(in_file->f_mode & FMODE_READ))
843        goto fput_in;
844    retval = -ESPIPE;
845    if (!ppos)
846        ppos = &in_file->f_pos;
847    else
848        if (!(in_file->f_mode & FMODE_PREAD))
849            goto fput_in;
850    retval = rw_verify_area(READ, in_file, ppos, count);
851    if (retval < 0)
852        goto fput_in;
853    count = retval;
854
855    /*
856     * Get output file, and verify that it is ok..
857     */
858    retval = -EBADF;
859    out_file = fget_light(out_fd, &fput_needed_out);
860    if (!out_file)
861        goto fput_in;
862    if (!(out_file->f_mode & FMODE_WRITE))
863        goto fput_out;
864    retval = -EINVAL;
865    in_inode = in_file->f_path.dentry->d_inode;
866    out_inode = out_file->f_path.dentry->d_inode;
867    retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
868    if (retval < 0)
869        goto fput_out;
870    count = retval;
871
872    if (!max)
873        max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
874
875    pos = *ppos;
876    if (unlikely(pos + count > max)) {
877        retval = -EOVERFLOW;
878        if (pos >= max)
879            goto fput_out;
880        count = max - pos;
881    }
882
883    fl = 0;
884#if 0
885    /*
886     * We need to debate whether we can enable this or not. The
887     * man page documents EAGAIN return for the output at least,
888     * and the application is arguably buggy if it doesn't expect
889     * EAGAIN on a non-blocking file descriptor.
890     */
891    if (in_file->f_flags & O_NONBLOCK)
892        fl = SPLICE_F_NONBLOCK;
893#endif
894    retval = do_splice_direct(in_file, ppos, out_file, count, fl);
895
896    if (retval > 0) {
897        add_rchar(current, retval);
898        add_wchar(current, retval);
899    }
900
901    inc_syscr(current);
902    inc_syscw(current);
903    if (*ppos > max)
904        retval = -EOVERFLOW;
905
906fput_out:
907    fput_light(out_file, fput_needed_out);
908fput_in:
909    fput_light(in_file, fput_needed_in);
910out:
911    return retval;
912}
913
914SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
915{
916    loff_t pos;
917    off_t off;
918    ssize_t ret;
919
920    if (offset) {
921        if (unlikely(get_user(off, offset)))
922            return -EFAULT;
923        pos = off;
924        ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
925        if (unlikely(put_user(pos, offset)))
926            return -EFAULT;
927        return ret;
928    }
929
930    return do_sendfile(out_fd, in_fd, NULL, count, 0);
931}
932
933SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
934{
935    loff_t pos;
936    ssize_t ret;
937
938    if (offset) {
939        if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
940            return -EFAULT;
941        ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
942        if (unlikely(put_user(pos, offset)))
943            return -EFAULT;
944        return ret;
945    }
946
947    return do_sendfile(out_fd, in_fd, NULL, count, 0);
948}
949

Archive Download this file



interactive