Root/fs/read_write.c

Source at commit 9845c1745d3d531a5b9544f5322c62bfb4d4e9bc created 1 year 2 months ago.
By Xiangfu, rtc: jz4740 fix hwclock give time out
1/*
2 * linux/fs/read_write.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7#include <linux/slab.h>
8#include <linux/stat.h>
9#include <linux/fcntl.h>
10#include <linux/file.h>
11#include <linux/uio.h>
12#include <linux/fsnotify.h>
13#include <linux/security.h>
14#include <linux/module.h>
15#include <linux/syscalls.h>
16#include <linux/pagemap.h>
17#include <linux/splice.h>
18#include "read_write.h"
19
20#include <asm/uaccess.h>
21#include <asm/unistd.h>
22
23const struct file_operations generic_ro_fops = {
24    .llseek = generic_file_llseek,
25    .read = do_sync_read,
26    .aio_read = generic_file_aio_read,
27    .mmap = generic_file_readonly_mmap,
28    .splice_read = generic_file_splice_read,
29};
30
31EXPORT_SYMBOL(generic_ro_fops);
32
33static inline int unsigned_offsets(struct file *file)
34{
35    return file->f_mode & FMODE_UNSIGNED_OFFSET;
36}
37
38static loff_t lseek_execute(struct file *file, struct inode *inode,
39        loff_t offset, loff_t maxsize)
40{
41    if (offset < 0 && !unsigned_offsets(file))
42        return -EINVAL;
43    if (offset > maxsize)
44        return -EINVAL;
45
46    if (offset != file->f_pos) {
47        file->f_pos = offset;
48        file->f_version = 0;
49    }
50    return offset;
51}
52
53/**
54 * generic_file_llseek_size - generic llseek implementation for regular files
55 * @file: file structure to seek on
56 * @offset: file offset to seek to
57 * @origin: type of seek
58 * @size: max size of file system
59 *
60 * This is a variant of generic_file_llseek that allows passing in a custom
61 * file size.
62 *
63 * Synchronization:
64 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
65 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
66 * read/writes behave like SEEK_SET against seeks.
67 */
68loff_t
69generic_file_llseek_size(struct file *file, loff_t offset, int origin,
70        loff_t maxsize)
71{
72    struct inode *inode = file->f_mapping->host;
73
74    switch (origin) {
75    case SEEK_END:
76        offset += i_size_read(inode);
77        break;
78    case SEEK_CUR:
79        /*
80         * Here we special-case the lseek(fd, 0, SEEK_CUR)
81         * position-querying operation. Avoid rewriting the "same"
82         * f_pos value back to the file because a concurrent read(),
83         * write() or lseek() might have altered it
84         */
85        if (offset == 0)
86            return file->f_pos;
87        /*
88         * f_lock protects against read/modify/write race with other
89         * SEEK_CURs. Note that parallel writes and reads behave
90         * like SEEK_SET.
91         */
92        spin_lock(&file->f_lock);
93        offset = lseek_execute(file, inode, file->f_pos + offset,
94                       maxsize);
95        spin_unlock(&file->f_lock);
96        return offset;
97    case SEEK_DATA:
98        /*
99         * In the generic case the entire file is data, so as long as
100         * offset isn't at the end of the file then the offset is data.
101         */
102        if (offset >= i_size_read(inode))
103            return -ENXIO;
104        break;
105    case SEEK_HOLE:
106        /*
107         * There is a virtual hole at the end of the file, so as long as
108         * offset isn't i_size or larger, return i_size.
109         */
110        if (offset >= i_size_read(inode))
111            return -ENXIO;
112        offset = i_size_read(inode);
113        break;
114    }
115
116    return lseek_execute(file, inode, offset, maxsize);
117}
118EXPORT_SYMBOL(generic_file_llseek_size);
119
120/**
121 * generic_file_llseek - generic llseek implementation for regular files
122 * @file: file structure to seek on
123 * @offset: file offset to seek to
124 * @origin: type of seek
125 *
126 * This is a generic implemenation of ->llseek useable for all normal local
127 * filesystems. It just updates the file offset to the value specified by
128 * @offset and @origin under i_mutex.
129 */
130loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
131{
132    struct inode *inode = file->f_mapping->host;
133
134    return generic_file_llseek_size(file, offset, origin,
135                    inode->i_sb->s_maxbytes);
136}
137EXPORT_SYMBOL(generic_file_llseek);
138
139/**
140 * noop_llseek - No Operation Performed llseek implementation
141 * @file: file structure to seek on
142 * @offset: file offset to seek to
143 * @origin: type of seek
144 *
145 * This is an implementation of ->llseek useable for the rare special case when
146 * userspace expects the seek to succeed but the (device) file is actually not
147 * able to perform the seek. In this case you use noop_llseek() instead of
148 * falling back to the default implementation of ->llseek.
149 */
150loff_t noop_llseek(struct file *file, loff_t offset, int origin)
151{
152    return file->f_pos;
153}
154EXPORT_SYMBOL(noop_llseek);
155
156loff_t no_llseek(struct file *file, loff_t offset, int origin)
157{
158    return -ESPIPE;
159}
160EXPORT_SYMBOL(no_llseek);
161
162loff_t default_llseek(struct file *file, loff_t offset, int origin)
163{
164    struct inode *inode = file->f_path.dentry->d_inode;
165    loff_t retval;
166
167    mutex_lock(&inode->i_mutex);
168    switch (origin) {
169        case SEEK_END:
170            offset += i_size_read(inode);
171            break;
172        case SEEK_CUR:
173            if (offset == 0) {
174                retval = file->f_pos;
175                goto out;
176            }
177            offset += file->f_pos;
178            break;
179        case SEEK_DATA:
180            /*
181             * In the generic case the entire file is data, so as
182             * long as offset isn't at the end of the file then the
183             * offset is data.
184             */
185            if (offset >= inode->i_size) {
186                retval = -ENXIO;
187                goto out;
188            }
189            break;
190        case SEEK_HOLE:
191            /*
192             * There is a virtual hole at the end of the file, so
193             * as long as offset isn't i_size or larger, return
194             * i_size.
195             */
196            if (offset >= inode->i_size) {
197                retval = -ENXIO;
198                goto out;
199            }
200            offset = inode->i_size;
201            break;
202    }
203    retval = -EINVAL;
204    if (offset >= 0 || unsigned_offsets(file)) {
205        if (offset != file->f_pos) {
206            file->f_pos = offset;
207            file->f_version = 0;
208        }
209        retval = offset;
210    }
211out:
212    mutex_unlock(&inode->i_mutex);
213    return retval;
214}
215EXPORT_SYMBOL(default_llseek);
216
217loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
218{
219    loff_t (*fn)(struct file *, loff_t, int);
220
221    fn = no_llseek;
222    if (file->f_mode & FMODE_LSEEK) {
223        if (file->f_op && file->f_op->llseek)
224            fn = file->f_op->llseek;
225    }
226    return fn(file, offset, origin);
227}
228EXPORT_SYMBOL(vfs_llseek);
229
230SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
231{
232    off_t retval;
233    struct file * file;
234    int fput_needed;
235
236    retval = -EBADF;
237    file = fget_light(fd, &fput_needed);
238    if (!file)
239        goto bad;
240
241    retval = -EINVAL;
242    if (origin <= SEEK_MAX) {
243        loff_t res = vfs_llseek(file, offset, origin);
244        retval = res;
245        if (res != (loff_t)retval)
246            retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
247    }
248    fput_light(file, fput_needed);
249bad:
250    return retval;
251}
252
#ifdef __ARCH_WANT_SYS_LLSEEK
/*
 * llseek system call: the 64-bit offset arrives as two halves and the
 * resulting position is copied out through @result.  Returns 0 on success,
 * -EBADF, -EINVAL (origin above SEEK_MAX), -EFAULT (copy-out failed) or the
 * error produced by vfs_llseek().
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
        unsigned long, offset_low, loff_t __user *, result,
        unsigned int, origin)
{
    struct file *file;
    loff_t target, where;
    int fput_needed;
    int retval;

    file = fget_light(fd, &fput_needed);
    if (!file)
        return -EBADF;

    if (origin > SEEK_MAX) {
        retval = -EINVAL;
        goto out_putf;
    }

    target = ((loff_t) offset_high << 32) | offset_low;
    where = vfs_llseek(file, target, origin);

    if (where < 0)
        retval = (int)where;
    else if (copy_to_user(result, &where, sizeof(where)))
        retval = -EFAULT;
    else
        retval = 0;

out_putf:
    fput_light(file, fput_needed);
    return retval;
}
#endif
287
288
289/*
290 * rw_verify_area doesn't like huge counts. We limit
291 * them to something that fits in "int" so that others
292 * won't have to do range checks all the time.
293 */
294int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
295{
296    struct inode *inode;
297    loff_t pos;
298    int retval = -EINVAL;
299
300    inode = file->f_path.dentry->d_inode;
301    if (unlikely((ssize_t) count < 0))
302        return retval;
303    pos = *ppos;
304    if (unlikely(pos < 0)) {
305        if (!unsigned_offsets(file))
306            return retval;
307        if (count >= -pos) /* both values are in 0..LLONG_MAX */
308            return -EOVERFLOW;
309    } else if (unlikely((loff_t) (pos + count) < 0)) {
310        if (!unsigned_offsets(file))
311            return retval;
312    }
313
314    if (unlikely(inode->i_flock && mandatory_lock(inode))) {
315        retval = locks_mandatory_area(
316            read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
317            inode, file, pos, count);
318        if (retval < 0)
319            return retval;
320    }
321    retval = security_file_permission(file,
322                read_write == READ ? MAY_READ : MAY_WRITE);
323    if (retval)
324        return retval;
325    return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
326}
327
328static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
329{
330    set_current_state(TASK_UNINTERRUPTIBLE);
331    if (!kiocbIsKicked(iocb))
332        schedule();
333    else
334        kiocbClearKicked(iocb);
335    __set_current_state(TASK_RUNNING);
336}
337
338ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
339{
340    struct iovec iov = { .iov_base = buf, .iov_len = len };
341    struct kiocb kiocb;
342    ssize_t ret;
343
344    init_sync_kiocb(&kiocb, filp);
345    kiocb.ki_pos = *ppos;
346    kiocb.ki_left = len;
347    kiocb.ki_nbytes = len;
348
349    for (;;) {
350        ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
351        if (ret != -EIOCBRETRY)
352            break;
353        wait_on_retry_sync_kiocb(&kiocb);
354    }
355
356    if (-EIOCBQUEUED == ret)
357        ret = wait_on_sync_kiocb(&kiocb);
358    *ppos = kiocb.ki_pos;
359    return ret;
360}
361
362EXPORT_SYMBOL(do_sync_read);
363
364ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
365{
366    ssize_t ret;
367
368    if (!(file->f_mode & FMODE_READ))
369        return -EBADF;
370    if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
371        return -EINVAL;
372    if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
373        return -EFAULT;
374
375    ret = rw_verify_area(READ, file, pos, count);
376    if (ret >= 0) {
377        count = ret;
378        if (file->f_op->read)
379            ret = file->f_op->read(file, buf, count, pos);
380        else
381            ret = do_sync_read(file, buf, count, pos);
382        if (ret > 0) {
383            fsnotify_access(file);
384            add_rchar(current, ret);
385        }
386        inc_syscr(current);
387    }
388
389    return ret;
390}
391
392EXPORT_SYMBOL(vfs_read);
393
394ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
395{
396    struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
397    struct kiocb kiocb;
398    ssize_t ret;
399
400    init_sync_kiocb(&kiocb, filp);
401    kiocb.ki_pos = *ppos;
402    kiocb.ki_left = len;
403    kiocb.ki_nbytes = len;
404
405    for (;;) {
406        ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
407        if (ret != -EIOCBRETRY)
408            break;
409        wait_on_retry_sync_kiocb(&kiocb);
410    }
411
412    if (-EIOCBQUEUED == ret)
413        ret = wait_on_sync_kiocb(&kiocb);
414    *ppos = kiocb.ki_pos;
415    return ret;
416}
417
418EXPORT_SYMBOL(do_sync_write);
419
420ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
421{
422    ssize_t ret;
423
424    if (!(file->f_mode & FMODE_WRITE))
425        return -EBADF;
426    if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
427        return -EINVAL;
428    if (unlikely(!access_ok(VERIFY_READ, buf, count)))
429        return -EFAULT;
430
431    ret = rw_verify_area(WRITE, file, pos, count);
432    if (ret >= 0) {
433        count = ret;
434        if (file->f_op->write)
435            ret = file->f_op->write(file, buf, count, pos);
436        else
437            ret = do_sync_write(file, buf, count, pos);
438        if (ret > 0) {
439            fsnotify_modify(file);
440            add_wchar(current, ret);
441        }
442        inc_syscw(current);
443    }
444
445    return ret;
446}
447
448EXPORT_SYMBOL(vfs_write);
449
450static inline loff_t file_pos_read(struct file *file)
451{
452    return file->f_pos;
453}
454
455static inline void file_pos_write(struct file *file, loff_t pos)
456{
457    file->f_pos = pos;
458}
459
460SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
461{
462    struct file *file;
463    ssize_t ret = -EBADF;
464    int fput_needed;
465
466    file = fget_light(fd, &fput_needed);
467    if (file) {
468        loff_t pos = file_pos_read(file);
469        ret = vfs_read(file, buf, count, &pos);
470        file_pos_write(file, pos);
471        fput_light(file, fput_needed);
472    }
473
474    return ret;
475}
476
477SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
478        size_t, count)
479{
480    struct file *file;
481    ssize_t ret = -EBADF;
482    int fput_needed;
483
484    file = fget_light(fd, &fput_needed);
485    if (file) {
486        loff_t pos = file_pos_read(file);
487        ret = vfs_write(file, buf, count, &pos);
488        file_pos_write(file, pos);
489        fput_light(file, fput_needed);
490    }
491
492    return ret;
493}
494
495SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
496            size_t count, loff_t pos)
497{
498    struct file *file;
499    ssize_t ret = -EBADF;
500    int fput_needed;
501
502    if (pos < 0)
503        return -EINVAL;
504
505    file = fget_light(fd, &fput_needed);
506    if (file) {
507        ret = -ESPIPE;
508        if (file->f_mode & FMODE_PREAD)
509            ret = vfs_read(file, buf, count, &pos);
510        fput_light(file, fput_needed);
511    }
512
513    return ret;
514}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
/*
 * Wrapper taking long-typed arguments: casts them to the real parameter
 * types and forwards to SYSC_pread64().  Aliased as sys_pread64.
 */
asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
{
    unsigned int ufd = (unsigned int) fd;
    char __user *ubuf = (char __user *) buf;
    size_t len = (size_t) count;

    return SYSC_pread64(ufd, ubuf, len, pos);
}
SYSCALL_ALIAS(sys_pread64, SyS_pread64);
#endif
523
524SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
525             size_t count, loff_t pos)
526{
527    struct file *file;
528    ssize_t ret = -EBADF;
529    int fput_needed;
530
531    if (pos < 0)
532        return -EINVAL;
533
534    file = fget_light(fd, &fput_needed);
535    if (file) {
536        ret = -ESPIPE;
537        if (file->f_mode & FMODE_PWRITE)
538            ret = vfs_write(file, buf, count, &pos);
539        fput_light(file, fput_needed);
540    }
541
542    return ret;
543}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
/*
 * Wrapper taking long-typed arguments: casts them to the real parameter
 * types and forwards to SYSC_pwrite64().  Aliased as sys_pwrite64.
 */
asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
{
    unsigned int ufd = (unsigned int) fd;
    const char __user *ubuf = (const char __user *) buf;
    size_t len = (size_t) count;

    return SYSC_pwrite64(ufd, ubuf, len, pos);
}
SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
#endif
552
/*
 * Trim an iovec array in place so that it describes at most @to bytes.
 *
 * Walks the segments accumulating their lengths; the first segment that
 * reaches or crosses @to has its iov_len cut down to the remainder and the
 * walk stops.  Returns the number of segments still in use (all @nr_segs of
 * them when the array is shorter than @to).
 */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
{
    unsigned long used;
    size_t consumed = 0;

    for (used = 0; used < nr_segs; used++) {
        struct iovec *cur = &iov[used];

        if (consumed + cur->iov_len >= to) {
            cur->iov_len = to - consumed;
            return used + 1;
        }
        consumed += cur->iov_len;
    }
    return used;
}
572EXPORT_SYMBOL(iov_shorten);
573
574ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
575        unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
576{
577    struct kiocb kiocb;
578    ssize_t ret;
579
580    init_sync_kiocb(&kiocb, filp);
581    kiocb.ki_pos = *ppos;
582    kiocb.ki_left = len;
583    kiocb.ki_nbytes = len;
584
585    for (;;) {
586        ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
587        if (ret != -EIOCBRETRY)
588            break;
589        wait_on_retry_sync_kiocb(&kiocb);
590    }
591
592    if (ret == -EIOCBQUEUED)
593        ret = wait_on_sync_kiocb(&kiocb);
594    *ppos = kiocb.ki_pos;
595    return ret;
596}
597
598/* Do it by hand, with file-ops */
599ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
600        unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
601{
602    struct iovec *vector = iov;
603    ssize_t ret = 0;
604
605    while (nr_segs > 0) {
606        void __user *base;
607        size_t len;
608        ssize_t nr;
609
610        base = vector->iov_base;
611        len = vector->iov_len;
612        vector++;
613        nr_segs--;
614
615        nr = fn(filp, base, len, ppos);
616
617        if (nr < 0) {
618            if (!ret)
619                ret = nr;
620            break;
621        }
622        ret += nr;
623        if (nr != len)
624            break;
625    }
626
627    return ret;
628}
629
630/* A write operation does a read from user space and vice versa */
631#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
632
633ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
634                  unsigned long nr_segs, unsigned long fast_segs,
635                  struct iovec *fast_pointer,
636                  struct iovec **ret_pointer,
637                  int check_access)
638{
639    unsigned long seg;
640    ssize_t ret;
641    struct iovec *iov = fast_pointer;
642
643    /*
644     * SuS says "The readv() function *may* fail if the iovcnt argument
645     * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
646     * traditionally returned zero for zero segments, so...
647     */
648    if (nr_segs == 0) {
649        ret = 0;
650        goto out;
651    }
652
653    /*
654     * First get the "struct iovec" from user memory and
655     * verify all the pointers
656     */
657    if (nr_segs > UIO_MAXIOV) {
658        ret = -EINVAL;
659        goto out;
660    }
661    if (nr_segs > fast_segs) {
662        iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
663        if (iov == NULL) {
664            ret = -ENOMEM;
665            goto out;
666        }
667    }
668    if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
669        ret = -EFAULT;
670        goto out;
671    }
672
673    /*
674     * According to the Single Unix Specification we should return EINVAL
675     * if an element length is < 0 when cast to ssize_t or if the
676     * total length would overflow the ssize_t return value of the
677     * system call.
678     *
679     * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
680     * overflow case.
681     */
682    ret = 0;
683    for (seg = 0; seg < nr_segs; seg++) {
684        void __user *buf = iov[seg].iov_base;
685        ssize_t len = (ssize_t)iov[seg].iov_len;
686
687        /* see if we we're about to use an invalid len or if
688         * it's about to overflow ssize_t */
689        if (len < 0) {
690            ret = -EINVAL;
691            goto out;
692        }
693        if (check_access
694            && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
695            ret = -EFAULT;
696            goto out;
697        }
698        if (len > MAX_RW_COUNT - ret) {
699            len = MAX_RW_COUNT - ret;
700            iov[seg].iov_len = len;
701        }
702        ret += len;
703    }
704out:
705    *ret_pointer = iov;
706    return ret;
707}
708
709static ssize_t do_readv_writev(int type, struct file *file,
710                   const struct iovec __user * uvector,
711                   unsigned long nr_segs, loff_t *pos)
712{
713    size_t tot_len;
714    struct iovec iovstack[UIO_FASTIOV];
715    struct iovec *iov = iovstack;
716    ssize_t ret;
717    io_fn_t fn;
718    iov_fn_t fnv;
719
720    if (!file->f_op) {
721        ret = -EINVAL;
722        goto out;
723    }
724
725    ret = rw_copy_check_uvector(type, uvector, nr_segs,
726                    ARRAY_SIZE(iovstack), iovstack, &iov, 1);
727    if (ret <= 0)
728        goto out;
729
730    tot_len = ret;
731    ret = rw_verify_area(type, file, pos, tot_len);
732    if (ret < 0)
733        goto out;
734
735    fnv = NULL;
736    if (type == READ) {
737        fn = file->f_op->read;
738        fnv = file->f_op->aio_read;
739    } else {
740        fn = (io_fn_t)file->f_op->write;
741        fnv = file->f_op->aio_write;
742    }
743
744    if (fnv)
745        ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
746                        pos, fnv);
747    else
748        ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
749
750out:
751    if (iov != iovstack)
752        kfree(iov);
753    if ((ret + (type == READ)) > 0) {
754        if (type == READ)
755            fsnotify_access(file);
756        else
757            fsnotify_modify(file);
758    }
759    return ret;
760}
761
762ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
763          unsigned long vlen, loff_t *pos)
764{
765    if (!(file->f_mode & FMODE_READ))
766        return -EBADF;
767    if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
768        return -EINVAL;
769
770    return do_readv_writev(READ, file, vec, vlen, pos);
771}
772
773EXPORT_SYMBOL(vfs_readv);
774
775ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
776           unsigned long vlen, loff_t *pos)
777{
778    if (!(file->f_mode & FMODE_WRITE))
779        return -EBADF;
780    if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
781        return -EINVAL;
782
783    return do_readv_writev(WRITE, file, vec, vlen, pos);
784}
785
786EXPORT_SYMBOL(vfs_writev);
787
788SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
789        unsigned long, vlen)
790{
791    struct file *file;
792    ssize_t ret = -EBADF;
793    int fput_needed;
794
795    file = fget_light(fd, &fput_needed);
796    if (file) {
797        loff_t pos = file_pos_read(file);
798        ret = vfs_readv(file, vec, vlen, &pos);
799        file_pos_write(file, pos);
800        fput_light(file, fput_needed);
801    }
802
803    if (ret > 0)
804        add_rchar(current, ret);
805    inc_syscr(current);
806    return ret;
807}
808
809SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
810        unsigned long, vlen)
811{
812    struct file *file;
813    ssize_t ret = -EBADF;
814    int fput_needed;
815
816    file = fget_light(fd, &fput_needed);
817    if (file) {
818        loff_t pos = file_pos_read(file);
819        ret = vfs_writev(file, vec, vlen, &pos);
820        file_pos_write(file, pos);
821        fput_light(file, fput_needed);
822    }
823
824    if (ret > 0)
825        add_wchar(current, ret);
826    inc_syscw(current);
827    return ret;
828}
829
830static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
831{
832#define HALF_LONG_BITS (BITS_PER_LONG / 2)
833    return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
834}
835
836SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
837        unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
838{
839    loff_t pos = pos_from_hilo(pos_h, pos_l);
840    struct file *file;
841    ssize_t ret = -EBADF;
842    int fput_needed;
843
844    if (pos < 0)
845        return -EINVAL;
846
847    file = fget_light(fd, &fput_needed);
848    if (file) {
849        ret = -ESPIPE;
850        if (file->f_mode & FMODE_PREAD)
851            ret = vfs_readv(file, vec, vlen, &pos);
852        fput_light(file, fput_needed);
853    }
854
855    if (ret > 0)
856        add_rchar(current, ret);
857    inc_syscr(current);
858    return ret;
859}
860
861SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
862        unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
863{
864    loff_t pos = pos_from_hilo(pos_h, pos_l);
865    struct file *file;
866    ssize_t ret = -EBADF;
867    int fput_needed;
868
869    if (pos < 0)
870        return -EINVAL;
871
872    file = fget_light(fd, &fput_needed);
873    if (file) {
874        ret = -ESPIPE;
875        if (file->f_mode & FMODE_PWRITE)
876            ret = vfs_writev(file, vec, vlen, &pos);
877        fput_light(file, fput_needed);
878    }
879
880    if (ret > 0)
881        add_wchar(current, ret);
882    inc_syscw(current);
883    return ret;
884}
885
886static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
887               size_t count, loff_t max)
888{
889    struct file * in_file, * out_file;
890    struct inode * in_inode, * out_inode;
891    loff_t pos;
892    ssize_t retval;
893    int fput_needed_in, fput_needed_out, fl;
894
895    /*
896     * Get input file, and verify that it is ok..
897     */
898    retval = -EBADF;
899    in_file = fget_light(in_fd, &fput_needed_in);
900    if (!in_file)
901        goto out;
902    if (!(in_file->f_mode & FMODE_READ))
903        goto fput_in;
904    retval = -ESPIPE;
905    if (!ppos)
906        ppos = &in_file->f_pos;
907    else
908        if (!(in_file->f_mode & FMODE_PREAD))
909            goto fput_in;
910    retval = rw_verify_area(READ, in_file, ppos, count);
911    if (retval < 0)
912        goto fput_in;
913    count = retval;
914
915    /*
916     * Get output file, and verify that it is ok..
917     */
918    retval = -EBADF;
919    out_file = fget_light(out_fd, &fput_needed_out);
920    if (!out_file)
921        goto fput_in;
922    if (!(out_file->f_mode & FMODE_WRITE))
923        goto fput_out;
924    retval = -EINVAL;
925    in_inode = in_file->f_path.dentry->d_inode;
926    out_inode = out_file->f_path.dentry->d_inode;
927    retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
928    if (retval < 0)
929        goto fput_out;
930    count = retval;
931
932    if (!max)
933        max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
934
935    pos = *ppos;
936    if (unlikely(pos + count > max)) {
937        retval = -EOVERFLOW;
938        if (pos >= max)
939            goto fput_out;
940        count = max - pos;
941    }
942
943    fl = 0;
944#if 0
945    /*
946     * We need to debate whether we can enable this or not. The
947     * man page documents EAGAIN return for the output at least,
948     * and the application is arguably buggy if it doesn't expect
949     * EAGAIN on a non-blocking file descriptor.
950     */
951    if (in_file->f_flags & O_NONBLOCK)
952        fl = SPLICE_F_NONBLOCK;
953#endif
954    retval = do_splice_direct(in_file, ppos, out_file, count, fl);
955
956    if (retval > 0) {
957        add_rchar(current, retval);
958        add_wchar(current, retval);
959    }
960
961    inc_syscr(current);
962    inc_syscw(current);
963    if (*ppos > max)
964        retval = -EOVERFLOW;
965
966fput_out:
967    fput_light(out_file, fput_needed_out);
968fput_in:
969    fput_light(in_file, fput_needed_in);
970out:
971    return retval;
972}
973
974SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
975{
976    loff_t pos;
977    off_t off;
978    ssize_t ret;
979
980    if (offset) {
981        if (unlikely(get_user(off, offset)))
982            return -EFAULT;
983        pos = off;
984        ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
985        if (unlikely(put_user(pos, offset)))
986            return -EFAULT;
987        return ret;
988    }
989
990    return do_sendfile(out_fd, in_fd, NULL, count, 0);
991}
992
993SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
994{
995    loff_t pos;
996    ssize_t ret;
997
998    if (offset) {
999        if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
1000            return -EFAULT;
1001        ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
1002        if (unlikely(put_user(pos, offset)))
1003            return -EFAULT;
1004        return ret;
1005    }
1006
1007    return do_sendfile(out_fd, in_fd, NULL, count, 0);
1008}
1009

Archive Download this file



interactive