Root/target/linux/generic/patches-2.6.39/100-overlayfs.patch

1--- a/include/linux/fs.h
2+++ b/include/linux/fs.h
3@@ -1594,6 +1594,7 @@ struct inode_operations {
4     void (*truncate_range)(struct inode *, loff_t, loff_t);
5     int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
6               u64 len);
7+ struct file *(*open)(struct dentry *, int flags, const struct cred *);
8 } ____cacheline_aligned;
9 
10 struct seq_file;
11@@ -1988,6 +1989,7 @@ extern long do_sys_open(int dfd, const c
12 extern struct file *filp_open(const char *, int, int);
13 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
14                    const char *, int);
15+extern struct file *vfs_open(struct path *, int flags, const struct cred *);
16 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
17                  const struct cred *);
18 extern int filp_close(struct file *, fl_owner_t id);
19--- a/fs/open.c
20+++ b/fs/open.c
21@@ -666,8 +666,7 @@ static inline int __get_file_write_acces
22     return error;
23 }
24 
25-static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
26- struct file *f,
27+static struct file *__dentry_open(struct path *path, struct file *f,
28                     int (*open)(struct inode *, struct file *),
29                     const struct cred *cred)
30 {
31@@ -675,15 +674,16 @@ static struct file *__dentry_open(struct
32     struct inode *inode;
33     int error;
34 
35+ path_get(path);
36     f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
37                 FMODE_PREAD | FMODE_PWRITE;
38 
39     if (unlikely(f->f_flags & O_PATH))
40         f->f_mode = FMODE_PATH;
41 
42- inode = dentry->d_inode;
43+ inode = path->dentry->d_inode;
44     if (f->f_mode & FMODE_WRITE) {
45- error = __get_file_write_access(inode, mnt);
46+ error = __get_file_write_access(inode, path->mnt);
47         if (error)
48             goto cleanup_file;
49         if (!special_file(inode->i_mode))
50@@ -691,8 +691,7 @@ static struct file *__dentry_open(struct
51     }
52 
53     f->f_mapping = inode->i_mapping;
54- f->f_path.dentry = dentry;
55- f->f_path.mnt = mnt;
56+ f->f_path = *path;
57     f->f_pos = 0;
58     file_sb_list_add(f, inode->i_sb);
59 
60@@ -745,7 +744,7 @@ cleanup_all:
61              * here, so just reset the state.
62              */
63             file_reset_write(f);
64- mnt_drop_write(mnt);
65+ mnt_drop_write(path->mnt);
66         }
67     }
68     file_sb_list_del(f);
69@@ -753,8 +752,7 @@ cleanup_all:
70     f->f_path.mnt = NULL;
71 cleanup_file:
72     put_filp(f);
73- dput(dentry);
74- mntput(mnt);
75+ path_put(path);
76     return ERR_PTR(error);
77 }
78 
79@@ -780,14 +778,14 @@ cleanup_file:
80 struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
81         int (*open)(struct inode *, struct file *))
82 {
83+ struct path path = { .dentry = dentry, .mnt = nd->path.mnt };
84     const struct cred *cred = current_cred();
85 
86     if (IS_ERR(nd->intent.open.file))
87         goto out;
88     if (IS_ERR(dentry))
89         goto out_err;
90- nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
91- nd->intent.open.file,
92+ nd->intent.open.file = __dentry_open(&path, nd->intent.open.file,
93                          open, cred);
94 out:
95     return nd->intent.open.file;
96@@ -816,10 +814,17 @@ struct file *nameidata_to_filp(struct na
97 
98     /* Has the filesystem initialised the file for us? */
99     if (filp->f_path.dentry == NULL) {
100- path_get(&nd->path);
101- filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
102- NULL, cred);
103+ struct inode *inode = nd->path.dentry->d_inode;
104+
105+ if (inode->i_op->open) {
106+ int flags = filp->f_flags;
107+ put_filp(filp);
108+ filp = inode->i_op->open(nd->path.dentry, flags, cred);
109+ } else {
110+ filp = __dentry_open(&nd->path, filp, NULL, cred);
111+ }
112     }
113+
114     return filp;
115 }
116 
117@@ -830,26 +835,45 @@ struct file *nameidata_to_filp(struct na
118 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
119              const struct cred *cred)
120 {
121- int error;
122- struct file *f;
123-
124- validate_creds(cred);
125+ struct path path = { .dentry = dentry, .mnt = mnt };
126+ struct file *ret;
127 
128     /* We must always pass in a valid mount pointer. */
129     BUG_ON(!mnt);
130 
131- error = -ENFILE;
132+ ret = vfs_open(&path, flags, cred);
133+ path_put(&path);
134+
135+ return ret;
136+}
137+EXPORT_SYMBOL(dentry_open);
138+
139+/**
140+ * vfs_open - open the file at the given path
141+ * @path: path to open
142+ * @flags: open flags
143+ * @cred: credentials to use
144+ *
145+ * Open the file. If successful, the returned file will have acquired
146+ * an additional reference for path.
147+ */
148+struct file *vfs_open(struct path *path, int flags, const struct cred *cred)
149+{
150+ struct file *f;
151+ struct inode *inode = path->dentry->d_inode;
152+
153+ validate_creds(cred);
154+
155+ if (inode->i_op->open)
156+ return inode->i_op->open(path->dentry, flags, cred);
157     f = get_empty_filp();
158- if (f == NULL) {
159- dput(dentry);
160- mntput(mnt);
161- return ERR_PTR(error);
162- }
163+ if (f == NULL)
164+ return ERR_PTR(-ENFILE);
165 
166     f->f_flags = flags;
167- return __dentry_open(dentry, mnt, f, NULL, cred);
168+ return __dentry_open(path, f, NULL, cred);
169 }
170-EXPORT_SYMBOL(dentry_open);
171+EXPORT_SYMBOL(vfs_open);
172 
173 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
174 {
175--- a/fs/splice.c
176+++ b/fs/splice.c
177@@ -1296,6 +1296,7 @@ long do_splice_direct(struct file *in, l
178 
179     return ret;
180 }
181+EXPORT_SYMBOL(do_splice_direct);
182 
183 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
184                    struct pipe_inode_info *opipe,
185--- a/fs/namespace.c
186+++ b/fs/namespace.c
187@@ -1494,6 +1494,23 @@ void drop_collected_mounts(struct vfsmou
188     release_mounts(&umount_list);
189 }
190 
191+struct vfsmount *clone_private_mount(struct path *path)
192+{
193+ struct vfsmount *mnt;
194+
195+ if (IS_MNT_UNBINDABLE(path->mnt))
196+ return ERR_PTR(-EINVAL);
197+
198+ down_read(&namespace_sem);
199+ mnt = clone_mnt(path->mnt, path->dentry, CL_PRIVATE);
200+ up_read(&namespace_sem);
201+ if (!mnt)
202+ return ERR_PTR(-ENOMEM);
203+
204+ return mnt;
205+}
206+EXPORT_SYMBOL_GPL(clone_private_mount);
207+
208 int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
209            struct vfsmount *root)
210 {
211--- a/include/linux/mount.h
212+++ b/include/linux/mount.h
213@@ -100,6 +100,9 @@ extern void mnt_pin(struct vfsmount *mnt
214 extern void mnt_unpin(struct vfsmount *mnt);
215 extern int __mnt_is_readonly(struct vfsmount *mnt);
216 
217+struct path;
218+extern struct vfsmount *clone_private_mount(struct path *path);
219+
220 extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
221                       const char *name, void *data);
222 
223--- /dev/null
224+++ b/fs/overlayfs/overlayfs.c
225@@ -0,0 +1,2414 @@
226+#include <linux/fs.h>
227+#include <linux/namei.h>
228+#include <linux/sched.h>
229+#include <linux/fs_struct.h>
230+#include <linux/file.h>
231+#include <linux/xattr.h>
232+#include <linux/security.h>
233+#include <linux/device_cgroup.h>
234+#include <linux/mount.h>
235+#include <linux/splice.h>
236+#include <linux/slab.h>
237+#include <linux/parser.h>
238+#include <linux/module.h>
239+#include <linux/uaccess.h>
240+#include <linux/rbtree.h>
241+
242+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
243+MODULE_DESCRIPTION("Overlay filesystem");
244+MODULE_LICENSE("GPL");
245+
246+#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
247+
248+struct ovl_fs {
249+ struct vfsmount *upper_mnt;
250+ struct vfsmount *lower_mnt;
251+};
252+
253+struct ovl_entry {
254+ struct dentry *__upperdentry;
255+ struct dentry *lowerdentry;
256+ union {
257+ struct {
258+ u64 version;
259+ bool opaque;
260+ };
261+ struct rcu_head rcu;
262+ };
263+};
264+
265+static const char *ovl_whiteout_xattr = "trusted.overlay.whiteout";
266+static const char *ovl_opaque_xattr = "trusted.overlay.opaque";
267+static const char *ovl_whiteout_symlink = "(overlay-whiteout)";
268+
269+enum ovl_path_type {
270+ OVL_PATH_UPPER,
271+ OVL_PATH_MERGE,
272+ OVL_PATH_LOWER,
273+};
274+
275+static enum ovl_path_type ovl_path_type(struct dentry *dentry)
276+{
277+ struct ovl_entry *oe = dentry->d_fsdata;
278+
279+ if (oe->__upperdentry) {
280+ if (oe->lowerdentry && S_ISDIR(dentry->d_inode->i_mode))
281+ return OVL_PATH_MERGE;
282+ else
283+ return OVL_PATH_UPPER;
284+ } else {
285+ return OVL_PATH_LOWER;
286+ }
287+}
288+
289+static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
290+{
291+ struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry);
292+ smp_read_barrier_depends();
293+ return upperdentry;
294+}
295+
296+static void ovl_path_upper(struct dentry *dentry, struct path *path)
297+{
298+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
299+ struct ovl_entry *oe = dentry->d_fsdata;
300+
301+ path->mnt = ofs->upper_mnt;
302+ path->dentry = ovl_upperdentry_dereference(oe);
303+}
304+
305+static void ovl_path_lower(struct dentry *dentry, struct path *path)
306+{
307+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
308+ struct ovl_entry *oe = dentry->d_fsdata;
309+
310+ path->mnt = ofs->lower_mnt;
311+ path->dentry = oe->lowerdentry;
312+}
313+
314+static enum ovl_path_type ovl_path_real(struct dentry *dentry,
315+ struct path *path)
316+{
317+
318+ enum ovl_path_type type = ovl_path_type(dentry);
319+
320+ if (type == OVL_PATH_LOWER)
321+ ovl_path_lower(dentry, path);
322+ else
323+ ovl_path_upper(dentry, path);
324+
325+ return type;
326+}
327+
328+static struct dentry *ovl_dentry_upper(struct dentry *dentry)
329+{
330+ struct ovl_entry *oe = dentry->d_fsdata;
331+
332+ return ovl_upperdentry_dereference(oe);
333+}
334+
335+static struct dentry *ovl_dentry_lower(struct dentry *dentry)
336+{
337+ struct ovl_entry *oe = dentry->d_fsdata;
338+
339+ return oe->lowerdentry;
340+}
341+
342+static struct dentry *ovl_dentry_real(struct dentry *dentry)
343+{
344+ struct ovl_entry *oe = dentry->d_fsdata;
345+ struct dentry *realdentry;
346+
347+ realdentry = ovl_upperdentry_dereference(oe);
348+ if (!realdentry)
349+ realdentry = oe->lowerdentry;
350+
351+ return realdentry;
352+}
353+
354+static bool ovl_dentry_is_opaque(struct dentry *dentry)
355+{
356+ struct ovl_entry *oe = dentry->d_fsdata;
357+ return oe->opaque;
358+}
359+
360+static void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
361+{
362+ struct ovl_entry *oe = dentry->d_fsdata;
363+ oe->opaque = opaque;
364+}
365+
366+static void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
367+{
368+ struct ovl_entry *oe = dentry->d_fsdata;
369+
370+ WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
371+ WARN_ON(oe->__upperdentry);
372+ smp_wmb();
373+ oe->__upperdentry = upperdentry;
374+}
375+
376+static void ovl_dentry_version_inc(struct dentry *dentry)
377+{
378+ struct ovl_entry *oe = dentry->d_fsdata;
379+
380+ WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
381+ oe->version++;
382+}
383+
384+static u64 ovl_dentry_version_get(struct dentry *dentry)
385+{
386+ struct ovl_entry *oe = dentry->d_fsdata;
387+
388+ WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
389+ return oe->version;
390+}
391+
392+static bool ovl_is_whiteout(struct dentry *dentry)
393+{
394+ int res;
395+ char val;
396+
397+ if (!dentry)
398+ return false;
399+ if (!dentry->d_inode)
400+ return false;
401+ if (!S_ISLNK(dentry->d_inode->i_mode))
402+ return false;
403+
404+ res = vfs_getxattr(dentry, ovl_whiteout_xattr, &val, 1);
405+ if (res == 1 && val == 'y')
406+ return true;
407+
408+ return false;
409+}
410+
411+static bool ovl_is_opaquedir(struct dentry *dentry)
412+{
413+ int res;
414+ char val;
415+
416+ if (!S_ISDIR(dentry->d_inode->i_mode))
417+ return false;
418+
419+ res = vfs_getxattr(dentry, ovl_opaque_xattr, &val, 1);
420+ if (res == 1 && val == 'y')
421+ return true;
422+
423+ return false;
424+}
425+
426+struct ovl_cache_entry {
427+ const char *name;
428+ unsigned int len;
429+ unsigned int type;
430+ u64 ino;
431+ bool is_whiteout;
432+ struct list_head l_node;
433+ struct rb_node node;
434+};
435+
436+struct ovl_readdir_data {
437+ struct rb_root *root;
438+ struct list_head *list;
439+ struct list_head *middle;
440+ struct dentry *dir;
441+ int count;
442+ int err;
443+};
444+
445+struct ovl_dir_file {
446+ bool is_real;
447+ bool is_cached;
448+ struct list_head cursor;
449+ u64 cache_version;
450+ struct list_head cache;
451+ struct file *realfile;
452+};
453+
454+static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
455+{
456+ return container_of(n, struct ovl_cache_entry, node);
457+}
458+
459+static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
460+ const char *name, int len)
461+{
462+ struct rb_node *node = root->rb_node;
463+ int cmp;
464+
465+ while (node) {
466+ struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
467+
468+ cmp = strncmp(name, p->name, len);
469+ if (cmp > 0)
470+ node = p->node.rb_right;
471+ else if (cmp < 0 || len < p->len)
472+ node = p->node.rb_left;
473+ else
474+ return p;
475+ }
476+
477+ return NULL;
478+}
479+
480+static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
481+ u64 ino, unsigned int d_type)
482+{
483+ struct ovl_cache_entry *p;
484+
485+ p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL);
486+ if (p) {
487+ char *name_copy = (char *) (p + 1);
488+ memcpy(name_copy, name, len);
489+ name_copy[len] = '\0';
490+ p->name = name_copy;
491+ p->len = len;
492+ p->type = d_type;
493+ p->ino = ino;
494+ p->is_whiteout = false;
495+ }
496+
497+ return p;
498+}
499+
500+static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
501+ const char *name, int len, u64 ino,
502+ unsigned int d_type)
503+{
504+ struct rb_node **newp = &rdd->root->rb_node;
505+ struct rb_node *parent = NULL;
506+ struct ovl_cache_entry *p;
507+
508+ while (*newp) {
509+ int cmp;
510+ struct ovl_cache_entry *tmp;
511+
512+ parent = *newp;
513+ tmp = ovl_cache_entry_from_node(*newp);
514+ cmp = strncmp(name, tmp->name, len);
515+ if (cmp > 0)
516+ newp = &tmp->node.rb_right;
517+ else if (cmp < 0 || len < tmp->len)
518+ newp = &tmp->node.rb_left;
519+ else
520+ return 0;
521+ }
522+
523+ p = ovl_cache_entry_new(name, len, ino, d_type);
524+ if (p == NULL)
525+ return -ENOMEM;
526+
527+ list_add_tail(&p->l_node, rdd->list);
528+ rb_link_node(&p->node, parent, newp);
529+ rb_insert_color(&p->node, rdd->root);
530+
531+ return 0;
532+}
533+
534+static int ovl_fill_lower(void *buf, const char *name, int namelen,
535+ loff_t offset, u64 ino, unsigned int d_type)
536+{
537+ struct ovl_readdir_data *rdd = buf;
538+ struct ovl_cache_entry *p;
539+
540+ rdd->count++;
541+ p = ovl_cache_entry_find(rdd->root, name, namelen);
542+ if (p) {
543+ list_move_tail(&p->l_node, rdd->middle);
544+ } else {
545+ p = ovl_cache_entry_new(name, namelen, ino, d_type);
546+ if (p == NULL)
547+ rdd->err = -ENOMEM;
548+ else
549+ list_add_tail(&p->l_node, rdd->middle);
550+ }
551+
552+ return rdd->err;
553+}
554+
555+static void ovl_cache_free(struct list_head *list)
556+{
557+ struct ovl_cache_entry *p;
558+ struct ovl_cache_entry *n;
559+
560+ list_for_each_entry_safe(p, n, list, l_node)
561+ kfree(p);
562+
563+ INIT_LIST_HEAD(list);
564+}
565+
566+static int ovl_fill_upper(void *buf, const char *name, int namelen,
567+ loff_t offset, u64 ino, unsigned int d_type)
568+{
569+ struct ovl_readdir_data *rdd = buf;
570+
571+ rdd->count++;
572+ return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
573+}
574+
575+static int ovl_dir_read(struct path *realpath, struct ovl_readdir_data *rdd,
576+ filldir_t filler)
577+{
578+ struct file *realfile;
579+ int err;
580+
581+ realfile = vfs_open(realpath, O_RDONLY | O_DIRECTORY, current_cred());
582+ if (IS_ERR(realfile))
583+ return PTR_ERR(realfile);
584+ /* Repeat until a pass adds no entries (count stays 0) or an error occurs */
585+ do {
586+ rdd->count = 0;
587+ rdd->err = 0;
588+ err = vfs_readdir(realfile, filler, rdd);
589+ if (err >= 0)
590+ err = rdd->err;
591+ } while (!err && rdd->count);
592+ fput(realfile);
593+ /* Report the readdir/filler failure (if any) instead of masking it as 0 */
594+ return err;
595+}
596+
597+static void ovl_dir_reset(struct file *file)
598+{
599+ struct ovl_dir_file *od = file->private_data;
600+ enum ovl_path_type type = ovl_path_type(file->f_path.dentry);
601+
602+ if (ovl_dentry_version_get(file->f_path.dentry) != od->cache_version) {
603+ list_del_init(&od->cursor);
604+ ovl_cache_free(&od->cache);
605+ od->is_cached = false;
606+ }
607+ WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
608+ if (od->is_real && type == OVL_PATH_MERGE) {
609+ fput(od->realfile);
610+ od->realfile = NULL;
611+ od->is_real = false;
612+ }
613+}
614+
615+static int ovl_dir_mark_whiteouts(struct ovl_readdir_data *rdd)
616+{
617+ struct ovl_cache_entry *p;
618+ struct dentry *dentry;
619+ const struct cred *old_cred;
620+ struct cred *override_cred;
621+
622+ override_cred = prepare_creds();
623+ if (!override_cred) {
624+ ovl_cache_free(rdd->list);
625+ return -ENOMEM;
626+ }
627+
628+ /*
629+ * CAP_SYS_ADMIN for getxattr
630+ * CAP_DAC_OVERRIDE for lookup
631+ */
632+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
633+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
634+ old_cred = override_creds(override_cred);
635+
636+ mutex_lock(&rdd->dir->d_inode->i_mutex);
637+ list_for_each_entry(p, rdd->list, l_node) {
638+ if (p->type != DT_LNK)
639+ continue;
640+
641+ dentry = lookup_one_len(p->name, rdd->dir, p->len);
642+ if (IS_ERR(dentry))
643+ continue;
644+
645+ p->is_whiteout = ovl_is_whiteout(dentry);
646+ dput(dentry);
647+ }
648+ mutex_unlock(&rdd->dir->d_inode->i_mutex);
649+
650+ revert_creds(old_cred);
651+ put_cred(override_cred);
652+
653+ return 0;
654+}
655+
656+static int ovl_dir_read_merged(struct path *upperpath, struct path *lowerpath,
657+ struct ovl_readdir_data *rdd)
658+{
659+ int err;
660+ struct rb_root root = RB_ROOT;
661+ struct list_head middle;
662+
663+ rdd->root = &root;
664+ if (upperpath->dentry) {
665+ rdd->dir = upperpath->dentry;
666+ err = ovl_dir_read(upperpath, rdd, ovl_fill_upper);
667+ if (err)
668+ goto out;
669+
670+ err = ovl_dir_mark_whiteouts(rdd);
671+ if (err)
672+ goto out;
673+ }
674+ /*
675+ * Insert lowerpath entries before upperpath ones, this allows
676+ * offsets to be reasonably constant
677+ */
678+ list_add(&middle, rdd->list);
679+ rdd->middle = &middle;
680+ err = ovl_dir_read(lowerpath, rdd, ovl_fill_lower);
681+ list_del(&middle);
682+out:
683+ rdd->root = NULL;
684+
685+ return err;
686+}
687+
688+static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
689+{
690+ struct list_head *l;
691+ loff_t off;
692+
693+ l = od->cache.next;
694+ for (off = 0; off < pos; off++) {
695+ if (l == &od->cache)
696+ break;
697+ l = l->next;
698+ }
699+ list_move_tail(&od->cursor, l);
700+}
701+
702+static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
703+{
704+ struct ovl_dir_file *od = file->private_data;
705+ int res;
706+
707+ if (!file->f_pos)
708+ ovl_dir_reset(file);
709+
710+ if (od->is_real) {
711+ res = vfs_readdir(od->realfile, filler, buf);
712+ file->f_pos = od->realfile->f_pos;
713+
714+ return res;
715+ }
716+
717+ if (!od->is_cached) {
718+ struct path lowerpath;
719+ struct path upperpath;
720+ struct ovl_readdir_data rdd = { .list = &od->cache };
721+
722+ ovl_path_lower(file->f_path.dentry, &lowerpath);
723+ ovl_path_upper(file->f_path.dentry, &upperpath);
724+
725+ res = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd);
726+ if (res) {
727+ ovl_cache_free(rdd.list);
728+ return res;
729+ }
730+
731+ od->cache_version = ovl_dentry_version_get(file->f_path.dentry);
732+ od->is_cached = true;
733+
734+ ovl_seek_cursor(od, file->f_pos);
735+ }
736+
737+ while (od->cursor.next != &od->cache) {
738+ int over;
739+ loff_t off;
740+ struct ovl_cache_entry *p;
741+
742+ p = list_entry(od->cursor.next, struct ovl_cache_entry, l_node);
743+ off = file->f_pos;
744+ file->f_pos++;
745+ list_move(&od->cursor, &p->l_node);
746+
747+ if (p->is_whiteout)
748+ continue;
749+
750+ over = filler(buf, p->name, p->len, off, p->ino, p->type);
751+ if (over)
752+ break;
753+ }
754+
755+ return 0;
756+}
757+
758+static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
759+{
760+ loff_t res;
761+ struct ovl_dir_file *od = file->private_data;
762+
763+ mutex_lock(&file->f_dentry->d_inode->i_mutex);
764+ if (!file->f_pos)
765+ ovl_dir_reset(file);
766+
767+ if (od->is_real) {
768+ res = vfs_llseek(od->realfile, offset, origin);
769+ file->f_pos = od->realfile->f_pos;
770+ } else {
771+ res = -EINVAL;
772+
773+ switch (origin) {
774+ case SEEK_CUR:
775+ offset += file->f_pos;
776+ break;
777+ case SEEK_SET:
778+ break;
779+ default:
780+ goto out_unlock;
781+ }
782+ if (offset < 0)
783+ goto out_unlock;
784+
785+ if (offset != file->f_pos) {
786+ file->f_pos = offset;
787+ if (od->is_cached)
788+ ovl_seek_cursor(od, offset);
789+ }
790+ res = offset;
791+ }
792+out_unlock:
793+ mutex_unlock(&file->f_dentry->d_inode->i_mutex);
794+
795+ return res;
796+}
797+
798+static int ovl_dir_fsync(struct file *file, int datasync)
799+{
800+ struct ovl_dir_file *od = file->private_data;
801+ struct path upperpath;
802+ struct file *realfile;
803+
804+ /* May need to reopen directory if it got copied up */
805+ if (!od->realfile) {
806+ ovl_path_upper(file->f_path.dentry, &upperpath);
807+ realfile = vfs_open(&upperpath, O_RDONLY, current_cred());
808+ if (IS_ERR(realfile))
809+ return PTR_ERR(realfile);
810+ od->realfile = realfile; /* never cache an ERR_PTR: release fput()s it */
811+ }
812+ return vfs_fsync(od->realfile, datasync);
813+}
814+
815+static int ovl_dir_release(struct inode *inode, struct file *file)
816+{
817+ struct ovl_dir_file *od = file->private_data;
818+
819+ list_del(&od->cursor);
820+ ovl_cache_free(&od->cache);
821+ if (od->realfile)
822+ fput(od->realfile);
823+ kfree(od);
824+
825+ return 0;
826+}
827+
828+static int ovl_dir_open(struct inode *inode, struct file *file)
829+{
830+ struct path realpath;
831+ struct file *realfile;
832+ struct ovl_dir_file *od;
833+ enum ovl_path_type type;
834+
835+ od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
836+ if (!od)
837+ return -ENOMEM;
838+
839+ type = ovl_path_real(file->f_path.dentry, &realpath);
840+ realfile = vfs_open(&realpath, file->f_flags, current_cred());
841+ if (IS_ERR(realfile)) {
842+ kfree(od);
843+ return PTR_ERR(realfile);
844+ }
845+ INIT_LIST_HEAD(&od->cache);
846+ INIT_LIST_HEAD(&od->cursor);
847+ od->is_cached = false;
848+ od->realfile = realfile;
849+ od->is_real = (type != OVL_PATH_MERGE);
850+ file->private_data = od;
851+
852+ return 0;
853+}
854+
855+static const struct file_operations ovl_dir_operations = {
856+ .read = generic_read_dir,
857+ .open = ovl_dir_open,
858+ .readdir = ovl_readdir,
859+ .llseek = ovl_dir_llseek,
860+ .fsync = ovl_dir_fsync,
861+ .release = ovl_dir_release,
862+};
863+
864+static const struct inode_operations ovl_dir_inode_operations;
865+
866+static void ovl_entry_free(struct rcu_head *head)
867+{
868+ struct ovl_entry *oe = container_of(head, struct ovl_entry, rcu);
869+ kfree(oe);
870+}
871+
872+static void ovl_dentry_release(struct dentry *dentry)
873+{
874+ struct ovl_entry *oe = dentry->d_fsdata;
875+
876+ if (oe) {
877+ dput(oe->__upperdentry);
878+ dput(oe->lowerdentry);
879+ call_rcu(&oe->rcu, ovl_entry_free);
880+ }
881+}
882+
883+static const struct dentry_operations ovl_dentry_operations = {
884+ .d_release = ovl_dentry_release,
885+};
886+
887+static struct dentry *ovl_lookup_real(struct dentry *dir, struct qstr *name)
888+{
889+ struct dentry *dentry;
890+
891+ mutex_lock(&dir->d_inode->i_mutex);
892+ dentry = lookup_one_len(name->name, dir, name->len);
893+ mutex_unlock(&dir->d_inode->i_mutex);
894+
895+ if (IS_ERR(dentry)) {
896+ if (PTR_ERR(dentry) == -ENOENT)
897+ dentry = NULL;
898+ } else if (!dentry->d_inode) {
899+ dput(dentry);
900+ dentry = NULL;
901+ }
902+ return dentry;
903+}
904+
905+static struct ovl_entry *ovl_alloc_entry(void)
906+{
907+ return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL);
908+}
909+
910+static struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
911+ struct ovl_entry *oe);
912+
913+static int ovl_whiteout(struct dentry *upperdir, struct dentry *dentry)
914+{
915+ int err;
916+ struct dentry *newdentry;
917+ const struct cred *old_cred;
918+ struct cred *override_cred;
919+
920+ /* FIXME: recheck lower dentry to see if whiteout is really needed */
921+
922+ err = -ENOMEM;
923+ override_cred = prepare_creds();
924+ if (!override_cred)
925+ goto out;
926+
927+ /*
928+ * CAP_SYS_ADMIN for setxattr
929+ * CAP_DAC_OVERRIDE for symlink creation
930+ */
931+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
932+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
933+ override_cred->fsuid = 0;
934+ override_cred->fsgid = 0;
935+ old_cred = override_creds(override_cred);
936+
937+ newdentry = lookup_one_len(dentry->d_name.name, upperdir,
938+ dentry->d_name.len);
939+ err = PTR_ERR(newdentry);
940+ if (IS_ERR(newdentry))
941+ goto out_put_cred;
942+
943+ /* Just been removed within the same locked region */
944+ WARN_ON(newdentry->d_inode);
945+
946+ err = vfs_symlink(upperdir->d_inode, newdentry, ovl_whiteout_symlink);
947+ if (err)
948+ goto out_dput;
949+
950+ ovl_dentry_version_inc(dentry->d_parent);
951+
952+ err = vfs_setxattr(newdentry, ovl_whiteout_xattr, "y", 1, 0);
953+ if (err)
954+ vfs_unlink(upperdir->d_inode, newdentry);
955+
956+out_dput:
957+ dput(newdentry);
958+out_put_cred:
959+ revert_creds(old_cred);
960+ put_cred(override_cred);
961+out:
962+ if (err) {
963+ /*
964+ * There's no way to recover from failure to whiteout.
965+ * What should we do? Log a big fat error and... ?
966+ */
967+ printk(KERN_ERR "overlayfs: ERROR - failed to whiteout '%s'\n",
968+ dentry->d_name.name);
969+ }
970+
971+ return err;
972+}
973+
974+static struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
975+ struct nameidata *nd)
976+{
977+ struct ovl_entry *oe;
978+ struct dentry *upperdir;
979+ struct dentry *lowerdir;
980+ struct dentry *upperdentry = NULL;
981+ struct dentry *lowerdentry = NULL;
982+ struct inode *inode = NULL;
983+ int err;
984+
985+ err = -ENOMEM;
986+ oe = ovl_alloc_entry();
987+ if (!oe)
988+ goto out;
989+
990+ upperdir = ovl_dentry_upper(dentry->d_parent);
991+ lowerdir = ovl_dentry_lower(dentry->d_parent);
992+
993+ if (upperdir) {
994+ upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
995+ err = PTR_ERR(upperdentry);
996+ if (IS_ERR(upperdentry))
997+ goto out_put_dir;
998+
999+ if (lowerdir && upperdentry &&
1000+ (S_ISLNK(upperdentry->d_inode->i_mode) ||
1001+ S_ISDIR(upperdentry->d_inode->i_mode))) {
1002+ const struct cred *old_cred;
1003+ struct cred *override_cred;
1004+
1005+ err = -ENOMEM;
1006+ override_cred = prepare_creds();
1007+ if (!override_cred)
1008+ goto out_dput_upper;
1009+
1010+ /* CAP_SYS_ADMIN needed for getxattr */
1011+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
1012+ old_cred = override_creds(override_cred);
1013+
1014+ if (ovl_is_opaquedir(upperdentry)) {
1015+ oe->opaque = true;
1016+ } else if (ovl_is_whiteout(upperdentry)) {
1017+ dput(upperdentry);
1018+ upperdentry = NULL;
1019+ oe->opaque = true;
1020+ }
1021+ revert_creds(old_cred);
1022+ put_cred(override_cred);
1023+ }
1024+ }
1025+ if (lowerdir && !oe->opaque) {
1026+ lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
1027+ err = PTR_ERR(lowerdentry);
1028+ if (IS_ERR(lowerdentry))
1029+ goto out_dput_upper;
1030+ }
1031+
1032+ if (lowerdentry && upperdentry &&
1033+ (!S_ISDIR(upperdentry->d_inode->i_mode) ||
1034+ !S_ISDIR(lowerdentry->d_inode->i_mode))) {
1035+ dput(lowerdentry);
1036+ lowerdentry = NULL;
1037+ oe->opaque = true;
1038+ }
1039+
1040+ if (lowerdentry || upperdentry) {
1041+ struct dentry *realdentry;
1042+
1043+ realdentry = upperdentry ? upperdentry : lowerdentry;
1044+ err = -ENOMEM;
1045+ inode = ovl_new_inode(dir->i_sb, realdentry->d_inode->i_mode, oe);
1046+ if (!inode)
1047+ goto out_dput;
1048+ }
1049+
1050+ if (upperdentry)
1051+ oe->__upperdentry = upperdentry;
1052+
1053+ if (lowerdentry)
1054+ oe->lowerdentry = lowerdentry;
1055+
1056+ dentry->d_fsdata = oe;
1057+ dentry->d_op = &ovl_dentry_operations;
1058+ d_add(dentry, inode);
1059+
1060+ return NULL;
1061+
1062+out_dput:
1063+ dput(lowerdentry);
1064+out_dput_upper:
1065+ dput(upperdentry);
1066+out_put_dir:
1067+ kfree(oe);
1068+out:
1069+ return ERR_PTR(err);
1070+}
1071+
1072+static int ovl_copy_up_xattr(struct dentry *old, struct dentry *new)
1073+{
1074+ ssize_t list_size, size;
1075+ char *buf, *name, *value;
1076+ int error;
1077+
1078+ if (!old->d_inode->i_op->getxattr ||
1079+ !new->d_inode->i_op->getxattr)
1080+ return 0;
1081+ /* Probe the size of the name list; -EOPNOTSUPP means nothing to copy */
1082+ list_size = vfs_listxattr(old, NULL, 0);
1083+ if (list_size <= 0) {
1084+ if (list_size == -EOPNOTSUPP)
1085+ return 0;
1086+ return list_size;
1087+ }
1088+
1089+ buf = kzalloc(list_size, GFP_KERNEL);
1090+ if (!buf)
1091+ return -ENOMEM;
1092+
1093+ error = -ENOMEM;
1094+ value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
1095+ if (!value)
1096+ goto out;
1097+
1098+ list_size = vfs_listxattr(old, buf, list_size);
1099+ if (list_size <= 0) {
1100+ error = list_size;
1101+ goto out_free_value;
1102+ }
1103+ /* buf holds NUL-terminated names back to back; copy each attribute */
1104+ for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
1105+ size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
1106+ if (size < 0) { /* 0 is a valid (empty) value: still copy it */
1107+ error = size;
1108+ goto out_free_value;
1109+ }
1110+ error = vfs_setxattr(new, name, value, size, 0);
1111+ if (error)
1112+ goto out_free_value;
1113+ }
1114+
1115+out_free_value:
1116+ kfree(value);
1117+out:
1118+ kfree(buf);
1119+ return error;
1120+}
1121+
1122+static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
1123+{
1124+ struct file *old_file;
1125+ struct file *new_file;
1126+ int error = 0;
1127+
1128+ if (len == 0)
1129+ return 0;
1130+
1131+ old_file = vfs_open(old, O_RDONLY, current_cred());
1132+ if (IS_ERR(old_file))
1133+ return PTR_ERR(old_file);
1134+
1135+ new_file = vfs_open(new, O_WRONLY, current_cred());
1136+ if (IS_ERR(new_file)) {
1137+ error = PTR_ERR(new_file);
1138+ goto out_fput;
1139+ }
1140+
1141+ /* FIXME: copy up sparse files efficiently */
1142+ while (len) {
1143+ loff_t offset = new_file->f_pos;
1144+ size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
1145+ long bytes;
1146+
1147+ if (len < this_len)
1148+ this_len = len;
1149+
1150+ if (signal_pending_state(TASK_KILLABLE, current)) {
1151+ error = -EINTR;
1152+ break;
1153+ }
1154+
1155+ bytes = do_splice_direct(old_file, &offset, new_file, this_len,
1156+ SPLICE_F_MOVE);
1157+ if (bytes <= 0) {
1158+ error = bytes;
1159+ break;
1160+ }
1161+
1162+ len -= bytes;
1163+ }
1164+
1165+ fput(new_file);
1166+out_fput:
1167+ fput(old_file);
1168+ return error;
1169+}
1170+
1171+static struct dentry *ovl_lookup_create(struct dentry *upperdir, /* Look up template's name in upperdir so a new object can be created there. */
1172+ struct dentry *template) /* Returns the looked-up dentry or an ERR_PTR(); a whiteout found in the way is unlinked first. */
1173+{
1174+ int err;
1175+ struct dentry *newdentry;
1176+ struct qstr *name = &template->d_name;
1177+
1178+ newdentry = lookup_one_len(name->name, upperdir, name->len);
1179+ if (IS_ERR(newdentry))
1180+ return newdentry;
1181+
1182+ if (newdentry->d_inode) { /* the name already exists in the upper directory */
1183+ const struct cred *old_cred;
1184+ struct cred *override_cred;
1185+
1186+ /* No need to check whiteout if lower parent is non-existent */
1187+ err = -EEXIST;
1188+ if (!ovl_dentry_lower(template->d_parent))
1189+ goto out_dput;
1190+
1191+ if (!S_ISLNK(newdentry->d_inode->i_mode)) /* only symlinks proceed to the whiteout check below */
1192+ goto out_dput;
1193+
1194+ err = -ENOMEM;
1195+ override_cred = prepare_creds();
1196+ if (!override_cred)
1197+ goto out_dput;
1198+
1199+ /*
1200+ * CAP_SYS_ADMIN for getxattr
1201+ * CAP_FOWNER for unlink in sticky directory
1202+ */
1203+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
1204+ cap_raise(override_cred->cap_effective, CAP_FOWNER);
1205+ old_cred = override_creds(override_cred);
1206+
1207+ err = -EEXIST; /* stays -EEXIST unless the entry is a whiteout and gets unlinked */
1208+ if (ovl_is_whiteout(newdentry))
1209+ err = vfs_unlink(upperdir->d_inode, newdentry);
1210+
1211+ revert_creds(old_cred);
1212+ put_cred(override_cred);
1213+ if (err)
1214+ goto out_dput;
1215+
1216+ dput(newdentry);
1217+ newdentry = lookup_one_len(name->name, upperdir, name->len); /* look the name up again after the unlink */
1218+ if (IS_ERR(newdentry)) {
1219+ ovl_whiteout(upperdir, template); /* NOTE(review): presumably re-creates the whiteout just removed; result is ignored */
1220+ return newdentry;
1221+ }
1222+
1223+ /*
1224+ * Whiteout just been successfully removed, parent
1225+ * i_mutex is still held, there's no way the lookup
1226+ * could return positive.
1227+ */
1228+ WARN_ON(newdentry->d_inode);
1229+ }
1230+
1231+ return newdentry;
1232+
1233+out_dput:
1234+ dput(newdentry); /* error path: drop the lookup reference and return err */
1235+ return ERR_PTR(err);
1236+}
1237+
1238+static struct dentry *ovl_upper_create(struct dentry *upperdir, /* Create the object for @dentry in @upperdir, choosing the vfs_* call from the type bits of stat->mode. */
1239+ struct dentry *dentry,
1240+ struct kstat *stat, const char *link) /* @link: symlink target, used only for S_IFLNK. Returns the new dentry or an ERR_PTR(). */
1241+{
1242+ int err;
1243+ struct dentry *newdentry;
1244+ struct inode *dir = upperdir->d_inode;
1245+
1246+ newdentry = ovl_lookup_create(upperdir, dentry);
1247+ if (IS_ERR(newdentry))
1248+ goto out; /* hand the ERR_PTR from the lookup straight back */
1249+
1250+ switch (stat->mode & S_IFMT) {
1251+ case S_IFREG:
1252+ err = vfs_create(dir, newdentry, stat->mode, NULL);
1253+ break;
1254+
1255+ case S_IFDIR:
1256+ err = vfs_mkdir(dir, newdentry, stat->mode);
1257+ break;
1258+
1259+ case S_IFCHR:
1260+ case S_IFBLK:
1261+ case S_IFIFO:
1262+ case S_IFSOCK:
1263+ err = vfs_mknod(dir, newdentry, stat->mode, stat->rdev);
1264+ break;
1265+
1266+ case S_IFLNK:
1267+ err = vfs_symlink(dir, newdentry, link);
1268+ break;
1269+
1270+ default:
1271+ err = -EPERM; /* unrecognised file type */
1272+ }
1273+ if (err) {
1274+ if (ovl_dentry_is_opaque(dentry)) /* NOTE(review): presumably restores a whiteout that ovl_lookup_create() removed — confirm */
1275+ ovl_whiteout(upperdir, dentry);
1276+ dput(newdentry);
1277+ newdentry = ERR_PTR(err);
1278+ }
1279+
1280+out:
1281+ return newdentry;
1282+
1283+}
1284+
1285+static char *ovl_read_symlink(struct dentry *realdentry) /* Read the target of symlink @realdentry into a newly allocated page; returns the NUL-terminated buffer or an ERR_PTR(). */
1286+{
1287+ int res;
1288+ char *buf;
1289+ struct inode *inode = realdentry->d_inode;
1290+ mm_segment_t old_fs;
1291+
1292+ res = -EINVAL;
1293+ if (!inode->i_op->readlink)
1294+ goto err;
1295+
1296+ res = -ENOMEM;
1297+ buf = (char *) __get_free_page(GFP_KERNEL); /* released with free_page() here on error, and by the caller otherwise */
1298+ if (!buf)
1299+ goto err;
1300+
1301+ old_fs = get_fs();
1302+ set_fs(get_ds());
1303+ /* The cast to a user pointer is valid due to the set_fs() */
1304+ res = inode->i_op->readlink(realdentry,
1305+ (char __user *)buf, PAGE_SIZE - 1); /* one byte is kept back for the terminating NUL */
1306+ set_fs(old_fs);
1307+ if (res < 0) {
1308+ free_page((unsigned long) buf);
1309+ goto err;
1310+ }
1311+ buf[res] = '\0'; /* ->readlink returned a length; terminate the string ourselves */
1312+
1313+ return buf;
1314+
1315+err:
1316+ return ERR_PTR(res);
1317+}
1318+
1319+static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) /* Set atime and mtime of @upperdentry to the values in @stat; returns notify_change()'s result. */
1320+{
1321+ struct iattr attr = {
1322+ .ia_valid = ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, /* explicit values for both timestamps */
1323+ .ia_atime = stat->atime,
1324+ .ia_mtime = stat->mtime,
1325+ };
1326+
1327+ return notify_change(upperdentry, &attr); /* callers in this file hold the inode's i_mutex around this call */
1328+}
1329+
1330+static int ovl_set_mode(struct dentry *upperdentry, umode_t mode) /* Apply @mode to @upperdentry through notify_change(); returns its result. */
1331+{
1332+ struct iattr attr = {
1333+ .ia_valid = ATTR_MODE, /* only the mode is changed */
1334+ .ia_mode = mode,
1335+ };
1336+
1337+ return notify_change(upperdentry, &attr);
1338+}
1339+
1340+static int ovl_set_opaque(struct dentry *upperdentry) /* Set the ovl_opaque_xattr attribute (value "y") on @upperdentry; returns 0 or a negative errno. */
1341+{
1342+ int err;
1343+ const struct cred *old_cred;
1344+ struct cred *override_cred;
1345+
1346+ override_cred = prepare_creds();
1347+ if (!override_cred)
1348+ return -ENOMEM;
1349+
1350+ /* CAP_SYS_ADMIN for setxattr of "trusted" namespace */
1351+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
1352+ old_cred = override_creds(override_cred);
1353+ err = vfs_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
1354+ revert_creds(old_cred); /* the raised capability applies to the setxattr call only */
1355+ put_cred(override_cred);
1356+
1357+ return err;
1358+}
1359+
1360+static int ovl_remove_opaque(struct dentry *upperdentry) /* Remove the ovl_opaque_xattr attribute from @upperdentry; returns 0 or a negative errno. */
1361+{
1362+ int err;
1363+ const struct cred *old_cred;
1364+ struct cred *override_cred;
1365+
1366+ override_cred = prepare_creds();
1367+ if (!override_cred)
1368+ return -ENOMEM;
1369+
1370+ /* CAP_SYS_ADMIN for removexattr of "trusted" namespace */
1371+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
1372+ old_cred = override_creds(override_cred);
1373+ err = vfs_removexattr(upperdentry, ovl_opaque_xattr);
1374+ revert_creds(old_cred); /* the raised capability applies to the removexattr call only */
1375+ put_cred(override_cred);
1376+
1377+ return err;
1378+}
1379+
1380+static int ovl_copy_up_locked(struct dentry *upperdir, struct dentry *dentry, /* Create the upper copy of @dentry in @upperdir and fill in data, xattrs, mode and timestamps. */
1381+ struct path *lowerpath, struct kstat *stat,
1382+ const char *link) /* Called by ovl_copy_up_one() with upperdir's i_mutex held and creds overridden. Returns 0 or a negative errno. */
1383+{
1384+ int err;
1385+ struct path newpath;
1386+ umode_t mode = stat->mode; /* keep the full mode; stat->mode is cut down to the type bits just below */
1387+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
1388+
1389+ /* Can't properly set mode on creation because of the umask */
1390+ stat->mode &= S_IFMT;
1391+
1392+ newpath.mnt = ofs->upper_mnt;
1393+ newpath.dentry = ovl_upper_create(upperdir, dentry, stat, link);
1394+ if (IS_ERR(newpath.dentry)) {
1395+ err = PTR_ERR(newpath.dentry);
1396+
1397+ /* Already copied up? */
1398+ if (err == -EEXIST && ovl_path_type(dentry) != OVL_PATH_LOWER)
1399+ return 0;
1400+
1401+ return err;
1402+ }
1403+
1404+ if (S_ISREG(stat->mode)) { /* only regular files have data to copy */
1405+ err = ovl_copy_up_data(lowerpath, &newpath, stat->size);
1406+ if (err)
1407+ goto err_remove;
1408+ }
1409+
1410+ err = ovl_copy_up_xattr(lowerpath->dentry, newpath.dentry);
1411+ if (err)
1412+ goto err_remove;
1413+
1414+ mutex_lock(&newpath.dentry->d_inode->i_mutex);
1415+ if (!S_ISLNK(stat->mode)) /* mode is not set on symlinks */
1416+ err = ovl_set_mode(newpath.dentry, mode);
1417+ if (!err)
1418+ err = ovl_set_timestamps(newpath.dentry, stat);
1419+ mutex_unlock(&newpath.dentry->d_inode->i_mutex);
1420+ if (err)
1421+ goto err_remove;
1422+
1423+ ovl_dentry_update(dentry, newpath.dentry); /* newpath.dentry is not dput() on success — presumably the reference is handed over here */
1424+
1425+ /*
1426+ * Easiest way to get rid of the lower dentry reference is to
1427+ * drop this dentry. This is neither needed nor possible for
1428+ * directories.
1429+ */
1430+ if (!S_ISDIR(stat->mode))
1431+ d_drop(dentry);
1432+
1433+ return 0;
1434+
1435+err_remove: /* undo: remove the partially set up upper object; removal errors are ignored */
1436+ if (S_ISDIR(stat->mode))
1437+ vfs_rmdir(upperdir->d_inode, newpath.dentry);
1438+ else
1439+ vfs_unlink(upperdir->d_inode, newpath.dentry);
1440+
1441+ dput(newpath.dentry);
1442+
1443+ return err;
1444+}
1445+
1446+static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, /* Copy a single object up into the upper directory of @parent. */
1447+ struct path *lowerpath, struct kstat *stat) /* @lowerpath/@stat describe the lower object. Returns 0 or a negative errno. */
1448+{
1449+ int err;
1450+ struct kstat pstat; /* parent attributes, used to restore its timestamps afterwards */
1451+ struct path parentpath;
1452+ struct dentry *upperdir;
1453+ const struct cred *old_cred;
1454+ struct cred *override_cred;
1455+ char *link = NULL;
1456+
1457+ ovl_path_upper(parent, &parentpath);
1458+ upperdir = parentpath.dentry;
1459+
1460+ err = vfs_getattr(parentpath.mnt, parentpath.dentry, &pstat);
1461+ if (err)
1462+ return err;
1463+
1464+ if (S_ISLNK(stat->mode)) {
1465+ link = ovl_read_symlink(lowerpath->dentry); /* page-sized buffer, freed at out_free_link */
1466+ if (IS_ERR(link))
1467+ return PTR_ERR(link);
1468+ }
1469+
1470+ err = -ENOMEM;
1471+ override_cred = prepare_creds();
1472+ if (!override_cred)
1473+ goto out_free_link;
1474+
1475+ override_cred->fsuid = stat->uid; /* act as the owner/group recorded in @stat while creating */
1476+ override_cred->fsgid = stat->gid;
1477+ /*
1478+ * CAP_SYS_ADMIN for copying up extended attributes
1479+ * CAP_DAC_OVERRIDE for create
1480+ * CAP_FOWNER for chmod, timestamp update
1481+ * CAP_FSETID for chmod
1482+ * CAP_MKNOD for mknod
1483+ */
1484+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
1485+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
1486+ cap_raise(override_cred->cap_effective, CAP_FOWNER);
1487+ cap_raise(override_cred->cap_effective, CAP_FSETID);
1488+ cap_raise(override_cred->cap_effective, CAP_MKNOD);
1489+ old_cred = override_creds(override_cred);
1490+
1491+ mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
1492+ /*
1493+ * Using upper filesystem locking to protect against copy up
1494+ * racing with rename (rename means the copy up was already
1495+ * successful).
1496+ */
1497+ if (dentry->d_parent != parent) {
1498+ WARN_ON((ovl_path_type(dentry) == OVL_PATH_LOWER));
1499+ err = 0; /* moved away meanwhile: treated as already copied up */
1500+ } else {
1501+ err = ovl_copy_up_locked(upperdir, dentry, lowerpath,
1502+ stat, link);
1503+ if (!err) {
1504+ /* Restore timestamps on parent (best effort) */
1505+ ovl_set_timestamps(upperdir, &pstat);
1506+ }
1507+ }
1508+
1509+ mutex_unlock(&upperdir->d_inode->i_mutex);
1510+
1511+ revert_creds(old_cred);
1512+ put_cred(override_cred);
1513+
1514+out_free_link:
1515+ if (link)
1516+ free_page((unsigned long) link);
1517+
1518+ return err;
1519+}
1520+
1521+static int ovl_copy_up(struct dentry *dentry) /* Copy up @dentry together with any ancestors that are still lower-only; returns 0 or a negative errno. */
1522+{
1523+ int err;
1524+
1525+ err = 0;
1526+ while (!err) { /* each pass copies up one level, starting with the topmost lower-only ancestor */
1527+ struct dentry *next;
1528+ struct dentry *parent;
1529+ struct path lowerpath;
1530+ struct kstat stat;
1531+ enum ovl_path_type type = ovl_path_type(dentry);
1532+
1533+ if (type != OVL_PATH_LOWER) /* @dentry itself is no longer lower-only: done */
1534+ break;
1535+
1536+ next = dget(dentry);
1537+ /* find the topmost dentry not yet copied up */
1538+ for (;;) {
1539+ parent = dget_parent(next);
1540+
1541+ type = ovl_path_type(parent);
1542+ if (type != OVL_PATH_LOWER)
1543+ break;
1544+
1545+ dput(next); /* walk one level up, keeping exactly one reference each on next and parent */
1546+ next = parent;
1547+ }
1548+
1549+ ovl_path_lower(next, &lowerpath);
1550+ err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat);
1551+ if (!err)
1552+ err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
1553+
1554+ dput(parent);
1555+ dput(next);
1556+ }
1557+
1558+ return err;
1559+}
1560+
1561+/* Optimize by not copying up the file first and truncating later */
1562+static int ovl_copy_up_truncate(struct dentry *dentry, loff_t size) /* Copy up @dentry, copying at most @size bytes of data; returns 0 or a negative errno. */
1563+{
1564+ int err;
1565+ struct kstat stat;
1566+ struct path lowerpath;
1567+ struct dentry *parent = dget_parent(dentry);
1568+
1569+ err = ovl_copy_up(parent); /* the parent chain must exist in the upper layer first */
1570+ if (err)
1571+ goto out_dput_parent;
1572+
1573+ ovl_path_lower(dentry, &lowerpath);
1574+ err = vfs_getattr(lowerpath.mnt, lowerpath.dentry, &stat);
1575+ if (err)
1576+ goto out_dput_parent;
1577+
1578+ if (size < stat.size) /* stat.size is the byte count handed on to the data copy */
1579+ stat.size = size;
1580+
1581+ err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
1582+
1583+out_dput_parent:
1584+ dput(parent);
1585+ return err;
1586+}
1587+
1588+static int ovl_setattr(struct dentry *dentry, struct iattr *attr) /* ->setattr: copy the object up, then apply @attr to the upper dentry. */
1589+{
1590+ struct dentry *upperdentry;
1591+ int err;
1592+
1593+ if ((attr->ia_valid & ATTR_SIZE) && !ovl_dentry_upper(dentry)) /* size change on a not-yet-copied-up object */
1594+ err = ovl_copy_up_truncate(dentry, attr->ia_size);
1595+ else
1596+ err = ovl_copy_up(dentry);
1597+ if (err)
1598+ return err;
1599+
1600+ upperdentry = ovl_dentry_upper(dentry);
1601+
1602+ if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) /* NOTE(review): presumably so notify_change() derives the mode itself — confirm */
1603+ attr->ia_valid &= ~ATTR_MODE;
1604+
1605+ mutex_lock(&upperdentry->d_inode->i_mutex);
1606+ err = notify_change(upperdentry, attr);
1607+ mutex_unlock(&upperdentry->d_inode->i_mutex);
1608+
1609+ return err;
1610+}
1611+
1612+static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, /* ->getattr for files and symlinks: forward to the path chosen by ovl_path_real(). */
1613+ struct kstat *stat) /* @mnt is not used. */
1614+{
1615+ struct path realpath;
1616+
1617+ ovl_path_real(dentry, &realpath);
1618+ return vfs_getattr(realpath.mnt, realpath.dentry, stat);
1619+}
1620+
1621+static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, /* ->getattr for directories: real attributes, with dev/ino taken from the overlay and nlink forced to 1 for merged dirs. */
1622+ struct kstat *stat) /* @mnt is not used. */
1623+{
1624+ int err;
1625+ enum ovl_path_type type;
1626+ struct path realpath;
1627+
1628+ type = ovl_path_real(dentry, &realpath);
1629+ err = vfs_getattr(realpath.mnt, realpath.dentry, stat);
1630+ if (err)
1631+ return err;
1632+
1633+ stat->dev = dentry->d_sb->s_dev; /* report the overlay's own device and inode number */
1634+ stat->ino = dentry->d_inode->i_ino;
1635+
1636+ /*
1637+ * It's probably not worth it to count subdirs to get the
1638+ * correct link count. nlink=1 seems to pacify 'find' and
1639+ * other utilities.
1640+ */
1641+ if (type == OVL_PATH_MERGE)
1642+ stat->nlink = 1;
1643+
1644+ return 0;
1645+}
1646+
1647+static int ovl_permission(struct inode *inode, int mask, unsigned int flags) /* ->permission: run the check against the real inode (upper if present, else lower). Returns 0 or a negative errno. */
1648+{
1649+ struct ovl_entry *oe;
1650+ struct dentry *alias = NULL; /* stays NULL for directories; dput(alias) at out_dput is then called with NULL, as before */
1651+ struct inode *realinode;
1652+ struct dentry *realdentry;
1653+ bool is_upper;
1654+ int err = -ENOENT; /* result used if the real dentry turns out to be negative */
1655+
1656+ if (S_ISDIR(inode->i_mode)) {
1657+ oe = inode->i_private;
1658+ } else if (flags & IPERM_FLAG_RCU) {
1659+ return -ECHILD; /* the alias lookup below takes i_lock and a dentry reference */
1660+ } else {
1661+ /*
1662+ * For non-directories find an alias and get the info
1663+ * from there.
1664+ */
1665+ spin_lock(&inode->i_lock);
1666+ if (WARN_ON(list_empty(&inode->i_dentry))) {
1667+ spin_unlock(&inode->i_lock);
1668+ return -ENOENT;
1669+ }
1670+ alias = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1671+ dget(alias); /* reference dropped at out_dput */
1672+ spin_unlock(&inode->i_lock);
1673+ oe = alias->d_fsdata;
1674+ }
1675+
1676+ realdentry = ovl_upperdentry_dereference(oe);
1677+ is_upper = true;
1678+ if (!realdentry) { /* no upper dentry: fall back to the lower one */
1679+ realdentry = oe->lowerdentry;
1680+ is_upper = false;
1681+ }
1682+
1683+ /* Careful in RCU walk mode */
1684+ realinode = ACCESS_ONCE(realdentry->d_inode);
1685+ if (!realinode) {
1686+ WARN_ON(!(flags & IPERM_FLAG_RCU));
1687+ goto out_dput; /* err is still -ENOENT; go through out_dput so the alias reference is not leaked */
1688+ }
1689+
1690+ if (mask & MAY_WRITE) {
1691+ umode_t mode = realinode->i_mode;
1692+
1693+ /*
1694+ * Writes will always be redirected to upper layer, so
1695+ * ignore lower layer being read-only.
1696+ */
1697+ err = -EROFS;
1698+ if (is_upper && IS_RDONLY(realinode) &&
1699+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
1700+ goto out_dput;
1701+
1702+ /*
1703+ * Nobody gets write access to an immutable file.
1704+ */
1705+ err = -EACCES;
1706+ if (IS_IMMUTABLE(realinode))
1707+ goto out_dput;
1708+ }
1709+
1710+ if (realinode->i_op->permission) /* prefer the real filesystem's own ->permission, else the generic check */
1711+ err = realinode->i_op->permission(realinode, mask, flags);
1712+ else
1713+ err = generic_permission(realinode, mask, flags,
1714+ realinode->i_op->check_acl);
1715+out_dput:
1716+ dput(alias);
1717+ return err;
1718+}
1719+
1720+static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, /* Shared by create/mkdir/mknod/symlink: create the object in the upper layer and instantiate @dentry. */
1721+ const char *link) /* @link: symlink target or NULL. Returns 0 or a negative errno. */
1722+{
1723+ int err;
1724+ struct dentry *newdentry;
1725+ struct dentry *upperdir;
1726+ struct inode *inode;
1727+ struct kstat stat = {
1728+ .mode = mode,
1729+ .rdev = rdev,
1730+ };
1731+
1732+ err = -ENOMEM;
1733+ inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata); /* overlay inode, allocated before touching the upper layer */
1734+ if (!inode)
1735+ goto out;
1736+
1737+ err = ovl_copy_up(dentry->d_parent);
1738+ if (err)
1739+ goto out_iput;
1740+
1741+ upperdir = ovl_dentry_upper(dentry->d_parent);
1742+ mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
1743+
1744+ newdentry = ovl_upper_create(upperdir, dentry, &stat, link);
1745+ err = PTR_ERR(newdentry);
1746+ if (IS_ERR(newdentry))
1747+ goto out_unlock;
1748+
1749+ ovl_dentry_version_inc(dentry->d_parent);
1750+ if (ovl_dentry_is_opaque(dentry) && S_ISDIR(mode)) {
1751+ err = ovl_set_opaque(newdentry);
1752+ if (err) {
1753+ vfs_rmdir(upperdir->d_inode, newdentry); /* undo the mkdir; results of both undo steps are ignored */
1754+ ovl_whiteout(upperdir, dentry);
1755+ goto out_dput;
1756+ }
1757+ }
1758+ ovl_dentry_update(dentry, newdentry);
1759+ d_instantiate(dentry, inode);
1760+ inode = NULL; /* both are in use now: make the iput()/dput() below see NULL */
1761+ newdentry = NULL;
1762+ err = 0;
1763+
1764+out_dput:
1765+ dput(newdentry);
1766+out_unlock:
1767+ mutex_unlock(&upperdir->d_inode->i_mutex);
1768+out_iput:
1769+ iput(inode);
1770+out:
1771+ return err;
1772+}
1773+
1774+static int ovl_create(struct inode *dir, struct dentry *dentry, int mode, /* ->create: make a regular file through ovl_create_object(). */
1775+ struct nameidata *nd) /* @dir and @nd are not used. */
1776+{
1777+ return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); /* keep only the permission bits of @mode and force the file type */
1778+}
1779+
1780+static int ovl_mkdir(struct inode *dir, struct dentry *dentry, int mode) /* ->mkdir: make a directory through ovl_create_object(); @dir is not used. */
1781+{
1782+ return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); /* keep only the permission bits of @mode and force the file type */
1783+}
1784+
1785+static int ovl_mknod(struct inode *dir, struct dentry *dentry, int mode, /* ->mknod: make a node through ovl_create_object(); @dir is not used. */
1786+ dev_t rdev)
1787+{
1788+ return ovl_create_object(dentry, mode, rdev, NULL); /* @mode is passed on unchanged, type bits included */
1789+}
1790+
1791+static int ovl_symlink(struct inode *dir, struct dentry *dentry, /* ->symlink: make a symlink pointing at @link through ovl_create_object(); @dir is not used. */
1792+ const char *link)
1793+{
1794+ return ovl_create_object(dentry, S_IFLNK, 0, link); /* mode carries the type only, no permission bits */
1795+}
1796+
1797+struct ovl_link_data { /* cookie passed from ovl_follow_link() to ovl_put_link() */
1798+ struct dentry *realdentry; /* real dentry whose ->follow_link was called */
1799+ void *cookie; /* value that ->follow_link returned, to be given to its ->put_link */
1800+};
1801+
1802+static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) /* ->follow_link: delegate to the real inode; returns a cookie for ovl_put_link(), NULL, or an ERR_PTR(). */
1803+{
1804+ void *ret;
1805+ struct dentry *realdentry;
1806+ struct inode *realinode;
1807+
1808+ realdentry = ovl_dentry_real(dentry);
1809+ realinode = realdentry->d_inode;
1810+
1811+ if (WARN_ON(!realinode->i_op->follow_link))
1812+ return ERR_PTR(-EPERM);
1813+
1814+ ret = realinode->i_op->follow_link(realdentry, nd);
1815+ if (IS_ERR(ret))
1816+ return ret;
1817+
1818+ if (realinode->i_op->put_link) { /* the real fs wants a put_link call later: remember what to pass it */
1819+ struct ovl_link_data *data;
1820+
1821+ data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
1822+ if (!data) {
1823+ realinode->i_op->put_link(realdentry, nd, ret); /* release the real link right away, since ovl_put_link() will have no cookie */
1824+ return ERR_PTR(-ENOMEM);
1825+ }
1826+ data->realdentry = realdentry;
1827+ data->cookie = ret;
1828+
1829+ return data; /* freed by ovl_put_link() */
1830+ } else {
1831+ return NULL; /* ovl_put_link() returns early on a NULL cookie */
1832+ }
1833+}
1834+
1835+static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) /* ->put_link: forward to the real inode's ->put_link and free the cookie allocated by ovl_follow_link(). */
1836+{
1837+ struct inode *realinode;
1838+ struct ovl_link_data *data = c;
1839+
1840+ if (!data) /* ovl_follow_link() returned NULL: nothing to release */
1841+ return;
1842+
1843+ realinode = data->realdentry->d_inode;
1844+ realinode->i_op->put_link(data->realdentry, nd, data->cookie);
1845+ kfree(data);
1846+}
1847+
1848+static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) /* ->readlink: forward to the real inode's ->readlink; -EINVAL if it has none. */
1849+{
1850+ struct path realpath;
1851+ struct inode *realinode;
1852+
1853+ ovl_path_real(dentry, &realpath);
1854+ realinode = realpath.dentry->d_inode;
1855+
1856+ if (!realinode->i_op->readlink)
1857+ return -EINVAL;
1858+
1859+ touch_atime(realpath.mnt, realpath.dentry); /* atime is updated on the real object */
1860+
1861+ return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
1862+}
1863+
1864+static int ovl_do_remove(struct dentry *dentry, bool is_dir) /* Shared by unlink/rmdir: remove the upper object if there is one and create a whiteout where needed. Returns 0 or a negative errno. */
1865+{
1866+ int err;
1867+ enum ovl_path_type type;
1868+ struct path realpath;
1869+ struct dentry *upperdir;
1870+
1871+ err = ovl_copy_up(dentry->d_parent); /* the parent must exist in the upper layer */
1872+ if (err)
1873+ return err;
1874+
1875+ upperdir = ovl_dentry_upper(dentry->d_parent);
1876+ mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
1877+ type = ovl_path_real(dentry, &realpath);
1878+ if (type != OVL_PATH_LOWER) { /* an upper object exists and has to be removed */
1879+ err = -ESTALE;
1880+ if (realpath.dentry->d_parent != upperdir) /* upper object is no longer in the directory we locked */
1881+ goto out_d_drop;
1882+
1883+ if (is_dir)
1884+ err = vfs_rmdir(upperdir->d_inode, realpath.dentry);
1885+ else
1886+ err = vfs_unlink(upperdir->d_inode, realpath.dentry);
1887+ if (err)
1888+ goto out_d_drop;
1889+
1890+ ovl_dentry_version_inc(dentry->d_parent);
1891+ }
1892+
1893+ if (type != OVL_PATH_UPPER || ovl_dentry_is_opaque(dentry)) /* NOTE(review): presumably a lower entry of this name may exist and must be hidden — confirm */
1894+ err = ovl_whiteout(upperdir, dentry);
1895+
1896+ /*
1897+ * Keeping this dentry hashed would mean having to release
1898+ * upperpath/lowerpath, which could only be done if we are the
1899+ * sole user of this dentry. Too tricky... Just unhash for
1900+ * now.
1901+ */
1902+out_d_drop:
1903+ d_drop(dentry); /* reached on the error paths too */
1904+ mutex_unlock(&upperdir->d_inode->i_mutex);
1905+
1906+ return err;
1907+}
1908+
1909+static int ovl_unlink(struct inode *dir, struct dentry *dentry) /* ->unlink: non-directory removal through ovl_do_remove(); @dir is not used. */
1910+{
1911+ return ovl_do_remove(dentry, false);
1912+}
1913+
1914+static int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) /* Read the merged directory into @list; returns 0 if it holds only whiteouts, "." and "..", else -ENOTEMPTY (or a read error). */
1915+{
1916+ int err;
1917+ struct path lowerpath;
1918+ struct path upperpath;
1919+ struct ovl_cache_entry *p;
1920+ struct ovl_readdir_data rdd = { .list = list };
1921+
1922+ ovl_path_upper(dentry, &upperpath);
1923+ ovl_path_lower(dentry, &lowerpath);
1924+
1925+ err = ovl_dir_read_merged(&upperpath, &lowerpath, &rdd);
1926+ if (err)
1927+ return err;
1928+
1929+ err = 0;
1930+
1931+ list_for_each_entry(p, list, l_node) {
1932+ if (p->is_whiteout) /* whiteouts do not count as content */
1933+ continue;
1934+
1935+ if (p->name[0] == '.') { /* skip "." and ".." */
1936+ if (p->len == 1)
1937+ continue;
1938+ if (p->len == 2 && p->name[1] == '.')
1939+ continue;
1940+ }
1941+ err = -ENOTEMPTY; /* first real entry decides */
1942+ break;
1943+ }
1944+
1945+ return err; /* @list stays populated; the caller frees it */
1946+}
1947+
1948+static int ovl_remove_whiteouts(struct dentry *dir, struct list_head *list) /* Unlink from the upper directory of @dir every entry of @list flagged is_whiteout; stops at the first error. */
1949+{
1950+ struct path upperpath;
1951+ struct dentry *upperdir;
1952+ struct ovl_cache_entry *p;
1953+ const struct cred *old_cred;
1954+ struct cred *override_cred;
1955+ int ret = 0;
1956+
1957+ ovl_path_upper(dir, &upperpath);
1958+ upperdir = upperpath.dentry;
1959+
1960+ override_cred = prepare_creds();
1961+ if (!override_cred)
1962+ return -ENOMEM;
1963+
1964+ /*
1965+ * CAP_DAC_OVERRIDE for lookup and unlink
1966+ */
1967+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
1968+ old_cred = override_creds(override_cred);
1969+
1970+ mutex_lock(&upperdir->d_inode->i_mutex); /* held across all lookups and unlinks */
1971+ list_for_each_entry(p, list, l_node) {
1972+ if (p->is_whiteout) {
1973+ struct dentry *dentry;
1974+
1975+ dentry = lookup_one_len(p->name, upperdir, p->len);
1976+ if (IS_ERR(dentry)) {
1977+ ret = PTR_ERR(dentry);
1978+ break;
1979+ }
1980+ ret = vfs_unlink(upperdir->d_inode, dentry);
1981+ dput(dentry);
1982+ if (ret)
1983+ break;
1984+ }
1985+ }
1986+ mutex_unlock(&upperdir->d_inode->i_mutex);
1987+
1988+ revert_creds(old_cred);
1989+ put_cred(override_cred);
1990+
1991+ return ret;
1992+}
1993+
1994+static int ovl_check_empty_and_clear(struct dentry *dentry, /* Check that directory @dentry is empty and, for a merged directory, remove its whiteouts. */
1995+ enum ovl_path_type type) /* Returns 0, -ENOTEMPTY, or another negative errno. */
1996+{
1997+ int err;
1998+ LIST_HEAD(list);
1999+
2000+ err = ovl_check_empty_dir(dentry, &list);
2001+ if (!err && type == OVL_PATH_MERGE) /* whiteouts are only cleared for OVL_PATH_MERGE */
2002+ err = ovl_remove_whiteouts(dentry, &list);
2003+
2004+ ovl_cache_free(&list); /* free the entries collected by the read, on success and on failure */
2005+
2006+ return err;
2007+}
2008+
2009+static int ovl_rmdir(struct inode *dir, struct dentry *dentry) /* ->rmdir: check emptiness on the merged view where needed, then remove through ovl_do_remove(); @dir is not used. */
2010+{
2011+ int err;
2012+ enum ovl_path_type type;
2013+
2014+ type = ovl_path_type(dentry);
2015+ if (type != OVL_PATH_UPPER) { /* for OVL_PATH_UPPER no check is done here; vfs_rmdir() in ovl_do_remove() acts on the upper dir */
2016+ err = ovl_check_empty_and_clear(dentry, type);
2017+ if (err)
2018+ return err;
2019+ }
2020+
2021+ return ovl_do_remove(dentry, true);
2022+}
2023+
2024+static int ovl_link(struct dentry *old, struct inode *newdir, /* ->link: copy up @old and the parent of @new, then hard-link in the upper layer. */
2025+ struct dentry *new) /* @newdir is not used. Returns 0 or a negative errno. */
2026+{
2027+ int err;
2028+ struct dentry *olddentry;
2029+ struct dentry *newdentry;
2030+ struct dentry *upperdir;
2031+
2032+ err = ovl_copy_up(old);
2033+ if (err)
2034+ goto out;
2035+
2036+ err = ovl_copy_up(new->d_parent);
2037+ if (err)
2038+ goto out;
2039+
2040+ upperdir = ovl_dentry_upper(new->d_parent);
2041+ mutex_lock_nested(&upperdir->d_inode->i_mutex, I_MUTEX_PARENT);
2042+ newdentry = ovl_lookup_create(upperdir, new);
2043+ err = PTR_ERR(newdentry);
2044+ if (IS_ERR(newdentry))
2045+ goto out_unlock;
2046+
2047+ olddentry = ovl_dentry_upper(old);
2048+ err = vfs_link(olddentry, upperdir->d_inode, newdentry);
2049+ if (!err) {
2050+ ovl_dentry_version_inc(new->d_parent);
2051+ ovl_dentry_update(new, newdentry); /* newdentry is not dput() on this path */
2052+
2053+ ihold(old->d_inode); /* @new shares the overlay inode of @old */
2054+ d_instantiate(new, old->d_inode);
2055+ } else {
2056+ if (ovl_dentry_is_opaque(new)) /* NOTE(review): presumably restores a whiteout that ovl_lookup_create() removed — confirm */
2057+ ovl_whiteout(upperdir, new);
2058+ dput(newdentry);
2059+ }
2060+out_unlock:
2061+ mutex_unlock(&upperdir->d_inode->i_mutex);
2062+out:
2063+ return err;
2064+
2065+}
2066+
2067+static int ovl_rename(struct inode *olddir, struct dentry *old, /* ->rename: copy up what is needed, rename in the upper layer, then fix up whiteouts and opaque flags. */
2068+ struct inode *newdir, struct dentry *new) /* @olddir/@newdir are not used. Returns 0 or a negative errno (-EXDEV for non-upper directories). */
2069+{
2070+ int err;
2071+ enum ovl_path_type old_type;
2072+ struct dentry *old_upperdir;
2073+ struct dentry *new_upperdir;
2074+ struct dentry *olddentry;
2075+ struct dentry *newdentry;
2076+ struct dentry *trap;
2077+ bool old_opaque;
2078+ bool new_opaque;
2079+ bool is_dir = S_ISDIR(old->d_inode->i_mode);
2080+
2081+ /* Don't copy up directory trees */
2082+ old_type = ovl_path_type(old);
2083+ if (old_type != OVL_PATH_UPPER && is_dir)
2084+ return -EXDEV;
2085+
2086+ if (new->d_inode) { /* the rename target already exists */
2087+ enum ovl_path_type new_type;
2088+
2089+ new_type = ovl_path_type(new);
2090+
2091+ if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { /* both lower-only and the same lower inode: nothing to do */
2092+ if (ovl_dentry_lower(old)->d_inode ==
2093+ ovl_dentry_lower(new)->d_inode)
2094+ return 0;
2095+ }
2096+ if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { /* both have an upper dentry and share the upper inode: nothing to do */
2097+ if (ovl_dentry_upper(old)->d_inode ==
2098+ ovl_dentry_upper(new)->d_inode)
2099+ return 0;
2100+ }
2101+
2102+ if (new_type != OVL_PATH_UPPER &&
2103+ S_ISDIR(new->d_inode->i_mode)) { /* a target directory with a lower part must be empty in the merged view */
2104+ err = ovl_check_empty_and_clear(new, new_type);
2105+ if (err)
2106+ return err;
2107+ }
2108+ }
2109+
2110+ err = ovl_copy_up(old);
2111+ if (err)
2112+ return err;
2113+
2114+ err = ovl_copy_up(new->d_parent);
2115+ if (err)
2116+ return err;
2117+
2118+ old_upperdir = ovl_dentry_upper(old->d_parent);
2119+ new_upperdir = ovl_dentry_upper(new->d_parent);
2120+
2121+ trap = lock_rename(new_upperdir, old_upperdir);
2122+
2123+ olddentry = ovl_dentry_upper(old);
2124+ newdentry = ovl_dentry_upper(new);
2125+ if (newdentry) {
2126+ dget(newdentry); /* balanced by the dput() at out_dput */
2127+ } else {
2128+ newdentry = ovl_lookup_create(new_upperdir, new);
2129+ err = PTR_ERR(newdentry);
2130+ if (IS_ERR(newdentry))
2131+ goto out_unlock;
2132+ }
2133+
2134+ err = -ESTALE; /* returned if any of the four sanity checks below fails */
2135+ if (olddentry->d_parent != old_upperdir)
2136+ goto out_dput;
2137+ if (newdentry->d_parent != new_upperdir)
2138+ goto out_dput;
2139+ if (olddentry == trap)
2140+ goto out_dput;
2141+ if (newdentry == trap)
2142+ goto out_dput;
2143+
2144+ old_opaque = ovl_dentry_is_opaque(old);
2145+ new_opaque = ovl_dentry_is_opaque(new) ||
2146+ ovl_path_type(new) != OVL_PATH_UPPER;
2147+
2148+ if (is_dir && !old_opaque && new_opaque) { /* the directory must be opaque at its new name: mark it before renaming */
2149+ err = ovl_set_opaque(olddentry);
2150+ if (err)
2151+ goto out_dput;
2152+ }
2153+
2154+ err = vfs_rename(old_upperdir->d_inode, olddentry,
2155+ new_upperdir->d_inode, newdentry);
2156+
2157+ if (err) { /* rename failed: best-effort undo of the steps taken above */
2158+ if (ovl_dentry_is_opaque(new))
2159+ ovl_whiteout(new_upperdir, new);
2160+ if (is_dir && !old_opaque && new_opaque)
2161+ ovl_remove_opaque(olddentry);
2162+ goto out_dput;
2163+ }
2164+
2165+ if (old_type != OVL_PATH_UPPER || old_opaque) /* same condition as in ovl_do_remove(): whiteout the old name */
2166+ err = ovl_whiteout(old_upperdir, old);
2167+ if (is_dir && old_opaque && !new_opaque)
2168+ ovl_remove_opaque(olddentry); /* result ignored */
2169+
2170+ if (old_opaque != new_opaque)
2171+ ovl_dentry_set_opaque(old, new_opaque);
2172+
2173+ ovl_dentry_version_inc(old->d_parent);
2174+ ovl_dentry_version_inc(new->d_parent);
2175+
2176+out_dput:
2177+ dput(newdentry);
2178+out_unlock:
2179+ unlock_rename(new_upperdir, old_upperdir);
2180+ return err;
2181+}
2182+
2183+static bool ovl_is_private_xattr(const char *name) /* True if @name starts with the overlay-private xattr prefix "trusted.overlay.". */
2184+{
2185+ return strncmp(name, "trusted.overlay.", sizeof("trusted.overlay.") - 1) == 0; /* compare the whole 16-byte prefix; the old length of 14 also matched e.g. "trusted.overlap" */
2186+}
2187+
2188+static int ovl_setxattr(struct dentry *dentry, const char *name, /* ->setxattr: refuse overlay-private names, copy up, then set the xattr on the upper dentry. */
2189+ const void *value, size_t size, int flags)
2190+{
2191+ int err;
2192+ struct dentry *upperdentry;
2193+
2194+ if (ovl_is_private_xattr(name)) /* private names are rejected with -EPERM */
2195+ return -EPERM;
2196+
2197+ err = ovl_copy_up(dentry);
2198+ if (err)
2199+ return err;
2200+
2201+ upperdentry = ovl_dentry_upper(dentry);
2202+ return vfs_setxattr(upperdentry, name, value, size, flags);
2203+}
2204+
2205+static ssize_t ovl_getxattr(struct dentry *dentry, const char *name, /* ->getxattr: read from the real dentry; private names read as absent when the parent is a merged dir. */
2206+ void *value, size_t size)
2207+{
2208+ if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
2209+ ovl_is_private_xattr(name))
2210+ return -ENODATA;
2211+
2212+ return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
2213+}
2214+
2215+static ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) /* ->listxattr: list the real dentry's xattrs, dropping overlay-private names when the parent is a merged dir. */
2216+{
2217+ ssize_t res;
2218+ int off;
2219+
2220+ res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
2221+ if (res <= 0 || size == 0) /* error, empty result, or zero-sized buffer: returned unfiltered */
2222+ return res;
2223+
2224+ if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
2225+ return res;
2226+
2227+ /* filter out private xattrs */
2228+ for (off = 0; off < res;) {
2229+ char *s = list + off;
2230+ size_t slen = strlen(s) + 1; /* name length including its NUL */
2231+
2232+ BUG_ON(off + slen > res);
2233+
2234+ if (ovl_is_private_xattr(s)) {
2235+ res -= slen; /* close the gap; off stays put so the moved-down name is examined next */
2236+ memmove(s, s + slen, res - off);
2237+ } else {
2238+ off += slen;
2239+ }
2240+ }
2241+
2242+ return res;
2243+}
2244+
2245+static int ovl_removexattr(struct dentry *dentry, const char *name) /* ->removexattr: remove @name from the upper dentry, copying up first if the object is lower-only. */
2246+{
2247+ int err;
2248+ struct path realpath;
2249+ enum ovl_path_type type;
2250+
2251+ if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
2252+ ovl_is_private_xattr(name))
2253+ return -ENODATA;
2254+
2255+ type = ovl_path_real(dentry, &realpath);
2256+ if (type == OVL_PATH_LOWER) {
2257+ err = vfs_getxattr(realpath.dentry, name, NULL, 0); /* probe first: a missing xattr fails here without a copy up */
2258+ if (err < 0)
2259+ return err;
2260+
2261+ err = ovl_copy_up(dentry);
2262+ if (err)
2263+ return err;
2264+
2265+ ovl_path_upper(dentry, &realpath); /* operate on the new upper dentry from here on */
2266+ }
2267+
2268+ return vfs_removexattr(realpath.dentry, name);
2269+}
2270+
2271+static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, /* Decide whether opening with @flags requires copying the file up first. */
2272+ struct dentry *realdentry)
2273+{
2274+ if (type != OVL_PATH_LOWER) /* an upper dentry already exists */
2275+ return false;
2276+
2277+ if (special_file(realdentry->d_inode->i_mode)) /* special files are never copied up on open */
2278+ return false;
2279+
2280+ if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) /* neither opened for write nor truncating */
2281+ return false;
2282+
2283+ return true;
2284+}
2285+
2286+static struct file *ovl_open(struct dentry *dentry, int flags, /* ->open: open the real file, copying it up first when ovl_open_need_copy_up() says so. */
2287+ const struct cred *cred) /* Returns the struct file from vfs_open() or an ERR_PTR(). */
2288+{
2289+ int err;
2290+ struct path realpath;
2291+ enum ovl_path_type type;
2292+
2293+ type = ovl_path_real(dentry, &realpath);
2294+ if (ovl_open_need_copy_up(flags, type, realpath.dentry)) {
2295+ if (flags & O_TRUNC) /* truncating open: copy up with size 0, i.e. no data */
2296+ err = ovl_copy_up_truncate(dentry, 0);
2297+ else
2298+ err = ovl_copy_up(dentry);
2299+ if (err)
2300+ return ERR_PTR(err);
2301+
2302+ ovl_path_upper(dentry, &realpath); /* open the fresh upper copy instead of the lower file */
2303+ }
2304+
2305+ return vfs_open(&realpath, flags, cred);
2306+}
2307+
2308+static const struct inode_operations ovl_dir_inode_operations = { /* inode operations installed on overlay directories by ovl_new_inode() */
2309+ .lookup = ovl_lookup,
2310+ .mkdir = ovl_mkdir,
2311+ .symlink = ovl_symlink,
2312+ .unlink = ovl_unlink,
2313+ .rmdir = ovl_rmdir,
2314+ .rename = ovl_rename,
2315+ .link = ovl_link,
2316+ .setattr = ovl_setattr,
2317+ .create = ovl_create,
2318+ .mknod = ovl_mknod,
2319+ .permission = ovl_permission,
2320+ .getattr = ovl_dir_getattr, /* directory-specific getattr */
2321+ .setxattr = ovl_setxattr,
2322+ .getxattr = ovl_getxattr,
2323+ .listxattr = ovl_listxattr,
2324+ .removexattr = ovl_removexattr,
2325+};
2326+
2327+static const struct inode_operations ovl_file_inode_operations = { /* inode operations installed by ovl_new_inode() on regular files, sockets, devices and FIFOs */
2328+ .setattr = ovl_setattr,
2329+ .permission = ovl_permission,
2330+ .getattr = ovl_getattr,
2331+ .setxattr = ovl_setxattr,
2332+ .getxattr = ovl_getxattr,
2333+ .listxattr = ovl_listxattr,
2334+ .removexattr = ovl_removexattr,
2335+ .open = ovl_open, /* the ->open hook this patch adds to struct inode_operations */
2336+};
2337+
2338+static const struct inode_operations ovl_symlink_inode_operations = { /* inode operations installed on overlay symlinks by ovl_new_inode(); no .permission entry here */
2339+ .setattr = ovl_setattr,
2340+ .follow_link = ovl_follow_link,
2341+ .put_link = ovl_put_link,
2342+ .readlink = ovl_readlink,
2343+ .getattr = ovl_getattr,
2344+ .setxattr = ovl_setxattr,
2345+ .getxattr = ovl_getxattr,
2346+ .listxattr = ovl_listxattr,
2347+ .removexattr = ovl_removexattr,
2348+};
2349+
2350+static struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, /* Allocate an overlay inode on @sb and install the inode operations matching the type bits of @mode. */
2351+ struct ovl_entry *oe) /* @oe is stored in i_private for directories only. Returns the inode, or NULL on allocation failure or unknown type. */
2352+{
2353+ struct inode *inode;
2354+
2355+ inode = new_inode(sb);
2356+ if (!inode)
2357+ return NULL;
2358+
2359+ mode &= S_IFMT; /* the overlay inode carries the file type only, no permission bits */
2360+
2361+ inode->i_ino = get_next_ino();
2362+ inode->i_mode = mode;
2363+ inode->i_flags |= S_NOATIME | S_NOCMTIME;
2364+
2365+ switch (mode) {
2366+ case S_IFDIR:
2367+ inode->i_private = oe; /* read back by ovl_permission() for directories */
2368+ inode->i_op = &ovl_dir_inode_operations;
2369+ inode->i_fop = &ovl_dir_operations;
2370+ break;
2371+
2372+ case S_IFLNK:
2373+ inode->i_op = &ovl_symlink_inode_operations;
2374+ break;
2375+
2376+ case S_IFREG:
2377+ case S_IFSOCK:
2378+ case S_IFBLK:
2379+ case S_IFCHR:
2380+ case S_IFIFO:
2381+ inode->i_op = &ovl_file_inode_operations;
2382+ break;
2383+
2384+ default:
2385+ WARN(1, "illegal file type: %i\n", mode);
2386+ iput(inode); /* release the inode from new_inode(); it was leaked here before */
2387+ inode = NULL;
2388+ }
2389+
2390+ return inode;
2391+}
2392+
2393+static void ovl_put_super(struct super_block *sb) /* ->put_super: drop write access on the upper mount if mounted read-write, release both mounts, free the ovl_fs. */
2394+{
2395+ struct ovl_fs *ufs = sb->s_fs_info;
2396+
2397+ if (!(sb->s_flags & MS_RDONLY)) /* ovl_remount_fs() pairs mnt_want_write()/mnt_drop_write() with this same flag */
2398+ mnt_drop_write(ufs->upper_mnt);
2399+
2400+ mntput(ufs->upper_mnt);
2401+ mntput(ufs->lower_mnt);
2402+
2403+ kfree(ufs);
2404+}
2405+
2406+static int ovl_remount_fs(struct super_block *sb, int *flagsp, char *data) /* ->remount_fs: take or drop write access on the upper mount when MS_RDONLY changes; @data is not used. */
2407+{
2408+ int flags = *flagsp;
2409+ struct ovl_fs *ufs = sb->s_fs_info;
2410+
2411+ /* When remounting rw or ro, we need to adjust the write access to the
2412+ * upper fs.
2413+ */
2414+ if (((flags ^ sb->s_flags) & MS_RDONLY) == 0)
2415+ /* No change to readonly status */
2416+ return 0;
2417+
2418+ if (flags & MS_RDONLY) { /* rw -> ro */
2419+ mnt_drop_write(ufs->upper_mnt);
2420+ return 0;
2421+ } else /* ro -> rw: fails if write access to the upper mount is refused */
2422+ return mnt_want_write(ufs->upper_mnt);
2423+}
2424+
2425+/**
2426+ * ovl_statfs
2427+ * @dentry: A dentry of the overlayfs; only its super block's root is used
2428+ * @buf: The struct kstatfs to fill in with stats
2429+ *
2430+ * Writes always target the upper layer, so the upper filesystem's statistics
2431+ * are reported. Returns -ENOSYS if that filesystem has no statfs operation.
2432+ */
2433+static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
2434+{
2435+ struct path upper;
2436+
2437+ ovl_path_upper(dentry->d_sb->s_root, &upper);
2438+
2439+ if (!upper.dentry->d_sb->s_op->statfs)
2440+ return -ENOSYS;
2441+ return upper.dentry->d_sb->s_op->statfs(upper.dentry, buf);
2442+}
2443+
2444+static const struct super_operations ovl_super_operations = {
2445+ .put_super = ovl_put_super, /* drops both layer mounts, frees the ovl_fs */
2446+ .remount_fs = ovl_remount_fs, /* follows ro <-> rw changes on the upper mount */
2447+ .statfs = ovl_statfs, /* answered by the upper filesystem */
2448+};
2449+
2450+struct ovl_config { /* mount options collected by ovl_parse_opt() */
2451+ char *lowerdir; /* value of "lowerdir=", NULL if the option was not given */
2452+ char *upperdir; /* value of "upperdir=", NULL if the option was not given */
2453+};
2454+
2455+enum { /* token ids that ovl_parse_opt() gets back from match_token() */
2456+ Opt_lowerdir,
2457+ Opt_upperdir,
2458+ Opt_err, /* ovl_parse_opt() rejects this with -EINVAL */
2459+};
2460+
2461+static const match_table_t ovl_tokens = {
2462+ {Opt_lowerdir, "lowerdir=%s"},
2463+ {Opt_upperdir, "upperdir=%s"},
2464+ {Opt_err, NULL} /* table terminator */
2465+};
2466+
2467+static int ovl_parse_opt(char *opt, struct ovl_config *config)
2468+{
2469+ char *p;
2470+
2471+ config->lowerdir = NULL;
2472+ config->upperdir = NULL;
2473+ /* @opt is a comma-separated option string; strsep() cuts it up in place */
2474+ while ((p = strsep(&opt, ",")) != NULL) {
2475+ substring_t args[MAX_OPT_ARGS];
2476+ char **slot;
2477+
2478+ /* Empty fields are skipped */
2479+ if (!*p)
2480+ continue;
2481+
2482+ switch (match_token(p, ovl_tokens, args)) {
2483+ case Opt_upperdir:
2484+ slot = &config->upperdir;
2485+ break;
2486+
2487+ case Opt_lowerdir:
2488+ slot = &config->lowerdir;
2489+ break;
2490+
2491+ default:
2492+ /* Strings stored so far stay in @config for the caller to free */
2493+ return -EINVAL;
2494+ }
2495+ /* A repeated option replaces the value stored earlier */
2496+ kfree(*slot);
2497+ *slot = match_strdup(&args[0]);
2498+ if (!*slot)
2499+ return -ENOMEM;
2500+ }
2501+ return 0;
2502+}
2503+
2504+static int ovl_fill_super(struct super_block *sb, void *data, int silent)
2505+{
2506+ struct path lowerpath;
2507+ struct path upperpath;
2508+ struct inode *root_inode;
2509+ struct dentry *root_dentry;
2510+ struct ovl_entry *oe;
2511+ struct ovl_fs *ufs;
2512+ struct ovl_config config;
2513+ int err;
2514+ /* Parse the options, clone both layer mounts and build the merged root. */
2515+ err = ovl_parse_opt((char *) data, &config);
2516+ if (err)
2517+ goto out_free_config; /* an earlier option may already be allocated */
2518+
2519+ err = -EINVAL;
2520+ if (!config.upperdir || !config.lowerdir) {
2521+ printk(KERN_ERR "overlayfs: missing upperdir or lowerdir\n");
2522+ goto out_free_config;
2523+ }
2524+
2525+ err = -ENOMEM;
2526+ ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL);
2527+ if (!ufs)
2528+ goto out_free_config;
2529+
2530+ oe = ovl_alloc_entry();
2531+ if (oe == NULL)
2532+ goto out_free_ufs;
2533+
2534+ root_inode = ovl_new_inode(sb, S_IFDIR, oe);
2535+ if (!root_inode)
2536+ goto out_free_oe;
2537+
2538+ err = kern_path(config.upperdir, LOOKUP_FOLLOW, &upperpath);
2539+ if (err)
2540+ goto out_put_root;
2541+
2542+ err = kern_path(config.lowerdir, LOOKUP_FOLLOW, &lowerpath);
2543+ if (err)
2544+ goto out_put_upperpath;
2545+
2546+ err = -ENOTDIR;
2547+ if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
2548+ !S_ISDIR(lowerpath.dentry->d_inode->i_mode))
2549+ goto out_put_lowerpath;
2550+
2551+ ufs->upper_mnt = clone_private_mount(&upperpath);
2552+ err = PTR_ERR(ufs->upper_mnt);
2553+ if (IS_ERR(ufs->upper_mnt)) {
2554+ printk(KERN_ERR "overlayfs: failed to clone upperpath\n");
2555+ goto out_put_lowerpath;
2556+ }
2557+
2558+ ufs->lower_mnt = clone_private_mount(&lowerpath);
2559+ err = PTR_ERR(ufs->lower_mnt);
2560+ if (IS_ERR(ufs->lower_mnt)) {
2561+ printk(KERN_ERR "overlayfs: failed to clone lowerpath\n");
2562+ goto out_put_upper_mnt;
2563+ }
2564+
2565+ if (!(sb->s_flags & MS_RDONLY)) {
2566+ err = mnt_want_write(ufs->upper_mnt);
2567+ if (err)
2568+ goto out_put_lower_mnt;
2569+ }
2570+
2571+ err = -ENOMEM;
2572+ root_dentry = d_alloc_root(root_inode);
2573+ if (!root_dentry)
2574+ goto out_drop_write;
2575+
2576+ mntput(upperpath.mnt); /* only the dentries of the looked-up paths are kept */
2577+ mntput(lowerpath.mnt);
2578+
2579+ oe->__upperdentry = upperpath.dentry;
2580+ oe->lowerdentry = lowerpath.dentry;
2581+
2582+ root_dentry->d_fsdata = oe;
2583+ root_dentry->d_op = &ovl_dentry_operations;
2584+
2585+ sb->s_op = &ovl_super_operations;
2586+ sb->s_root = root_dentry;
2587+ sb->s_fs_info = ufs;
2588+
2589+ err = 0; /* success: the option strings are unreferenced from here on */
2590+ goto out_free_config;
2591+
2592+out_drop_write:
2593+ if (!(sb->s_flags & MS_RDONLY))
2594+ mnt_drop_write(ufs->upper_mnt);
2595+out_put_lower_mnt:
2596+ mntput(ufs->lower_mnt);
2597+out_put_upper_mnt:
2598+ mntput(ufs->upper_mnt);
2599+out_put_lowerpath:
2600+ path_put(&lowerpath);
2601+out_put_upperpath:
2602+ path_put(&upperpath);
2603+out_put_root:
2604+ iput(root_inode);
2605+out_free_oe:
2606+ kfree(oe);
2607+out_free_ufs:
2608+ kfree(ufs);
2609+out_free_config:
2610+ kfree(config.lowerdir); /* both start out NULL, so this is safe on any path */
2611+ kfree(config.upperdir);
2612+ return err;
2613+}
2614+
2615+static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
2616+ const char *dev_name, void *raw_data)
2617+{ /* dev_name is not used; the option string in raw_data goes to ovl_fill_super() */
2618+ return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
2619+}
2620+
2621+static struct file_system_type ovl_fs_type = {
2622+ .owner = THIS_MODULE,
2623+ .name = "overlayfs", /* filesystem type name given at mount time */
2624+ .mount = ovl_mount,
2625+ .kill_sb = kill_anon_super,
2626+};
2627+
2628+static int __init ovl_init(void)
2629+{
2630+ return register_filesystem(&ovl_fs_type); /* makes the "overlayfs" type available */
2631+}
2632+
2633+static void __exit ovl_exit(void)
2634+{
2635+ unregister_filesystem(&ovl_fs_type); /* undoes the registration from ovl_init() */
2636+}
2637+
2638+module_init(ovl_init);
2639+module_exit(ovl_exit);
2640--- a/fs/Kconfig
2641+++ b/fs/Kconfig
2642@@ -63,6 +63,7 @@ source "fs/quota/Kconfig"
2643 
2644 source "fs/autofs4/Kconfig"
2645 source "fs/fuse/Kconfig"
2646+source "fs/overlayfs/Kconfig"
2647 
2648 config CUSE
2649     tristate "Character device in Userspace support"
2650--- a/fs/Makefile
2651+++ b/fs/Makefile
2652@@ -105,6 +105,7 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4/
2653 obj-$(CONFIG_AUTOFS4_FS) += autofs4/
2654 obj-$(CONFIG_ADFS_FS) += adfs/
2655 obj-$(CONFIG_FUSE_FS) += fuse/
2656+obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/
2657 obj-$(CONFIG_UDF_FS) += udf/
2658 obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
2659 obj-$(CONFIG_OMFS_FS) += omfs/
2660--- /dev/null
2661+++ b/fs/overlayfs/Kconfig
2662@@ -0,0 +1,4 @@
2663+config OVERLAYFS_FS
2664+ tristate "Overlay filesystem support"
2665+ help
2666+ Add support for overlay filesystem.
2667--- /dev/null
2668+++ b/fs/overlayfs/Makefile
2669@@ -0,0 +1,5 @@
2670+#
2671+# Makefile for the overlay filesystem.
2672+#
2673+
2674+obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o
2675--- /dev/null
2676+++ b/Documentation/filesystems/overlayfs.txt
2677@@ -0,0 +1,163 @@
2678+Written by: Neil Brown <neilb@suse.de>
2679+
2680+Overlay Filesystem
2681+==================
2682+
2683+This document describes a prototype for a new approach to providing
2684+overlay-filesystem functionality in Linux (sometimes referred to as
2685+union-filesystems). An overlay-filesystem tries to present a
2686+filesystem which is the result of overlaying one filesystem on top
2687+of the other.
2688+
2689+The result will inevitably fail to look exactly like a normal
2690+filesystem for various technical reasons. The expectation is that
2691+many use cases will be able to ignore these differences.
2692+
2693+This approach is 'hybrid' because the objects that appear in the
2694+filesystem do not all appear to belong to that filesystem. In many
2695+cases an object accessed in the union will be indistinguishable
2696+from accessing the corresponding object from the original filesystem.
2697+This is most obvious from the 'st_dev' field returned by stat(2).
2698+
2699+While directories will report an st_dev from the overlay-filesystem,
2700+all non-directory objects will report an st_dev from the lower or
2701+upper filesystem that is providing the object. Similarly st_ino will
2702+only be unique when combined with st_dev, and both of these can change
2703+over the lifetime of a non-directory object. Many applications and
2704+tools ignore these values and will not be affected.
2705+
2706+Upper and Lower
2707+---------------
2708+
2709+An overlay filesystem combines two filesystems - an 'upper' filesystem
2710+and a 'lower' filesystem. When a name exists in both filesystems, the
2711+object in the 'upper' filesystem is visible while the object in the
2712+'lower' filesystem is either hidden or, in the case of directories,
2713+merged with the 'upper' object.
2714+
2715+It would be more correct to refer to an upper and lower 'directory
2716+tree' rather than 'filesystem' as it is quite possible for both
2717+directory trees to be in the same filesystem and there is no
2718+requirement that the root of a filesystem be given for either upper or
2719+lower.
2720+
2721+The lower filesystem can be any filesystem supported by Linux and does
2722+not need to be writable. The lower filesystem can even be another
2723+overlayfs. The upper filesystem will normally be writable and, if it
2724+is, it must support the creation of trusted.* extended attributes, and
2725+must provide valid d_type in readdir responses, at least for symbolic
2726+links - so NFS is not suitable.
2727+
2728+A read-only overlay of two read-only filesystems may use any
2729+filesystem type.
2730+
2731+Directories
2732+-----------
2733+
2734+Overlaying mainly involves directories. If a given name appears in both
2735+upper and lower filesystems and refers to a non-directory in either,
2736+then the lower object is hidden - the name refers only to the upper
2737+object.
2738+
2739+Where both upper and lower objects are directories, a merged directory
2740+is formed.
2741+
2742+At mount time, the two directories given as mount options are combined
2743+into a merged directory. Then whenever a lookup is requested in such
2744+a merged directory, the lookup is performed in each actual directory
2745+and the combined result is cached in the dentry belonging to the overlay
2746+filesystem. If both actual lookups find directories, both are stored
2747+and a merged directory is created, otherwise only one is stored: the
2748+upper if it exists, else the lower.
2749+
2750+Only the lists of names from directories are merged. Other content
2751+such as metadata and extended attributes are reported for the upper
2752+directory only. These attributes of the lower directory are hidden.
2753+
2754+whiteouts and opaque directories
2755+--------------------------------
2756+
2757+In order to support rm and rmdir without changing the lower
2758+filesystem, an overlay filesystem needs to record in the upper filesystem
2759+that files have been removed. This is done using whiteouts and opaque
2760+directories (non-directories are always opaque).
2761+
2762+The overlay filesystem uses extended attributes with a
2763+"trusted.overlay." prefix to record these details.
2764+
2765+A whiteout is created as a symbolic link with target
2766+"(overlay-whiteout)" and with xattr "trusted.overlay.whiteout" set to "y".
2767+When a whiteout is found in the upper level of a merged directory, any
2768+matching name in the lower level is ignored, and the whiteout itself
2769+is also hidden.
2770+
2771+A directory is made opaque by setting the xattr "trusted.overlay.opaque"
2772+to "y". Where the upper filesystem contains an opaque directory, any
2773+directory in the lower filesystem with the same name is ignored.
2774+
2775+readdir
2776+-------
2777+
2778+When a 'readdir' request is made on a merged directory, the upper and
2779+lower directories are each read and the name lists merged in the
2780+obvious way (upper is read first, then lower - entries that already
2781+exist are not re-added). This merged name list is cached in the
2782+'struct file' and so remains as long as the file is kept open. If the
2783+directory is opened and read by two processes at the same time, they
2784+will each have separate caches. A seekdir to the start of the
2785+directory (offset 0) followed by a readdir will cause the cache to be
2786+discarded and rebuilt.
2787+
2788+This means that changes to the merged directory do not appear while a
2789+directory is being read. This is unlikely to be noticed by many
2790+programs.
2791+
2792+seek offsets are assigned sequentially when the directories are read.
2793+Thus if a program were to
2794+ - read part of a directory
2795+ - remember an offset, and close the directory
2796+ - re-open the directory some time later
2797+ - seek to the remembered offset
2798+
2799+there may be little correlation between the old and new locations in
2800+the list of filenames, particularly if anything has changed in the
2801+directory.
2802+
2803+Readdir on directories that are not merged is simply handled by the
2804+underlying directory (upper or lower).
2805+
2806+
2807+Non-directories
2808+---------------
2809+
2810+Objects that are not directories (files, symlinks, device-special
2811+files etc.) are presented either from the upper or lower filesystem as
2812+appropriate. When a file in the lower filesystem is accessed in a way
2813+that requires write access, such as opening for write access, changing
2814+some metadata etc., the file is first copied from the lower filesystem
2815+to the upper filesystem (copy_up). Note that creating a hard-link
2816+also requires copy_up, though of course creation of a symlink does
2817+not.
2818+
2819+The copy_up process first makes sure that the containing directory
2820+exists in the upper filesystem - creating it and any parents as
2821+necessary. It then creates the object with the same metadata (owner,
2822+mode, mtime, symlink-target etc.) and then if the object is a file, the
2823+data is copied from the lower to the upper filesystem. Finally any
2824+extended attributes are copied up.
2825+
2826+Once the copy_up is complete, the overlay filesystem simply
2827+provides direct access to the newly created file in the upper
2828+filesystem - future operations on the file are barely noticed by the
2829+overlay filesystem (though an operation on the name of the file such as
2830+rename or unlink will of course be noticed and handled).
2831+
2832+Changes to underlying filesystems
2833+---------------------------------
2834+
2835+Offline changes, when the overlay is not mounted, are allowed to either
2836+the upper or the lower trees.
2837+
2838+Changes to the underlying filesystems while part of a mounted overlay
2839+filesystem are not allowed. This is not yet enforced, but will be in
2840+the future.
2841

Archive Download this file



interactive