From fb219bdfc3e108669006934c3531914d38494c14 Mon Sep 17 00:00:00 2001 From: Henri Gomez Date: Fri, 2 Dec 2016 12:44:31 -0800 Subject: [PATCH 1/5] linux-amlogic: add overlayfs backport patch --- .../linux-002-backport_overlayfs.patch | 11997 ++++++++++++++++ 1 file changed, 11997 insertions(+) create mode 100644 packages/linux/patches/aarch64/linux-002-backport_overlayfs.patch diff --git a/packages/linux/patches/aarch64/linux-002-backport_overlayfs.patch b/packages/linux/patches/aarch64/linux-002-backport_overlayfs.patch new file mode 100644 index 0000000000..64fa02d97b --- /dev/null +++ b/packages/linux/patches/aarch64/linux-002-backport_overlayfs.patch @@ -0,0 +1,11997 @@ +From c82d759a596eb688e804ec1d1727f78d042b26fb Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 1 Apr 2014 17:08:42 +0200 +Subject: [PATCH 01/73] vfs: rename: move d_move() up + +Move the d_move() in vfs_rename_dir() up, similarly to how it's done in +vfs_rename_other(). The next patch will consolidate these two functions +and this is the only structural difference between them. + +I'm not sure if doing the d_move() after the dput is even valid. But there +may be a logical explanation for that. But moving the d_move() before the +dput() (and the mutex_unlock()) should definitely not hurt. + +Signed-off-by: Miklos Szeredi +Reviewed-by: J. 
Bruce Fields +(cherry picked from commit de22a4c3720a96f1c2ebf12b0857b6db6a991f2c) +Signed-off-by: Alex Shi +--- + fs/namei.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/fs/namei.c b/fs/namei.c +index 0dd72c8..f8a6d19 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4049,13 +4049,12 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, + target->i_flags |= S_DEAD; + dont_mount(new_dentry); + } ++ if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) ++ d_move(old_dentry, new_dentry); + out: + if (target) + mutex_unlock(&target->i_mutex); + dput(new_dentry); +- if (!error) +- if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) +- d_move(old_dentry,new_dentry); + return error; + } + +-- +2.7.4 + + +From 75a4b5ddf548bcfb932cc0ba8cb8eb7806ae9dbc Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 1 Apr 2014 17:08:42 +0200 +Subject: [PATCH 02/73] vfs: rename: use common code for dir and non-dir + +There's actually very little difference between vfs_rename_dir() and +vfs_rename_other() so move both inline into vfs_rename() which still stays +reasonably readable. + +Signed-off-by: Miklos Szeredi +Reviewed-by: J. 
Bruce Fields +(cherry picked from commit bc27027a73e8b80376b51a1583ad1c7445605e8a) +Signed-off-by: Alex Shi +--- + fs/namei.c | 187 +++++++++++++++++++++++++------------------------------------ + 1 file changed, 75 insertions(+), 112 deletions(-) + +diff --git a/fs/namei.c b/fs/namei.c +index f8a6d19..4fa9e66 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -3977,7 +3977,27 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname + return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); + } + +-/* ++/** ++ * vfs_rename - rename a filesystem object ++ * @old_dir: parent of source ++ * @old_dentry: source ++ * @new_dir: parent of destination ++ * @new_dentry: destination ++ * @delegated_inode: returns an inode needing a delegation break ++ * ++ * The caller must hold multiple mutexes--see lock_rename()). ++ * ++ * If vfs_rename discovers a delegation in need of breaking at either ++ * the source or destination, it will return -EWOULDBLOCK and return a ++ * reference to the inode in delegated_inode. The caller should then ++ * break the delegation and retry. Because breaking a delegation may ++ * take a long time, the caller should drop all locks before doing ++ * so. ++ * ++ * Alternatively, a caller may pass NULL for delegated_inode. This may ++ * be appropriate for callers that expect the underlying filesystem not ++ * to be NFS exported. ++ * + * The worst of all namespace operations - renaming directory. "Perverted" + * doesn't even start to describe it. Somebody in UCB had a heck of a trip... + * Problems: +@@ -4005,19 +4025,39 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname + * ->i_mutex on parents, which works but leads to some truly excessive + * locking]. 
+ */ +-static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry, ++ struct inode **delegated_inode) + { +- int error = 0; ++ int error; ++ bool is_dir = d_is_dir(old_dentry); ++ const unsigned char *old_name; ++ struct inode *source = old_dentry->d_inode; + struct inode *target = new_dentry->d_inode; +- unsigned max_links = new_dir->i_sb->s_max_links; ++ ++ if (source == target) ++ return 0; ++ ++ error = may_delete(old_dir, old_dentry, is_dir); ++ if (error) ++ return error; ++ ++ if (!target) ++ error = may_create(new_dir, new_dentry); ++ else ++ error = may_delete(new_dir, new_dentry, is_dir); ++ if (error) ++ return error; ++ ++ if (!old_dir->i_op->rename) ++ return -EPERM; + + /* + * If we are going to change the parent - check write permissions, + * we'll need to flip '..'. + */ +- if (new_dir != old_dir) { +- error = inode_permission(old_dentry->d_inode, MAY_WRITE); ++ if (is_dir && new_dir != old_dir) { ++ error = inode_permission(source, MAY_WRITE); + if (error) + return error; + } +@@ -4026,134 +4066,57 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, + if (error) + return error; + ++ old_name = fsnotify_oldname_init(old_dentry->d_name.name); + dget(new_dentry); +- if (target) ++ if (!is_dir) ++ lock_two_nondirectories(source, target); ++ else if (target) + mutex_lock(&target->i_mutex); + + error = -EBUSY; + if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) + goto out; + +- error = -EMLINK; +- if (max_links && !target && new_dir != old_dir && +- new_dir->i_nlink >= max_links) +- goto out; +- +- if (target) +- shrink_dcache_parent(new_dentry); +- error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); +- if (error) +- goto out; +- +- if (target) { +- target->i_flags |= S_DEAD; +- dont_mount(new_dentry); +- } +- if 
(!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) +- d_move(old_dentry, new_dentry); +-out: +- if (target) +- mutex_unlock(&target->i_mutex); +- dput(new_dentry); +- return error; +-} +- +-static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry, +- struct inode **delegated_inode) +-{ +- struct inode *target = new_dentry->d_inode; +- struct inode *source = old_dentry->d_inode; +- int error; +- +- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); +- if (error) +- return error; +- +- dget(new_dentry); +- lock_two_nondirectories(source, target); ++ if (is_dir) { ++ unsigned max_links = new_dir->i_sb->s_max_links; + +- error = -EBUSY; +- if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) +- goto out; ++ error = -EMLINK; ++ if (max_links && !target && new_dir != old_dir && ++ new_dir->i_nlink >= max_links) ++ goto out; + +- error = try_break_deleg(source, delegated_inode); +- if (error) +- goto out; +- if (target) { +- error = try_break_deleg(target, delegated_inode); ++ if (target) ++ shrink_dcache_parent(new_dentry); ++ } else { ++ error = try_break_deleg(source, delegated_inode); + if (error) + goto out; ++ if (target) { ++ error = try_break_deleg(target, delegated_inode); ++ if (error) ++ goto out; ++ } + } + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (error) + goto out; + +- if (target) ++ if (target) { ++ if (is_dir) ++ target->i_flags |= S_DEAD; + dont_mount(new_dentry); ++ } + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) + d_move(old_dentry, new_dentry); + out: +- unlock_two_nondirectories(source, target); ++ if (!is_dir) ++ unlock_two_nondirectories(source, target); ++ else if (target) ++ mutex_unlock(&target->i_mutex); + dput(new_dentry); +- return error; +-} +- +-/** +- * vfs_rename - rename a filesystem object +- * @old_dir: parent of source +- * @old_dentry: source +- * @new_dir: parent of 
destination +- * @new_dentry: destination +- * @delegated_inode: returns an inode needing a delegation break +- * +- * The caller must hold multiple mutexes--see lock_rename()). +- * +- * If vfs_rename discovers a delegation in need of breaking at either +- * the source or destination, it will return -EWOULDBLOCK and return a +- * reference to the inode in delegated_inode. The caller should then +- * break the delegation and retry. Because breaking a delegation may +- * take a long time, the caller should drop all locks before doing +- * so. +- * +- * Alternatively, a caller may pass NULL for delegated_inode. This may +- * be appropriate for callers that expect the underlying filesystem not +- * to be NFS exported. +- */ +-int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry, +- struct inode **delegated_inode) +-{ +- int error; +- int is_dir = d_is_dir(old_dentry); +- const unsigned char *old_name; +- +- if (old_dentry->d_inode == new_dentry->d_inode) +- return 0; +- +- error = may_delete(old_dir, old_dentry, is_dir); +- if (error) +- return error; +- +- if (!new_dentry->d_inode) +- error = may_create(new_dir, new_dentry); +- else +- error = may_delete(new_dir, new_dentry, is_dir); +- if (error) +- return error; +- +- if (!old_dir->i_op->rename) +- return -EPERM; +- +- old_name = fsnotify_oldname_init(old_dentry->d_name.name); +- +- if (is_dir) +- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); +- else +- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode); + if (!error) + fsnotify_move(old_dir, new_dir, old_name, is_dir, +- new_dentry->d_inode, old_dentry); ++ target, old_dentry); + fsnotify_oldname_free(old_name); + + return error; +-- +2.7.4 + + +From 148622f8708456dda3262bbbe08742a962a0d667 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 1 Apr 2014 17:08:42 +0200 +Subject: [PATCH 03/73] vfs: add renameat2 syscall + +Add new renameat2 syscall, 
which is the same as renameat with an added +flags argument. + +Pass flags to vfs_rename() and to i_op->rename() as well. + +Signed-off-by: Miklos Szeredi +Reviewed-by: J. Bruce Fields +(cherry picked from commit 520c8b16505236fc82daa352e6c5e73cd9870cff) +Signed-off-by: Alex Shi +--- + Documentation/filesystems/Locking | 6 +++- + Documentation/filesystems/vfs.txt | 16 ++++++++++ + arch/x86/syscalls/syscall_64.tbl | 1 + + .../lustre/lustre/include/linux/lustre_compat25.h | 4 +-- + drivers/staging/lustre/lustre/lvfs/lvfs_linux.c | 2 +- + fs/cachefiles/namei.c | 2 +- + fs/ecryptfs/inode.c | 2 +- + fs/namei.c | 34 +++++++++++++++++----- + fs/nfsd/vfs.c | 2 +- + include/linux/fs.h | 4 ++- + 10 files changed, 58 insertions(+), 15 deletions(-) + +diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking +index 5b0c083..f424e0e 100644 +--- a/Documentation/filesystems/Locking ++++ b/Documentation/filesystems/Locking +@@ -47,6 +47,8 @@ prototypes: + int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); ++ int (*rename2) (struct inode *, struct dentry *, ++ struct inode *, struct dentry *, unsigned int); + int (*readlink) (struct dentry *, char __user *,int); + void * (*follow_link) (struct dentry *, struct nameidata *); + void (*put_link) (struct dentry *, struct nameidata *, void *); +@@ -78,6 +80,7 @@ mkdir: yes + unlink: yes (both) + rmdir: yes (both) (see below) + rename: yes (all) (see below) ++rename2: yes (all) (see below) + readlink: no + follow_link: no + put_link: no +@@ -96,7 +99,8 @@ tmpfile: no + + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on + victim. +- cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. ++ cross-directory ->rename() and rename2() has (per-superblock) ++->s_vfs_rename_sem. 
+ + See Documentation/filesystems/directory-locking for more detailed discussion + of the locking scheme for directory operations. +diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt +index c53784c..94eb862 100644 +--- a/Documentation/filesystems/vfs.txt ++++ b/Documentation/filesystems/vfs.txt +@@ -347,6 +347,8 @@ struct inode_operations { + int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); ++ int (*rename2) (struct inode *, struct dentry *, ++ struct inode *, struct dentry *, unsigned int); + int (*readlink) (struct dentry *, char __user *,int); + void * (*follow_link) (struct dentry *, struct nameidata *); + void (*put_link) (struct dentry *, struct nameidata *, void *); +@@ -414,6 +416,20 @@ otherwise noted. + rename: called by the rename(2) system call to rename the object to + have the parent and name given by the second inode and dentry. + ++ rename2: this has an additional flags argument compared to rename. ++ If no flags are supported by the filesystem then this method ++ need not be implemented. If some flags are supported then the ++ filesystem must return -EINVAL for any unsupported or unknown ++ flags. Currently the following flags are implemented: ++ (1) RENAME_NOREPLACE: this flag indicates that if the target ++ of the rename exists the rename should fail with -EEXIST ++ instead of replacing the target. The VFS already checks for ++ existence, so for local filesystems the RENAME_NOREPLACE ++ implementation is equivalent to plain rename. ++ (2) RENAME_EXCHANGE: exchange source and target. Both must ++ exist; this is checked by the VFS. Unlike plain rename, ++ source and target may be of different type. ++ + readlink: called by the readlink(2) system call. 
Only required if + you want to support reading symbolic links + +diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h +index eefdb8d..81cc7a0 100644 +--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h ++++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h +@@ -105,8 +105,8 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, + #define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry) + #define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev) + #define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry) +-#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1,delegated_inode) \ +- vfs_rename(old,old_dir,new,new_dir,delegated_inode) ++#define ll_vfs_rename(old, old_dir, mnt, new, new_dir, mnt1) \ ++ vfs_rename(old, old_dir, new, new_dir, NULL, 0) + + #define cfs_bio_io_error(a,b) bio_io_error((a)) + #define cfs_bio_endio(a,b,c) bio_endio((a),(c)) +diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c +index 428ffd8..d50822b 100644 +--- a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c ++++ b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c +@@ -223,7 +223,7 @@ int lustre_rename(struct dentry *dir, struct vfsmount *mnt, + GOTO(put_old, err = PTR_ERR(dchild_new)); + + err = ll_vfs_rename(dir->d_inode, dchild_old, mnt, +- dir->d_inode, dchild_new, mnt, NULL); ++ dir->d_inode, dchild_new, mnt); + + dput(dchild_new); + put_old: +diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c +index ca65f39..31088a9 100644 +--- a/fs/cachefiles/namei.c ++++ b/fs/cachefiles/namei.c +@@ -396,7 +396,7 @@ try_again: + cachefiles_io_error(cache, "Rename security error %d", ret); + } else { + ret = vfs_rename(dir->d_inode, rep, +- cache->graveyard->d_inode, grave, NULL); ++ cache->graveyard->d_inode, grave, NULL, 0); + if (ret != 0 && ret != 
-ENOMEM) + cachefiles_io_error(cache, + "Rename failed with error %d", ret); +diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c +index a85ceb7..57ee4c5 100644 +--- a/fs/ecryptfs/inode.c ++++ b/fs/ecryptfs/inode.c +@@ -641,7 +641,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, + } + rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, + lower_new_dir_dentry->d_inode, lower_new_dentry, +- NULL); ++ NULL, 0); + if (rc) + goto out_lock; + if (target_inode) +diff --git a/fs/namei.c b/fs/namei.c +index 4fa9e66..5134f8c 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -3984,6 +3984,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname + * @new_dir: parent of destination + * @new_dentry: destination + * @delegated_inode: returns an inode needing a delegation break ++ * @flags: rename flags + * + * The caller must hold multiple mutexes--see lock_rename()). + * +@@ -4027,7 +4028,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname + */ + int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, +- struct inode **delegated_inode) ++ struct inode **delegated_inode, unsigned int flags) + { + int error; + bool is_dir = d_is_dir(old_dentry); +@@ -4052,6 +4053,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + if (!old_dir->i_op->rename) + return -EPERM; + ++ if (flags && !old_dir->i_op->rename2) ++ return -EINVAL; ++ + /* + * If we are going to change the parent - check write permissions, + * we'll need to flip '..'. 
+@@ -4097,7 +4101,13 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + goto out; + } + } +- error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); ++ if (!flags) { ++ error = old_dir->i_op->rename(old_dir, old_dentry, ++ new_dir, new_dentry); ++ } else { ++ error = old_dir->i_op->rename2(old_dir, old_dentry, ++ new_dir, new_dentry, flags); ++ } + if (error) + goto out; + +@@ -4122,8 +4132,8 @@ out: + return error; + } + +-SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, +- int, newdfd, const char __user *, newname) ++SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, ++ int, newdfd, const char __user *, newname, unsigned int, flags) + { + struct dentry *old_dir, *new_dir; + struct dentry *old_dentry, *new_dentry; +@@ -4135,6 +4145,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, + unsigned int lookup_flags = 0; + bool should_retry = false; + int error; ++ ++ if (flags) ++ return -EINVAL; ++ + retry: + from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); + if (IS_ERR(from)) { +@@ -4206,8 +4220,8 @@ retry_deleg: + if (error) + goto exit5; + error = vfs_rename(old_dir->d_inode, old_dentry, +- new_dir->d_inode, new_dentry, +- &delegated_inode); ++ new_dir->d_inode, new_dentry, ++ &delegated_inode, flags); + exit5: + dput(new_dentry); + exit4: +@@ -4237,9 +4251,15 @@ exit: + return error; + } + ++SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, ++ int, newdfd, const char __user *, newname) ++{ ++ return sys_renameat2(olddfd, oldname, newdfd, newname, 0); ++} ++ + SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) + { +- return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); ++ return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); + } + + int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) +diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c +index eea5ad1..464f813 
100644 +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1703,7 +1703,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, + if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) + goto out_dput_new; + +- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); ++ host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); + if (!host_err) { + host_err = commit_metadata(tfhp); + if (!host_err) +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 23b2a35..3b3670e 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1460,7 +1460,7 @@ extern int vfs_symlink(struct inode *, struct dentry *, const char *); + extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **); + extern int vfs_rmdir(struct inode *, struct dentry *); + extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); +-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **); ++extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); + + /* + * VFS dentry helper functions. +@@ -1571,6 +1571,8 @@ struct inode_operations { + int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); ++ int (*rename2) (struct inode *, struct dentry *, ++ struct inode *, struct dentry *, unsigned int); + int (*setattr) (struct dentry *, struct iattr *); + int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); +-- +2.7.4 + + +From f1bdf915fa37836cc39f76e86c65521504deb7e1 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 1 Apr 2014 17:08:43 +0200 +Subject: [PATCH 04/73] vfs: add RENAME_NOREPLACE flag + +If this flag is specified and the target of the rename exists then the +rename syscall fails with EEXIST. 
+ +The VFS does the existence checking, so it is trivial to enable for most +local filesystems. This patch only enables it in ext4. + +For network filesystems the VFS check is not enough as there may be a race +between a remote create and the rename, so these filesystems need to handle +this flag in their ->rename() implementations to ensure atomicity. + +Andy writes about why this is useful: + +"The trivial answer: to eliminate the race condition from 'mv -i'. + +Another answer: there's a common pattern to atomically create a file +with contents: open a temporary file, write to it, optionally fsync +it, close it, then link(2) it to the final name, then unlink the +temporary file. + +The reason to use link(2) is because it won't silently clobber the destination. + +This is annoying: + - It requires an extra system call that shouldn't be necessary. + - It doesn't work on (IMO sensible) filesystems that don't support +hard links (e.g. vfat). + - It's not atomic -- there's an intermediate state where both files exist. + - It's ugly. + +The new rename flag will make this totally sensible. + +To be fair, on new enough kernels, you can also use O_TMPFILE and +linkat to achieve the same thing even more cleanly." + +Suggested-by: Andy Lutomirski +Signed-off-by: Miklos Szeredi +Reviewed-by: J. 
Bruce Fields +(cherry picked from commit 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c) +Signed-off-by: Alex Shi +--- + fs/ext4/namei.c | 11 +++++++++++ + fs/namei.c | 21 +++++++++++++-------- + include/uapi/linux/fs.h | 2 ++ + 3 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 2dcbfb6..f0598d6 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -3191,6 +3191,16 @@ end_rename: + return retval; + } + ++static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry, ++ unsigned int flags) ++{ ++ if (flags & ~RENAME_NOREPLACE) ++ return -EINVAL; ++ ++ return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); ++} ++ + /* + * directories can handle most operations... + */ +@@ -3205,6 +3215,7 @@ const struct inode_operations ext4_dir_inode_operations = { + .mknod = ext4_mknod, + .tmpfile = ext4_tmpfile, + .rename = ext4_rename, ++ .rename2 = ext4_rename2, + .setattr = ext4_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, +diff --git a/fs/namei.c b/fs/namei.c +index 5134f8c..3f2cc3c 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4146,7 +4146,7 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, + bool should_retry = false; + int error; + +- if (flags) ++ if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + retry: +@@ -4172,6 +4172,8 @@ retry: + goto exit2; + + new_dir = newnd.path.dentry; ++ if (flags & RENAME_NOREPLACE) ++ error = -EEXIST; + if (newnd.last_type != LAST_NORM) + goto exit2; + +@@ -4194,22 +4196,25 @@ retry_deleg: + error = -ENOENT; + if (d_is_negative(old_dentry)) + goto exit4; ++ new_dentry = lookup_hash(&newnd); ++ error = PTR_ERR(new_dentry); ++ if (IS_ERR(new_dentry)) ++ goto exit4; ++ error = -EEXIST; ++ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) ++ goto exit5; + /* unless the source is a directory trailing slashes give -ENOTDIR */ + if (!d_is_dir(old_dentry)) { + 
error = -ENOTDIR; + if (oldnd.last.name[oldnd.last.len]) +- goto exit4; ++ goto exit5; + if (newnd.last.name[newnd.last.len]) +- goto exit4; ++ goto exit5; + } + /* source should not be ancestor of target */ + error = -EINVAL; + if (old_dentry == trap) +- goto exit4; +- new_dentry = lookup_hash(&newnd); +- error = PTR_ERR(new_dentry); +- if (IS_ERR(new_dentry)) +- goto exit4; ++ goto exit5; + /* target should not be an ancestor of source */ + error = -ENOTEMPTY; + if (new_dentry == trap) +diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h +index 6c28b61..9250f4d 100644 +--- a/include/uapi/linux/fs.h ++++ b/include/uapi/linux/fs.h +@@ -35,6 +35,8 @@ + #define SEEK_HOLE 4 /* seek to the next hole */ + #define SEEK_MAX SEEK_HOLE + ++#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ ++ + struct fstrim_range { + __u64 start; + __u64 len; +-- +2.7.4 + + +From 5371fcd46e9276f4216e329ba2714d595a384a1f Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 1 Apr 2014 17:08:43 +0200 +Subject: [PATCH 05/73] security: add flags to rename hooks + +Add flags to security_path_rename() and security_inode_rename() hooks. + +Signed-off-by: Miklos Szeredi +Reviewed-by: J. 
Bruce Fields +(cherry picked from commit 0b3974eb04c4874e85fa1d4fc70450d12f28611d) +Signed-off-by: Alex Shi +--- + fs/cachefiles/namei.c | 2 +- + fs/namei.c | 5 +++-- + include/linux/security.h | 12 ++++++++---- + security/security.c | 6 ++++-- + 4 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c +index 31088a9..6494d9f 100644 +--- a/fs/cachefiles/namei.c ++++ b/fs/cachefiles/namei.c +@@ -391,7 +391,7 @@ try_again: + path.dentry = dir; + path_to_graveyard.mnt = cache->mnt; + path_to_graveyard.dentry = cache->graveyard; +- ret = security_path_rename(&path, rep, &path_to_graveyard, grave); ++ ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0); + if (ret < 0) { + cachefiles_io_error(cache, "Rename security error %d", ret); + } else { +diff --git a/fs/namei.c b/fs/namei.c +index 3f2cc3c..137a7b8 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4066,7 +4066,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + return error; + } + +- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); ++ error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, ++ flags); + if (error) + return error; + +@@ -4221,7 +4222,7 @@ retry_deleg: + goto exit5; + + error = security_path_rename(&oldnd.path, old_dentry, +- &newnd.path, new_dentry); ++ &newnd.path, new_dentry, flags); + if (error) + goto exit5; + error = vfs_rename(old_dir->d_inode, old_dentry, +diff --git a/include/linux/security.h b/include/linux/security.h +index 2fc42d1..6478ce3 100644 +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -1793,7 +1793,8 @@ int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) + int security_inode_rmdir(struct inode *dir, struct dentry *dentry); + int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev); + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct 
inode *new_dir, struct dentry *new_dentry); ++ struct inode *new_dir, struct dentry *new_dentry, ++ unsigned int flags); + int security_inode_readlink(struct dentry *dentry); + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); + int security_inode_permission(struct inode *inode, int mask); +@@ -2161,7 +2162,8 @@ static inline int security_inode_mknod(struct inode *dir, + static inline int security_inode_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, +- struct dentry *new_dentry) ++ struct dentry *new_dentry, ++ unsigned int flags) + { + return 0; + } +@@ -2955,7 +2957,8 @@ int security_path_symlink(struct path *dir, struct dentry *dentry, + int security_path_link(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry); + int security_path_rename(struct path *old_dir, struct dentry *old_dentry, +- struct path *new_dir, struct dentry *new_dentry); ++ struct path *new_dir, struct dentry *new_dentry, ++ unsigned int flags); + int security_path_chmod(struct path *path, umode_t mode); + int security_path_chown(struct path *path, kuid_t uid, kgid_t gid); + int security_path_chroot(struct path *path); +@@ -3003,7 +3006,8 @@ static inline int security_path_link(struct dentry *old_dentry, + static inline int security_path_rename(struct path *old_dir, + struct dentry *old_dentry, + struct path *new_dir, +- struct dentry *new_dentry) ++ struct dentry *new_dentry, ++ unsigned int flags) + { + return 0; + } +diff --git a/security/security.c b/security/security.c +index 919cad9..284fbc9 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -433,7 +433,8 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, + } + + int security_path_rename(struct path *old_dir, struct dentry *old_dentry, +- struct path *new_dir, struct dentry *new_dentry) ++ struct path *new_dir, struct dentry *new_dentry, ++ unsigned int flags) + { + if (unlikely(IS_PRIVATE(old_dentry->d_inode) || 
+ (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) +@@ -524,7 +525,8 @@ int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, + } + + int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry, ++ unsigned int flags) + { + if (unlikely(IS_PRIVATE(old_dentry->d_inode) || + (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) +-- +2.7.4 + + +From c3ae3e8840e6b555949cda895d885ad91c5e3f24 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 1 Apr 2014 17:08:43 +0200 +Subject: [PATCH 06/73] vfs: add cross-rename + +If flags contain RENAME_EXCHANGE then exchange source and destination files. +There's no restriction on the type of the files; e.g. a directory can be +exchanged with a symlink. + +Signed-off-by: Miklos Szeredi +Reviewed-by: Jan Kara +Reviewed-by: J. Bruce Fields +(cherry picked from commit da1ce0670c14d8380e423a3239e562a1dc15fa9e) +Signed-off-by: Alex Shi +--- + fs/dcache.c | 50 ++++++++++++++++++----- + fs/namei.c | 104 +++++++++++++++++++++++++++++++++--------------- + include/linux/dcache.h | 1 + + include/uapi/linux/fs.h | 1 + + security/security.c | 16 ++++++++ + 5 files changed, 131 insertions(+), 41 deletions(-) + +diff --git a/fs/dcache.c b/fs/dcache.c +index 4366127..e3c5563 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2481,12 +2481,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target) + dentry->d_name.name = dentry->d_iname; + } else { + /* +- * Both are internal. Just copy target to dentry ++ * Both are internal. 
+ */ +- memcpy(dentry->d_iname, target->d_name.name, +- target->d_name.len + 1); +- dentry->d_name.len = target->d_name.len; +- return; ++ unsigned int i; ++ BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); ++ for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { ++ swap(((long *) &dentry->d_iname)[i], ++ ((long *) &target->d_iname)[i]); ++ } + } + } + swap(dentry->d_name.len, target->d_name.len); +@@ -2543,13 +2545,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, + * __d_move - move a dentry + * @dentry: entry to move + * @target: new dentry ++ * @exchange: exchange the two dentries + * + * Update the dcache to reflect the move of a file name. Negative + * dcache entries should not be moved in this way. Caller must hold + * rename_lock, the i_mutex of the source and target directories, + * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). + */ +-static void __d_move(struct dentry * dentry, struct dentry * target) ++static void __d_move(struct dentry *dentry, struct dentry *target, ++ bool exchange) + { + if (!dentry->d_inode) + printk(KERN_WARNING "VFS: moving negative dcache entry\n"); +@@ -2571,8 +2575,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) + __d_drop(dentry); + __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); + +- /* Unhash the target: dput() will then get rid of it */ ++ /* ++ * Unhash the target (d_delete() is not usable here). If exchanging ++ * the two dentries, then rehash onto the other's hash queue. 
++ */ + __d_drop(target); ++ if (exchange) { ++ __d_rehash(target, ++ d_hash(dentry->d_parent, dentry->d_name.hash)); ++ } + + list_del(&dentry->d_u.d_child); + list_del(&target->d_u.d_child); +@@ -2599,6 +2610,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) + write_seqcount_end(&dentry->d_seq); + + dentry_unlock_parents_for_move(dentry, target); ++ if (exchange) ++ fsnotify_d_move(target); + spin_unlock(&target->d_lock); + fsnotify_d_move(dentry); + spin_unlock(&dentry->d_lock); +@@ -2616,11 +2629,30 @@ static void __d_move(struct dentry * dentry, struct dentry * target) + void d_move(struct dentry *dentry, struct dentry *target) + { + write_seqlock(&rename_lock); +- __d_move(dentry, target); ++ __d_move(dentry, target, false); + write_sequnlock(&rename_lock); + } + EXPORT_SYMBOL(d_move); + ++/* ++ * d_exchange - exchange two dentries ++ * @dentry1: first dentry ++ * @dentry2: second dentry ++ */ ++void d_exchange(struct dentry *dentry1, struct dentry *dentry2) ++{ ++ write_seqlock(&rename_lock); ++ ++ WARN_ON(!dentry1->d_inode); ++ WARN_ON(!dentry2->d_inode); ++ WARN_ON(IS_ROOT(dentry1)); ++ WARN_ON(IS_ROOT(dentry2)); ++ ++ __d_move(dentry1, dentry2, true); ++ ++ write_sequnlock(&rename_lock); ++} ++ + /** + * d_ancestor - search for an ancestor + * @p1: ancestor dentry +@@ -2668,7 +2700,7 @@ static struct dentry *__d_unalias(struct inode *inode, + m2 = &alias->d_parent->d_inode->i_mutex; + out_unalias: + if (likely(!d_mountpoint(alias))) { +- __d_move(alias, dentry); ++ __d_move(alias, dentry, false); + ret = alias; + } + out_err: +diff --git a/fs/namei.c b/fs/namei.c +index 137a7b8..5e41971 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4035,6 +4035,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + const unsigned char *old_name; + struct inode *source = old_dentry->d_inode; + struct inode *target = new_dentry->d_inode; ++ bool new_is_dir = false; ++ unsigned max_links = new_dir->i_sb->s_max_links; + + if (source 
== target) + return 0; +@@ -4043,10 +4045,16 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + if (error) + return error; + +- if (!target) ++ if (!target) { + error = may_create(new_dir, new_dentry); +- else +- error = may_delete(new_dir, new_dentry, is_dir); ++ } else { ++ new_is_dir = d_is_dir(new_dentry); ++ ++ if (!(flags & RENAME_EXCHANGE)) ++ error = may_delete(new_dir, new_dentry, is_dir); ++ else ++ error = may_delete(new_dir, new_dentry, new_is_dir); ++ } + if (error) + return error; + +@@ -4060,10 +4068,17 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + * If we are going to change the parent - check write permissions, + * we'll need to flip '..'. + */ +- if (is_dir && new_dir != old_dir) { +- error = inode_permission(source, MAY_WRITE); +- if (error) +- return error; ++ if (new_dir != old_dir) { ++ if (is_dir) { ++ error = inode_permission(source, MAY_WRITE); ++ if (error) ++ return error; ++ } ++ if ((flags & RENAME_EXCHANGE) && new_is_dir) { ++ error = inode_permission(target, MAY_WRITE); ++ if (error) ++ return error; ++ } + } + + error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, +@@ -4073,7 +4088,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + + old_name = fsnotify_oldname_init(old_dentry->d_name.name); + dget(new_dentry); +- if (!is_dir) ++ if (!is_dir || (flags & RENAME_EXCHANGE)) + lock_two_nondirectories(source, target); + else if (target) + mutex_lock(&target->i_mutex); +@@ -4082,25 +4097,25 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) + goto out; + +- if (is_dir) { +- unsigned max_links = new_dir->i_sb->s_max_links; +- ++ if (max_links && new_dir != old_dir) { + error = -EMLINK; +- if (max_links && !target && new_dir != old_dir && +- new_dir->i_nlink >= max_links) ++ if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) + goto out; +- +- if (target) +- 
shrink_dcache_parent(new_dentry); +- } else { ++ if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && ++ old_dir->i_nlink >= max_links) ++ goto out; ++ } ++ if (is_dir && !(flags & RENAME_EXCHANGE) && target) ++ shrink_dcache_parent(new_dentry); ++ if (!is_dir) { + error = try_break_deleg(source, delegated_inode); + if (error) + goto out; +- if (target) { +- error = try_break_deleg(target, delegated_inode); +- if (error) +- goto out; +- } ++ } ++ if (target && !new_is_dir) { ++ error = try_break_deleg(target, delegated_inode); ++ if (error) ++ goto out; + } + if (!flags) { + error = old_dir->i_op->rename(old_dir, old_dentry, +@@ -4112,22 +4127,31 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + if (error) + goto out; + +- if (target) { ++ if (!(flags & RENAME_EXCHANGE) && target) { + if (is_dir) + target->i_flags |= S_DEAD; + dont_mount(new_dentry); + } +- if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) +- d_move(old_dentry, new_dentry); ++ if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { ++ if (!(flags & RENAME_EXCHANGE)) ++ d_move(old_dentry, new_dentry); ++ else ++ d_exchange(old_dentry, new_dentry); ++ } + out: +- if (!is_dir) ++ if (!is_dir || (flags & RENAME_EXCHANGE)) + unlock_two_nondirectories(source, target); + else if (target) + mutex_unlock(&target->i_mutex); + dput(new_dentry); +- if (!error) ++ if (!error) { + fsnotify_move(old_dir, new_dir, old_name, is_dir, +- target, old_dentry); ++ !(flags & RENAME_EXCHANGE) ? 
target : NULL, old_dentry); ++ if (flags & RENAME_EXCHANGE) { ++ fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, ++ new_is_dir, NULL, new_dentry); ++ } ++ } + fsnotify_oldname_free(old_name); + + return error; +@@ -4147,7 +4171,10 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, + bool should_retry = false; + int error; + +- if (flags & ~RENAME_NOREPLACE) ++ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) ++ return -EINVAL; ++ ++ if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) + return -EINVAL; + + retry: +@@ -4184,7 +4211,8 @@ retry: + + oldnd.flags &= ~LOOKUP_PARENT; + newnd.flags &= ~LOOKUP_PARENT; +- newnd.flags |= LOOKUP_RENAME_TARGET; ++ if (!(flags & RENAME_EXCHANGE)) ++ newnd.flags |= LOOKUP_RENAME_TARGET; + + retry_deleg: + trap = lock_rename(new_dir, old_dir); +@@ -4204,12 +4232,23 @@ retry_deleg: + error = -EEXIST; + if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) + goto exit5; ++ if (flags & RENAME_EXCHANGE) { ++ error = -ENOENT; ++ if (d_is_negative(new_dentry)) ++ goto exit5; ++ ++ if (!d_is_dir(new_dentry)) { ++ error = -ENOTDIR; ++ if (newnd.last.name[newnd.last.len]) ++ goto exit5; ++ } ++ } + /* unless the source is a directory trailing slashes give -ENOTDIR */ + if (!d_is_dir(old_dentry)) { + error = -ENOTDIR; + if (oldnd.last.name[oldnd.last.len]) + goto exit5; +- if (newnd.last.name[newnd.last.len]) ++ if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len]) + goto exit5; + } + /* source should not be ancestor of target */ +@@ -4217,7 +4256,8 @@ retry_deleg: + if (old_dentry == trap) + goto exit5; + /* target should not be an ancestor of source */ +- error = -ENOTEMPTY; ++ if (!(flags & RENAME_EXCHANGE)) ++ error = -ENOTEMPTY; + if (new_dentry == trap) + goto exit5; + +diff --git a/include/linux/dcache.h b/include/linux/dcache.h +index 3b50cac..3b9bfdb 100644 +--- a/include/linux/dcache.h ++++ b/include/linux/dcache.h +@@ -308,6 +308,7 @@ extern void 
dentry_update_name_case(struct dentry *, struct qstr *); + + /* used for rename() and baskets */ + extern void d_move(struct dentry *, struct dentry *); ++extern void d_exchange(struct dentry *, struct dentry *); + extern struct dentry *d_ancestor(struct dentry *, struct dentry *); + + /* appendix may either be NULL or be used for transname suffixes */ +diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h +index 9250f4d..ca1a11b 100644 +--- a/include/uapi/linux/fs.h ++++ b/include/uapi/linux/fs.h +@@ -36,6 +36,7 @@ + #define SEEK_MAX SEEK_HOLE + + #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ ++#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ + + struct fstrim_range { + __u64 start; +diff --git a/security/security.c b/security/security.c +index 284fbc9..8b774f3 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -439,6 +439,14 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, + if (unlikely(IS_PRIVATE(old_dentry->d_inode) || + (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) + return 0; ++ ++ if (flags & RENAME_EXCHANGE) { ++ int err = security_ops->path_rename(new_dir, new_dentry, ++ old_dir, old_dentry); ++ if (err) ++ return err; ++ } ++ + return security_ops->path_rename(old_dir, old_dentry, new_dir, + new_dentry); + } +@@ -531,6 +539,14 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, + if (unlikely(IS_PRIVATE(old_dentry->d_inode) || + (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) + return 0; ++ ++ if (flags & RENAME_EXCHANGE) { ++ int err = security_ops->inode_rename(new_dir, new_dentry, ++ old_dir, old_dentry); ++ if (err) ++ return err; ++ } ++ + return security_ops->inode_rename(old_dir, old_dentry, + new_dir, new_dentry); + } +-- +2.7.4 + + +From a756aea51bb937b6a150c4d1cfff66824285491d Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 23 Jul 2014 15:15:30 +0200 +Subject: [PATCH 07/73] fs: call rename2 if exists + 
+Christoph Hellwig suggests: + +1) make vfs_rename call ->rename2 if it exists instead of ->rename +2) switch all filesystems that you're adding NOREPLACE support for to + use ->rename2 +3) see how many ->rename instances we'll have left after a few + iterations of 2. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Christoph Hellwig +Signed-off-by: Al Viro +(cherry picked from commit 7177a9c4b509eb357cc450256bc3cf39f1a1e639) +Signed-off-by: Alex Shi + + Conflicts: + fs/fuse/dir.c +--- + fs/ext4/namei.c | 1 - + fs/fuse/dir.c | 32 +++++++++++++++++++++++++++++++- + fs/namei.c | 5 +++-- + 3 files changed, 34 insertions(+), 4 deletions(-) + +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index f0598d6..98de4b3 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -3214,7 +3214,6 @@ const struct inode_operations ext4_dir_inode_operations = { + .rmdir = ext4_rmdir, + .mknod = ext4_mknod, + .tmpfile = ext4_tmpfile, +- .rename = ext4_rename, + .rename2 = ext4_rename2, + .setattr = ext4_setattr, + .setxattr = generic_setxattr, +diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c +index 342f0239..d53a7d9 100644 +--- a/fs/fuse/dir.c ++++ b/fs/fuse/dir.c +@@ -796,6 +796,36 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, + return err; + } + ++static int fuse_rename2(struct inode *olddir, struct dentry *oldent, ++ struct inode *newdir, struct dentry *newent, ++ unsigned int flags) ++{ ++ struct fuse_conn *fc = get_fuse_conn(olddir); ++ int err; ++ ++ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) ++ return -EINVAL; ++ ++ if (flags) { ++ if (fc->no_rename2 || fc->minor < 23) ++ return -EINVAL; ++ ++ err = fuse_rename_common(olddir, oldent, newdir, newent, flags, ++ FUSE_RENAME2, ++ sizeof(struct fuse_rename2_in)); ++ if (err == -ENOSYS) { ++ fc->no_rename2 = 1; ++ err = -EINVAL; ++ } ++ } else { ++ err = fuse_rename_common(olddir, oldent, newdir, newent, 0, ++ FUSE_RENAME, ++ sizeof(struct fuse_rename_in)); ++ } ++ ++ return err; ++} ++ + static int 
fuse_link(struct dentry *entry, struct inode *newdir, + struct dentry *newent) + { +@@ -1882,7 +1912,7 @@ static const struct inode_operations fuse_dir_inode_operations = { + .symlink = fuse_symlink, + .unlink = fuse_unlink, + .rmdir = fuse_rmdir, +- .rename = fuse_rename, ++ .rename2 = fuse_rename2, + .link = fuse_link, + .setattr = fuse_setattr, + .create = fuse_create, +diff --git a/fs/namei.c b/fs/namei.c +index 5e41971..fa7dd2d 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4058,7 +4058,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + if (error) + return error; + +- if (!old_dir->i_op->rename) ++ if (!old_dir->i_op->rename && !old_dir->i_op->rename2) + return -EPERM; + + if (flags && !old_dir->i_op->rename2) +@@ -4117,10 +4117,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + if (error) + goto out; + } +- if (!flags) { ++ if (!old_dir->i_op->rename2) { + error = old_dir->i_op->rename(old_dir, old_dentry, + new_dir, new_dentry); + } else { ++ WARN_ON(old_dir->i_op->rename != NULL); + error = old_dir->i_op->rename2(old_dir, old_dentry, + new_dir, new_dentry, flags); + } +-- +2.7.4 + + +From c5948f26c2de38f3b475d10cfb6695245f52b6ff Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Sat, 1 Feb 2014 04:43:32 -0500 +Subject: [PATCH 08/73] get rid of pointless checks for NULL ->i_op + +Signed-off-by: Al Viro +(cherry picked from commit 627bf81ac625f05060db033a0f3791521ad7bd79) +Signed-off-by: Alex Shi +--- + fs/cachefiles/bind.c | 1 - + fs/cachefiles/namei.c | 3 +-- + security/integrity/evm/evm_crypto.c | 2 +- + security/integrity/evm/evm_main.c | 2 +- + security/tomoyo/realpath.c | 4 ++-- + 5 files changed, 5 insertions(+), 7 deletions(-) + +diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c +index 622f469..5b99baf 100644 +--- a/fs/cachefiles/bind.c ++++ b/fs/cachefiles/bind.c +@@ -124,7 +124,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) + /* check parameters */ + ret = -EOPNOTSUPP; + if 
(!root->d_inode || +- !root->d_inode->i_op || + !root->d_inode->i_op->lookup || + !root->d_inode->i_op->mkdir || + !root->d_inode->i_op->setxattr || +diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c +index 6494d9f..c0a6817 100644 +--- a/fs/cachefiles/namei.c ++++ b/fs/cachefiles/namei.c +@@ -779,8 +779,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, + } + + ret = -EPERM; +- if (!subdir->d_inode->i_op || +- !subdir->d_inode->i_op->setxattr || ++ if (!subdir->d_inode->i_op->setxattr || + !subdir->d_inode->i_op->getxattr || + !subdir->d_inode->i_op->lookup || + !subdir->d_inode->i_op->mkdir || +diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c +index 3bab89e..e90ab0e 100644 +--- a/security/integrity/evm/evm_crypto.c ++++ b/security/integrity/evm/evm_crypto.c +@@ -137,7 +137,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, + int error; + int size; + +- if (!inode->i_op || !inode->i_op->getxattr) ++ if (!inode->i_op->getxattr) + return -EOPNOTSUPP; + desc = init_desc(type); + if (IS_ERR(desc)) +diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c +index 7e71e06..690cd63 100644 +--- a/security/integrity/evm/evm_main.c ++++ b/security/integrity/evm/evm_main.c +@@ -62,7 +62,7 @@ static int evm_find_protected_xattrs(struct dentry *dentry) + int error; + int count = 0; + +- if (!inode->i_op || !inode->i_op->getxattr) ++ if (!inode->i_op->getxattr) + return -EOPNOTSUPP; + + for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) { +diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c +index 80a09c3..a3386d1 100644 +--- a/security/tomoyo/realpath.c ++++ b/security/tomoyo/realpath.c +@@ -173,7 +173,7 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer, + * Use filesystem name if filesystem does not support rename() + * operation. 
+ */ +- if (inode->i_op && !inode->i_op->rename) ++ if (!inode->i_op->rename) + goto prepend_filesystem_name; + } + /* Prepend device name. */ +@@ -282,7 +282,7 @@ char *tomoyo_realpath_from_path(struct path *path) + * Get local name for filesystems without rename() operation + * or dentry without vfsmount. + */ +- if (!path->mnt || (inode->i_op && !inode->i_op->rename)) ++ if (!path->mnt || !inode->i_op->rename) + pos = tomoyo_get_local_path(path->dentry, buf, + buf_len - 1); + /* Get absolute name for the rest. */ +-- +2.7.4 + + +From bb9934859afca84e19c3213060f2f2eeaff5d6a1 Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Wed, 20 Aug 2014 14:14:04 +0900 +Subject: [PATCH 09/73] tomoyo: Fix pathname calculation breakage. + +Commit 7177a9c4b509 ("fs: call rename2 if exists") changed +"struct inode_operations"->rename == NULL if +"struct inode_operations"->rename2 != NULL . + +TOMOYO needs to check for both ->rename and ->rename2 , or +a system on (e.g.) ext4 filesystem won't boot. + +Signed-off-by: Tetsuo Handa +Signed-off-by: Serge E. Hallyn +(cherry picked from commit 8fe7a268b18ebc89203c766b020b9e32f1cfeebf) +Signed-off-by: Alex Shi +--- + security/tomoyo/realpath.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c +index a3386d1..bed745c 100644 +--- a/security/tomoyo/realpath.c ++++ b/security/tomoyo/realpath.c +@@ -173,7 +173,7 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer, + * Use filesystem name if filesystem does not support rename() + * operation. + */ +- if (!inode->i_op->rename) ++ if (!inode->i_op->rename && !inode->i_op->rename2) + goto prepend_filesystem_name; + } + /* Prepend device name. */ +@@ -282,7 +282,8 @@ char *tomoyo_realpath_from_path(struct path *path) + * Get local name for filesystems without rename() operation + * or dentry without vfsmount. 
+ */ +- if (!path->mnt || !inode->i_op->rename) ++ if (!path->mnt || ++ (!inode->i_op->rename && !inode->i_op->rename2)) + pos = tomoyo_get_local_path(path->dentry, buf, + buf_len - 1); + /* Get absolute name for the rest. */ +-- +2.7.4 + + +From 0750d56323044df86224bb9aa3d0830bada1cb81 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Fri, 14 Mar 2014 13:42:45 -0400 +Subject: [PATCH 10/73] new helper: readlink_copy() + +Signed-off-by: Al Viro +(cherry picked from commit 5d826c847b34de6415b4f1becd88a57ff619af50) +Signed-off-by: Alex Shi + + Conflicts: + fs/namei.c +--- + fs/namei.c | 12 ++++-------- + fs/proc/namespaces.c | 14 ++++---------- + fs/proc/self.c | 2 +- + fs/xfs/xfs_ioctl.c | 28 +--------------------------- + include/linux/fs.h | 2 +- + 5 files changed, 11 insertions(+), 47 deletions(-) + +diff --git a/fs/namei.c b/fs/namei.c +index fa7dd2d..ef28dcd 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4309,11 +4309,9 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna + return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); + } + +-int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) ++int readlink_copy(char __user *buffer, int buflen, const char *link) + { +- int len; +- +- len = PTR_ERR(link); ++ int len = PTR_ERR(link); + if (IS_ERR(link)) + goto out; + +@@ -4342,7 +4340,7 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) + if (IS_ERR(cookie)) + return PTR_ERR(cookie); + +- res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); ++ res = readlink_copy(buffer, buflen, nd_get_link(&nd)); + if (dentry->d_inode->i_op->put_link) + dentry->d_inode->i_op->put_link(dentry, &nd, cookie); + return res; +@@ -4366,8 +4364,7 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage) + int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) + { + struct page *page = NULL; +- char *s = page_getlink(dentry, &page); +- int res = 
vfs_readlink(dentry,buffer,buflen,s); ++ int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page)); + if (page) { + kunmap(page); + page_cache_release(page); +@@ -4463,7 +4460,6 @@ EXPORT_SYMBOL(vfs_link); + EXPORT_SYMBOL(vfs_mkdir); + EXPORT_SYMBOL(vfs_mknod); + EXPORT_SYMBOL(generic_permission); +-EXPORT_SYMBOL(vfs_readlink); + EXPORT_SYMBOL(vfs_rename); + EXPORT_SYMBOL(vfs_rmdir); + EXPORT_SYMBOL(vfs_symlink); +diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c +index 9ae46b8..8902609 100644 +--- a/fs/proc/namespaces.c ++++ b/fs/proc/namespaces.c +@@ -146,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl + struct task_struct *task; + void *ns; + char name[50]; +- int len = -EACCES; ++ int res = -EACCES; + + task = get_proc_task(inode); + if (!task) +@@ -155,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out_put_task; + +- len = -ENOENT; ++ res = -ENOENT; + ns = ns_ops->get(task); + if (!ns) + goto out_put_task; + + snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); +- len = strlen(name); +- +- if (len > buflen) +- len = buflen; +- if (copy_to_user(buffer, name, len)) +- len = -EFAULT; +- ++ res = readlink_copy(buffer, buflen, name); + ns_ops->put(ns); + out_put_task: + put_task_struct(task); + out: +- return len; ++ return res; + } + + static const struct inode_operations proc_ns_link_inode_operations = { +diff --git a/fs/proc/self.c b/fs/proc/self.c +index ffeb202..4348bb8 100644 +--- a/fs/proc/self.c ++++ b/fs/proc/self.c +@@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, + if (!tgid) + return -ENOENT; + sprintf(tmp, "%d", tgid); +- return vfs_readlink(dentry,buffer,buflen,tmp); ++ return readlink_copy(buffer, buflen, tmp); + } + + static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +diff --git 
a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c +index 78e62cc..6152cbe 100644 +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -271,32 +271,6 @@ xfs_open_by_handle( + return error; + } + +-/* +- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's +- * unused first argument. +- */ +-STATIC int +-do_readlink( +- char __user *buffer, +- int buflen, +- const char *link) +-{ +- int len; +- +- len = PTR_ERR(link); +- if (IS_ERR(link)) +- goto out; +- +- len = strlen(link); +- if (len > (unsigned) buflen) +- len = buflen; +- if (copy_to_user(buffer, link, len)) +- len = -EFAULT; +- out: +- return len; +-} +- +- + int + xfs_readlink_by_handle( + struct file *parfilp, +@@ -334,7 +308,7 @@ xfs_readlink_by_handle( + error = -xfs_readlink(XFS_I(dentry->d_inode), link); + if (error) + goto out_kfree; +- error = do_readlink(hreq->ohandle, olen, link); ++ error = readlink_copy(hreq->ohandle, olen, link); + if (error) + goto out_kfree; + +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 3b3670e..29170bf 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2566,7 +2566,7 @@ extern const struct file_operations generic_ro_fops; + + #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) + +-extern int vfs_readlink(struct dentry *, char __user *, int, const char *); ++extern int readlink_copy(char __user *, int, const char *); + extern int page_readlink(struct dentry *, char __user *, int); + extern void *page_follow_link_light(struct dentry *, struct nameidata *); + extern void page_put_link(struct dentry *, struct nameidata *, void *); +-- +2.7.4 + + +From 6c1a7bfb6a5ac8cf057fd191acd8f28b23c60dd4 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Fri, 14 Mar 2014 12:54:25 -0400 +Subject: [PATCH 11/73] lustre: generic_readlink() is just fine there, TYVM... 
+ +Signed-off-by: Al Viro +(cherry picked from commit 4efcc9ffcd4fc53f1f7de539842cdffa1f8e5ecc) +Signed-off-by: Alex Shi +--- + drivers/staging/lustre/lustre/llite/symlink.c | 23 +---------------------- + 1 file changed, 1 insertion(+), 22 deletions(-) + +diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c +index ab06891..80d48b5 100644 +--- a/drivers/staging/lustre/lustre/llite/symlink.c ++++ b/drivers/staging/lustre/lustre/llite/symlink.c +@@ -115,27 +115,6 @@ failed: + return rc; + } + +-static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) +-{ +- struct inode *inode = dentry->d_inode; +- struct ptlrpc_request *request; +- char *symname; +- int rc; +- +- CDEBUG(D_VFSTRACE, "VFS Op\n"); +- +- ll_inode_size_lock(inode); +- rc = ll_readlink_internal(inode, &request, &symname); +- if (rc) +- GOTO(out, rc); +- +- rc = vfs_readlink(dentry, buffer, buflen, symname); +- out: +- ptlrpc_req_finished(request); +- ll_inode_size_unlock(inode); +- return rc; +-} +- + static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd) + { + struct inode *inode = dentry->d_inode; +@@ -175,7 +154,7 @@ static void ll_put_link(struct dentry *dentry, struct nameidata *nd, void *cooki + } + + struct inode_operations ll_fast_symlink_inode_operations = { +- .readlink = ll_readlink, ++ .readlink = generic_readlink, + .setattr = ll_setattr, + .follow_link = ll_follow_link, + .put_link = ll_put_link, +-- +2.7.4 + + +From 71732036259eac84c4b4add5b7503a0f01905bc4 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 24 Oct 2014 00:14:35 +0200 +Subject: [PATCH 12/73] vfs: add i_op->dentry_open() + +Add a new inode operation i_op->dentry_open(). This is for stacked filesystems +that want to return a struct file from a different filesystem. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 4aa7c6346be395bdf776f82bbb2e3e2bc60bdd2b) +Signed-off-by: Alex Shi +--- + Documentation/filesystems/Locking | 2 ++ + Documentation/filesystems/vfs.txt | 7 +++++++ + fs/namei.c | 9 ++++++--- + fs/open.c | 23 +++++++++++++++++++++-- + include/linux/fs.h | 4 ++++ + 5 files changed, 40 insertions(+), 5 deletions(-) + +diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking +index f424e0e..821c8f4 100644 +--- a/Documentation/filesystems/Locking ++++ b/Documentation/filesystems/Locking +@@ -67,6 +67,7 @@ prototypes: + struct file *, unsigned open_flag, + umode_t create_mode, int *opened); + int (*tmpfile) (struct inode *, struct dentry *, umode_t); ++ int (*dentry_open)(struct dentry *, struct file *, const struct cred *); + + locking rules: + all may block +@@ -96,6 +97,7 @@ fiemap: no + update_time: no + atomic_open: yes + tmpfile: no ++dentry_open: no + + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on + victim. +diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt +index 94eb862..5d06f2a 100644 +--- a/Documentation/filesystems/vfs.txt ++++ b/Documentation/filesystems/vfs.txt +@@ -364,6 +364,7 @@ struct inode_operations { + int (*atomic_open)(struct inode *, struct dentry *, struct file *, + unsigned open_flag, umode_t create_mode, int *opened); + int (*tmpfile) (struct inode *, struct dentry *, umode_t); ++ int (*dentry_open)(struct dentry *, struct file *, const struct cred *); + }; + + Again, all methods are called without any locks being held, unless +@@ -697,6 +698,12 @@ struct address_space_operations { + but instead uses bmap to find out where the blocks in the file + are and uses those addresses directly. 
+ ++ dentry_open: *WARNING: probably going away soon, do not use!* This is an ++ alternative to f_op->open(), the difference is that this method may open ++ a file not necessarily originating from the same filesystem as the one ++ i_op->open() was called on. It may be useful for stacking filesystems ++ which want to allow native I/O directly on underlying files. ++ + + invalidatepage: If a page has PagePrivate set, then invalidatepage + will be called when part or all of the page is to be removed +diff --git a/fs/namei.c b/fs/namei.c +index ef28dcd..b987228 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -3037,9 +3037,12 @@ finish_open_created: + error = may_open(&nd->path, acc_mode, open_flag); + if (error) + goto out; +- file->f_path.mnt = nd->path.mnt; +- error = finish_open(file, nd->path.dentry, NULL, opened); +- if (error) { ++ ++ BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ ++ error = vfs_open(&nd->path, file, current_cred()); ++ if (!error) { ++ *opened |= FILE_OPENED; ++ } else { + if (error == -EOPENSTALE) + goto stale_open; + goto out; +diff --git a/fs/open.c b/fs/open.c +index 2ed7325..c92c6ef 100644 +--- a/fs/open.c ++++ b/fs/open.c +@@ -822,8 +822,7 @@ struct file *dentry_open(const struct path *path, int flags, + f = get_empty_filp(); + if (!IS_ERR(f)) { + f->f_flags = flags; +- f->f_path = *path; +- error = do_dentry_open(f, NULL, cred); ++ error = vfs_open(path, f, cred); + if (!error) { + /* from now on we need fput() to dispose of f */ + error = open_check_o_direct(f); +@@ -840,6 +839,26 @@ struct file *dentry_open(const struct path *path, int flags, + } + EXPORT_SYMBOL(dentry_open); + ++/** ++ * vfs_open - open the file at the given path ++ * @path: path to open ++ * @filp: newly allocated file with f_flag initialized ++ * @cred: credentials to use ++ */ ++int vfs_open(const struct path *path, struct file *filp, ++ const struct cred *cred) ++{ ++ struct inode *inode = path->dentry->d_inode; ++ ++ if 
(inode->i_op->dentry_open) ++ return inode->i_op->dentry_open(path->dentry, filp, cred); ++ else { ++ filp->f_path = *path; ++ return do_dentry_open(filp, NULL, cred); ++ } ++} ++EXPORT_SYMBOL(vfs_open); ++ + static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) + { + int lookup_flags = 0; +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 29170bf..ce38acf 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1587,6 +1587,9 @@ struct inode_operations { + umode_t create_mode, int *opened); + int (*tmpfile) (struct inode *, struct dentry *, umode_t); + int (*set_acl)(struct inode *, struct posix_acl *, int); ++ ++ /* WARNING: probably going away soon, do not use! */ ++ int (*dentry_open)(struct dentry *, struct file *, const struct cred *); + } ____cacheline_aligned; + + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, +@@ -2081,6 +2084,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t); + extern struct file *filp_open(const char *, int, umode_t); + extern struct file *file_open_root(struct dentry *, struct vfsmount *, + const char *, int); ++extern int vfs_open(const struct path *, struct file *, const struct cred *); + extern struct file * dentry_open(const struct path *, int, const struct cred *); + extern int filp_close(struct file *, fl_owner_t id); + +-- +2.7.4 + + +From 5c45fc6a9c14be17ae4d203d35fe82c6b4575de4 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 24 Oct 2014 00:14:35 +0200 +Subject: [PATCH 13/73] vfs: export do_splice_direct() to modules + +Export do_splice_direct() to modules. Needed by overlay filesystem. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 1c118596a7682912106c80007102ce0184c77780) +Signed-off-by: Alex Shi +--- + fs/internal.h | 6 ------ + fs/splice.c | 1 + + include/linux/fs.h | 3 +++ + 3 files changed, 4 insertions(+), 6 deletions(-) + +diff --git a/fs/internal.h b/fs/internal.h +index 4657424..873cdf5 100644 +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -135,12 +135,6 @@ extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); + extern int rw_verify_area(int, struct file *, const loff_t *, size_t); + + /* +- * splice.c +- */ +-extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, +- loff_t *opos, size_t len, unsigned int flags); +- +-/* + * pipe.c + */ + extern const struct file_operations pipefifo_fops; +diff --git a/fs/splice.c b/fs/splice.c +index 12028fa..ffb92b9 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -1327,6 +1327,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + + return ret; + } ++EXPORT_SYMBOL(do_splice_direct); + + static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, + struct pipe_inode_info *opipe, +diff --git a/include/linux/fs.h b/include/linux/fs.h +index ce38acf..5bcbbf3 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2506,6 +2506,9 @@ extern ssize_t generic_file_splice_write(struct pipe_inode_info *, + struct file *, loff_t *, size_t, unsigned int); + extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, + struct file *out, loff_t *, size_t len, unsigned int flags); ++extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ++ loff_t *opos, size_t len, unsigned int flags); ++ + + extern void + file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); +-- +2.7.4 + + +From 96bd0c53e4204392144978e5a40f4699570be22b Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 24 Oct 2014 00:14:35 +0200 +Subject: [PATCH 14/73] vfs: export __inode_permission() to modules + 
+We need to be able to check inode permissions (but not filesystem implied +permissions) for stackable filesystems. Expose this interface for overlayfs. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit bd5d08569cc379f8366663a61558a9ce17c2e460) +Signed-off-by: Alex Shi +--- + fs/internal.h | 1 - + fs/namei.c | 1 + + include/linux/fs.h | 1 + + 3 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/internal.h b/fs/internal.h +index 873cdf5..dd41b12 100644 +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -42,7 +42,6 @@ extern void __init chrdev_init(void); + /* + * namei.c + */ +-extern int __inode_permission(struct inode *, int); + extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); + extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + const char *, unsigned int, struct path *); +diff --git a/fs/namei.c b/fs/namei.c +index b987228..56b30af 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -415,6 +415,7 @@ int __inode_permission(struct inode *inode, int mask) + + return security_inode_permission(inode, mask); + } ++EXPORT_SYMBOL(__inode_permission); + + /** + * sb_permission - Check superblock-level permissions +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 5bcbbf3..82c78d6 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2298,6 +2298,7 @@ extern sector_t bmap(struct inode *, sector_t); + #endif + extern int notify_change(struct dentry *, struct iattr *, struct inode **); + extern int inode_permission(struct inode *, int); ++extern int __inode_permission(struct inode *, int); + extern int generic_permission(struct inode *, int); + + static inline bool execute_ok(struct inode *inode) +-- +2.7.4 + + +From 574c7ef9787b3b3d0513830e40323c34c2f53a39 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 24 Oct 2014 00:14:36 +0200 +Subject: [PATCH 15/73] vfs: introduce clone_private_mount() + +Overlayfs needs a private clone of the mount, so create a function for +this and 
export to modules. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit c771d683a62e5d36bc46036f5c07f4f5bb7dda61) +Signed-off-by: Alex Shi +--- + fs/namespace.c | 27 +++++++++++++++++++++++++++ + include/linux/mount.h | 3 +++ + 2 files changed, 30 insertions(+) + +diff --git a/fs/namespace.c b/fs/namespace.c +index 039f380..c6533ce 100644 +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -1586,6 +1586,33 @@ void drop_collected_mounts(struct vfsmount *mnt) + namespace_unlock(); + } + ++/** ++ * clone_private_mount - create a private clone of a path ++ * ++ * This creates a new vfsmount, which will be the clone of @path. The new will ++ * not be attached anywhere in the namespace and will be private (i.e. changes ++ * to the originating mount won't be propagated into this). ++ * ++ * Release with mntput(). ++ */ ++struct vfsmount *clone_private_mount(struct path *path) ++{ ++ struct mount *old_mnt = real_mount(path->mnt); ++ struct mount *new_mnt; ++ ++ if (IS_MNT_UNBINDABLE(old_mnt)) ++ return ERR_PTR(-EINVAL); ++ ++ down_read(&namespace_sem); ++ new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); ++ up_read(&namespace_sem); ++ if (IS_ERR(new_mnt)) ++ return ERR_CAST(new_mnt); ++ ++ return &new_mnt->mnt; ++} ++EXPORT_SYMBOL_GPL(clone_private_mount); ++ + int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, + struct vfsmount *root) + { +diff --git a/include/linux/mount.h b/include/linux/mount.h +index b0c1e65..fff78cb 100644 +--- a/include/linux/mount.h ++++ b/include/linux/mount.h +@@ -81,6 +81,9 @@ extern void mnt_pin(struct vfsmount *mnt); + extern void mnt_unpin(struct vfsmount *mnt); + extern int __mnt_is_readonly(struct vfsmount *mnt); + ++struct path; ++extern struct vfsmount *clone_private_mount(struct path *path); ++ + struct file_system_type; + extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, + int flags, const char *name, +-- +2.7.4 + + +From ae311653282de97e927ff8c961a79be3ab259cb1 Mon Sep 17 00:00:00 2001 
+From: Miklos Szeredi +Date: Fri, 24 Oct 2014 00:14:36 +0200 +Subject: [PATCH 16/73] vfs: export check_sticky() + +It's already duplicated in btrfs and about to be used in overlayfs too. + +Move the sticky bit check to an inline helper and call the out-of-line +helper only in the unlikely case of the sticky bit being set. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit cbdf35bcb833bfd00f0925d7a9a33a21f41ea582) +Signed-off-by: Alex Shi +--- + fs/btrfs/ioctl.c | 20 +------------------- + fs/namei.c | 9 ++------- + include/linux/fs.h | 9 +++++++++ + 3 files changed, 12 insertions(+), 26 deletions(-) + +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index 0b72006..96b95e6 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -682,23 +682,6 @@ out: + return ret; + } + +-/* copy of check_sticky in fs/namei.c() +-* It's inline, so penalty for filesystems that don't use sticky bit is +-* minimal. +-*/ +-static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) +-{ +- kuid_t fsuid = current_fsuid(); +- +- if (!(dir->i_mode & S_ISVTX)) +- return 0; +- if (uid_eq(inode->i_uid, fsuid)) +- return 0; +- if (uid_eq(dir->i_uid, fsuid)) +- return 0; +- return !capable(CAP_FOWNER); +-} +- + /* copy of may_delete in fs/namei.c() + * Check whether we can remove a link victim from directory dir, check + * whether the type of victim is right. 
+@@ -734,8 +717,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) + return error; + if (IS_APPEND(dir)) + return -EPERM; +- if (btrfs_check_sticky(dir, victim->d_inode)|| +- IS_APPEND(victim->d_inode)|| ++ if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) || + IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) + return -EPERM; + if (isdir) { +diff --git a/fs/namei.c b/fs/namei.c +index 56b30af..1fd5d75 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2360,22 +2360,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path, + } + EXPORT_SYMBOL(kern_path_mountpoint); + +-/* +- * It's inline, so penalty for filesystems that don't use sticky bit is +- * minimal. +- */ +-static inline int check_sticky(struct inode *dir, struct inode *inode) ++int __check_sticky(struct inode *dir, struct inode *inode) + { + kuid_t fsuid = current_fsuid(); + +- if (!(dir->i_mode & S_ISVTX)) +- return 0; + if (uid_eq(inode->i_uid, fsuid)) + return 0; + if (uid_eq(dir->i_uid, fsuid)) + return 0; + return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); + } ++EXPORT_SYMBOL(__check_sticky); + + /* + * Check whether we can remove a link victim from directory dir, check +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 82c78d6..ddb9ab5 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2300,6 +2300,7 @@ extern int notify_change(struct dentry *, struct iattr *, struct inode **); + extern int inode_permission(struct inode *, int); + extern int __inode_permission(struct inode *, int); + extern int generic_permission(struct inode *, int); ++extern int __check_sticky(struct inode *dir, struct inode *inode); + + static inline bool execute_ok(struct inode *inode) + { +@@ -2787,6 +2788,14 @@ static inline int is_sxid(umode_t mode) + return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); + } + ++static inline int check_sticky(struct inode *dir, struct inode *inode) ++{ ++ if (!(dir->i_mode & 
S_ISVTX)) ++ return 0; ++ ++ return __check_sticky(dir, inode); ++} ++ + static inline void inode_has_no_xattr(struct inode *inode) + { + if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) +-- +2.7.4 + + +From 9354e064adb1c963fdec1d38dc43d48dee394e1e Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 24 Oct 2014 00:14:36 +0200 +Subject: [PATCH 17/73] vfs: add whiteout support + +Whiteout isn't actually a new file type, but is represented as a char +device (Linus's idea) with 0/0 device number. + +This has several advantages compared to introducing a new whiteout file +type: + + - no userspace API changes (e.g. trivial to make backups of upper layer + filesystem, without losing whiteouts) + + - no fs image format changes (you can boot an old kernel/fsck without + whiteout support and things won't break) + + - implementation is trivial + +Signed-off-by: Miklos Szeredi +(cherry picked from commit 787fb6bc9682ec7c05fb5d9561b57100fbc1cc41) +Signed-off-by: Alex Shi +--- + fs/namei.c | 14 ++++++++++++++ + include/linux/fs.h | 11 +++++++++++ + 2 files changed, 25 insertions(+) + +diff --git a/fs/namei.c b/fs/namei.c +index 1fd5d75..9a4443b 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4308,6 +4308,20 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna + return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); + } + ++int vfs_whiteout(struct inode *dir, struct dentry *dentry) ++{ ++ int error = may_create(dir, dentry); ++ if (error) ++ return error; ++ ++ if (!dir->i_op->mknod) ++ return -EPERM; ++ ++ return dir->i_op->mknod(dir, dentry, ++ S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); ++} ++EXPORT_SYMBOL(vfs_whiteout); ++ + int readlink_copy(char __user *buffer, int buflen, const char *link) + { + int len = PTR_ERR(link); +diff --git a/include/linux/fs.h b/include/linux/fs.h +index ddb9ab5..18492c8 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -218,6 +218,13 @@ typedef void (dio_iodone_t)(struct 
kiocb *iocb, loff_t offset, + #define ATTR_TIMES_SET (1 << 16) + + /* ++ * Whiteout is represented by a char device. The following constants define the ++ * mode and device number to use. ++ */ ++#define WHITEOUT_MODE 0 ++#define WHITEOUT_DEV 0 ++ ++/* + * This is the Inode Attributes structure, used for notify_change(). It + * uses the above definitions as flags, to know which values have changed. + * Also, in this manner, a Filesystem can look at only the values it cares +@@ -1461,6 +1468,7 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino + extern int vfs_rmdir(struct inode *, struct dentry *); + extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); + extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); ++extern int vfs_whiteout(struct inode *, struct dentry *); + + /* + * VFS dentry helper functions. +@@ -1687,6 +1695,9 @@ struct super_operations { + #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) + #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) + ++#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ ++ (inode)->i_rdev == WHITEOUT_DEV) ++ + /* + * Inode state bits. Protected by inode->i_lock + * +-- +2.7.4 + + +From 0e98d78deaf317c6524acf9c57eecdefbfdbe444 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 24 Oct 2014 00:14:37 +0200 +Subject: [PATCH 18/73] vfs: add RENAME_WHITEOUT + +This adds a new RENAME_WHITEOUT flag. This flag makes rename() create a +whiteout of source. The whiteout creation is atomic relative to the +rename. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 0d7a855526dd672e114aff2ac22b60fc6f155b08) +Signed-off-by: Alex Shi +--- + fs/namei.c | 8 ++++++-- + include/uapi/linux/fs.h | 1 + + 2 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/fs/namei.c b/fs/namei.c +index 9a4443b..a047265e 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4171,12 +4171,16 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, + bool should_retry = false; + int error; + +- if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) ++ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) + return -EINVAL; + +- if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) ++ if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && ++ (flags & RENAME_EXCHANGE)) + return -EINVAL; + ++ if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) ++ return -EPERM; ++ + retry: + from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); + if (IS_ERR(from)) { +diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h +index ca1a11b..3735fa0 100644 +--- a/include/uapi/linux/fs.h ++++ b/include/uapi/linux/fs.h +@@ -37,6 +37,7 @@ + + #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ + #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ ++#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ + + struct fstrim_range { + __u64 start; +-- +2.7.4 + + +From c5341097e1a69ba7f5cbc938b927489ad3bef1ee Mon Sep 17 00:00:00 2001 +From: "Paul E. McKenney" +Date: Mon, 27 Oct 2014 21:11:27 -0700 +Subject: [PATCH 19/73] rcu: Provide counterpart to rcu_dereference() for + non-RCU situations + +Although rcu_dereference() and friends can be used in situations where +object lifetimes are being managed by something other than RCU, the +resulting sparse and lockdep-RCU noise can be annoying. This commit +therefore supplies a lockless_dereference(), which provides the +protection for dereferences without the RCU-related debugging noise. 
+ +Reported-by: Al Viro +Signed-off-by: Paul E. McKenney +Signed-off-by: Al Viro +(cherry picked from commit 54ef6df3f3f1353d99c80c437259d317b2cd1cbd) +Signed-off-by: Alex Shi +--- + include/linux/rcupdate.h | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 72bf3a0..a9a98ff 100644 +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -555,6 +555,21 @@ static inline void rcu_preempt_sleep_check(void) + #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) + + /** ++ * lockless_dereference() - safely load a pointer for later dereference ++ * @p: The pointer to load ++ * ++ * Similar to rcu_dereference(), but for situations where the pointed-to ++ * object's lifetime is managed by something other than RCU. That ++ * "something other" might be reference counting or simple immortality. ++ */ ++#define lockless_dereference(p) \ ++({ \ ++ typeof(p) _________p1 = ACCESS_ONCE(p); \ ++ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ ++ (_________p1); \ ++}) ++ ++/** + * rcu_assign_pointer() - assign to RCU-protected pointer + * @p: pointer to assign to + * @v: value to assign (publish) +-- +2.7.4 + + +From 5587227cc0f683fe4732c0a6813f96154254e96b Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 24 Oct 2014 00:14:38 +0200 +Subject: [PATCH 20/73] overlay filesystem + +Overlayfs allows one, usually read-write, directory tree to be +overlaid onto another, read-only directory tree. All modifications +go to the upper, writable layer. + +This type of mechanism is most often used for live CDs but there's a +wide variety of other uses. + +The implementation differs from other "union filesystem" +implementations in that after a file is opened all operations go +directly to the underlying, lower or upper, filesystems. This +simplifies the implementation and allows native performance in these +cases. 
+ +The dentry tree is duplicated from the underlying filesystems, this +enables fast cached lookups without adding special support into the +VFS. This uses slightly more memory than union mounts, but dentries +are relatively small. + +Currently inodes are duplicated as well, but it is a possible +optimization to share inodes for non-directories. + +Opening non directories results in the open forwarded to the +underlying filesystem. This makes the behavior very similar to union +mounts (with the same limitations vs. fchmod/fchown on O_RDONLY file +descriptors). + +Usage: + + mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper/upper,workdir=/upper/work /overlay + +The following contributions have been folded into this patch: + +Neil Brown : + - minimal remount support + - use correct seek function for directories + - initialise is_real before use + - rename ovl_fill_cache to ovl_dir_read + +Felix Fietkau : + - fix a deadlock in ovl_dir_read_merged + - fix a deadlock in ovl_remove_whiteouts + +Erez Zadok + - fix cleanup after WARN_ON + +Sedat Dilek + - fix up permission to conform to new API + +Robin Dong + - fix possible leak in ovl_new_inode + - create new inode in ovl_link + +Andy Whitcroft + - switch to __inode_permission() + - copy up i_uid/i_gid from the underlying inode + +AV: + - ovl_copy_up_locked() - dput(ERR_PTR(...)) on two failure exits + - ovl_clear_empty() - one failure exit forgetting to do unlock_rename(), + lack of check for udir being the parent of upper, dropping and regaining + the lock on udir (which would require _another_ check for parent being + right). + - bogus d_drop() in copyup and rename [fix from your mail] + - copyup/remove and copyup/rename races [fix from your mail] + - ovl_dir_fsync() leaving ERR_PTR() in ->realfile + - ovl_entry_free() is pointless - it's just a kfree_rcu() + - fold ovl_do_lookup() into ovl_lookup() + - manually assigning ->d_op is wrong. Just use ->s_d_op. 
+ [patches picked from Miklos]: + * copyup/remove and copyup/rename races + * bogus d_drop() in copyup and rename + +Also thanks to the following people for testing and reporting bugs: + + Jordi Pujol + Andy Whitcroft + Michal Suchanek + Felix Fietkau + Erez Zadok + Randy Dunlap + +Signed-off-by: Miklos Szeredi +(cherry picked from commit e9be9d5e76e34872f0c37d72e25bc27fe9e2c54c) +Signed-off-by: Alex Shi +--- + fs/Kconfig | 1 + + fs/Makefile | 1 + + fs/overlayfs/Kconfig | 10 + + fs/overlayfs/Makefile | 7 + + fs/overlayfs/copy_up.c | 414 +++++++++++++++++++++ + fs/overlayfs/dir.c | 921 +++++++++++++++++++++++++++++++++++++++++++++++ + fs/overlayfs/inode.c | 425 ++++++++++++++++++++++ + fs/overlayfs/overlayfs.h | 191 ++++++++++ + fs/overlayfs/readdir.c | 587 ++++++++++++++++++++++++++++++ + fs/overlayfs/super.c | 727 +++++++++++++++++++++++++++++++++++++ + 10 files changed, 3284 insertions(+) + create mode 100644 fs/overlayfs/Kconfig + create mode 100644 fs/overlayfs/Makefile + create mode 100644 fs/overlayfs/copy_up.c + create mode 100644 fs/overlayfs/dir.c + create mode 100644 fs/overlayfs/inode.c + create mode 100644 fs/overlayfs/overlayfs.h + create mode 100644 fs/overlayfs/readdir.c + create mode 100644 fs/overlayfs/super.c + +diff --git a/fs/Kconfig b/fs/Kconfig +index 7385e54..6b40fd8 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -67,6 +67,7 @@ source "fs/quota/Kconfig" + + source "fs/autofs4/Kconfig" + source "fs/fuse/Kconfig" ++source "fs/overlayfs/Kconfig" + + menu "Caches" + +diff --git a/fs/Makefile b/fs/Makefile +index 47ac07b..5eb9caf 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ + obj-$(CONFIG_AUTOFS4_FS) += autofs4/ + obj-$(CONFIG_ADFS_FS) += adfs/ + obj-$(CONFIG_FUSE_FS) += fuse/ ++obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ + obj-$(CONFIG_UDF_FS) += udf/ + obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ + obj-$(CONFIG_OMFS_FS) += omfs/ +diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig +new file 
mode 100644 +index 0000000..e601259 +--- /dev/null ++++ b/fs/overlayfs/Kconfig +@@ -0,0 +1,10 @@ ++config OVERLAYFS_FS ++ tristate "Overlay filesystem support" ++ help ++ An overlay filesystem combines two filesystems - an 'upper' filesystem ++ and a 'lower' filesystem. When a name exists in both filesystems, the ++ object in the 'upper' filesystem is visible while the object in the ++ 'lower' filesystem is either hidden or, in the case of directories, ++ merged with the 'upper' object. ++ ++ For more information see Documentation/filesystems/overlayfs.txt +diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile +new file mode 100644 +index 0000000..8f91889 +--- /dev/null ++++ b/fs/overlayfs/Makefile +@@ -0,0 +1,7 @@ ++# ++# Makefile for the overlay filesystem. ++# ++ ++obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o ++ ++overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o +diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c +new file mode 100644 +index 0000000..ea10a87 +--- /dev/null ++++ b/fs/overlayfs/copy_up.c +@@ -0,0 +1,414 @@ ++/* ++ * ++ * Copyright (C) 2011 Novell Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "overlayfs.h" ++ ++#define OVL_COPY_UP_CHUNK_SIZE (1 << 20) ++ ++int ovl_copy_xattr(struct dentry *old, struct dentry *new) ++{ ++ ssize_t list_size, size; ++ char *buf, *name, *value; ++ int error; ++ ++ if (!old->d_inode->i_op->getxattr || ++ !new->d_inode->i_op->getxattr) ++ return 0; ++ ++ list_size = vfs_listxattr(old, NULL, 0); ++ if (list_size <= 0) { ++ if (list_size == -EOPNOTSUPP) ++ return 0; ++ return list_size; ++ } ++ ++ buf = kzalloc(list_size, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ error = -ENOMEM; ++ value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); ++ if (!value) ++ goto out; ++ ++ list_size = vfs_listxattr(old, buf, list_size); ++ if (list_size <= 0) { ++ error = list_size; ++ goto out_free_value; ++ } ++ ++ for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { ++ size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); ++ if (size <= 0) { ++ error = size; ++ goto out_free_value; ++ } ++ error = vfs_setxattr(new, name, value, size, 0); ++ if (error) ++ goto out_free_value; ++ } ++ ++out_free_value: ++ kfree(value); ++out: ++ kfree(buf); ++ return error; ++} ++ ++static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) ++{ ++ struct file *old_file; ++ struct file *new_file; ++ loff_t old_pos = 0; ++ loff_t new_pos = 0; ++ int error = 0; ++ ++ if (len == 0) ++ return 0; ++ ++ old_file = ovl_path_open(old, O_RDONLY); ++ if (IS_ERR(old_file)) ++ return PTR_ERR(old_file); ++ ++ new_file = ovl_path_open(new, O_WRONLY); ++ if (IS_ERR(new_file)) { ++ error = PTR_ERR(new_file); ++ goto out_fput; ++ } ++ ++ /* FIXME: copy up sparse files efficiently */ ++ while (len) { ++ size_t this_len = OVL_COPY_UP_CHUNK_SIZE; ++ long bytes; ++ ++ if (len < this_len) ++ this_len = len; ++ ++ if (signal_pending_state(TASK_KILLABLE, current)) { ++ error = -EINTR; ++ break; ++ } ++ ++ bytes = 
do_splice_direct(old_file, &old_pos, ++ new_file, &new_pos, ++ this_len, SPLICE_F_MOVE); ++ if (bytes <= 0) { ++ error = bytes; ++ break; ++ } ++ WARN_ON(old_pos != new_pos); ++ ++ len -= bytes; ++ } ++ ++ fput(new_file); ++out_fput: ++ fput(old_file); ++ return error; ++} ++ ++static char *ovl_read_symlink(struct dentry *realdentry) ++{ ++ int res; ++ char *buf; ++ struct inode *inode = realdentry->d_inode; ++ mm_segment_t old_fs; ++ ++ res = -EINVAL; ++ if (!inode->i_op->readlink) ++ goto err; ++ ++ res = -ENOMEM; ++ buf = (char *) __get_free_page(GFP_KERNEL); ++ if (!buf) ++ goto err; ++ ++ old_fs = get_fs(); ++ set_fs(get_ds()); ++ /* The cast to a user pointer is valid due to the set_fs() */ ++ res = inode->i_op->readlink(realdentry, ++ (char __user *)buf, PAGE_SIZE - 1); ++ set_fs(old_fs); ++ if (res < 0) { ++ free_page((unsigned long) buf); ++ goto err; ++ } ++ buf[res] = '\0'; ++ ++ return buf; ++ ++err: ++ return ERR_PTR(res); ++} ++ ++static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) ++{ ++ struct iattr attr = { ++ .ia_valid = ++ ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, ++ .ia_atime = stat->atime, ++ .ia_mtime = stat->mtime, ++ }; ++ ++ return notify_change(upperdentry, &attr, NULL); ++} ++ ++int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) ++{ ++ int err = 0; ++ ++ if (!S_ISLNK(stat->mode)) { ++ struct iattr attr = { ++ .ia_valid = ATTR_MODE, ++ .ia_mode = stat->mode, ++ }; ++ err = notify_change(upperdentry, &attr, NULL); ++ } ++ if (!err) { ++ struct iattr attr = { ++ .ia_valid = ATTR_UID | ATTR_GID, ++ .ia_uid = stat->uid, ++ .ia_gid = stat->gid, ++ }; ++ err = notify_change(upperdentry, &attr, NULL); ++ } ++ if (!err) ++ ovl_set_timestamps(upperdentry, stat); ++ ++ return err; ++ ++} ++ ++static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, ++ struct dentry *dentry, struct path *lowerpath, ++ struct kstat *stat, struct iattr *attr, ++ const char *link) ++{ ++ 
struct inode *wdir = workdir->d_inode; ++ struct inode *udir = upperdir->d_inode; ++ struct dentry *newdentry = NULL; ++ struct dentry *upper = NULL; ++ umode_t mode = stat->mode; ++ int err; ++ ++ newdentry = ovl_lookup_temp(workdir, dentry); ++ err = PTR_ERR(newdentry); ++ if (IS_ERR(newdentry)) ++ goto out; ++ ++ upper = lookup_one_len(dentry->d_name.name, upperdir, ++ dentry->d_name.len); ++ err = PTR_ERR(upper); ++ if (IS_ERR(upper)) ++ goto out1; ++ ++ /* Can't properly set mode on creation because of the umask */ ++ stat->mode &= S_IFMT; ++ err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); ++ stat->mode = mode; ++ if (err) ++ goto out2; ++ ++ if (S_ISREG(stat->mode)) { ++ struct path upperpath; ++ ovl_path_upper(dentry, &upperpath); ++ BUG_ON(upperpath.dentry != NULL); ++ upperpath.dentry = newdentry; ++ ++ err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); ++ if (err) ++ goto out_cleanup; ++ } ++ ++ err = ovl_copy_xattr(lowerpath->dentry, newdentry); ++ if (err) ++ goto out_cleanup; ++ ++ mutex_lock(&newdentry->d_inode->i_mutex); ++ err = ovl_set_attr(newdentry, stat); ++ if (!err && attr) ++ err = notify_change(newdentry, attr, NULL); ++ mutex_unlock(&newdentry->d_inode->i_mutex); ++ if (err) ++ goto out_cleanup; ++ ++ err = ovl_do_rename(wdir, newdentry, udir, upper, 0); ++ if (err) ++ goto out_cleanup; ++ ++ ovl_dentry_update(dentry, newdentry); ++ newdentry = NULL; ++ ++ /* ++ * Non-directores become opaque when copied up. ++ */ ++ if (!S_ISDIR(stat->mode)) ++ ovl_dentry_set_opaque(dentry, true); ++out2: ++ dput(upper); ++out1: ++ dput(newdentry); ++out: ++ return err; ++ ++out_cleanup: ++ ovl_cleanup(wdir, newdentry); ++ goto out; ++} ++ ++/* ++ * Copy up a single dentry ++ * ++ * Directory renames only allowed on "pure upper" (already created on ++ * upper filesystem, never copied up). Directories which are on lower or ++ * are merged may not be renamed. For these -EXDEV is returned and ++ * userspace has to deal with it. 
This means, when copying up a ++ * directory we can rely on it and ancestors being stable. ++ * ++ * Non-directory renames start with copy up of source if necessary. The ++ * actual rename will only proceed once the copy up was successful. Copy ++ * up uses upper parent i_mutex for exclusion. Since rename can change ++ * d_parent it is possible that the copy up will lock the old parent. At ++ * that point the file will have already been copied up anyway. ++ */ ++int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, ++ struct path *lowerpath, struct kstat *stat, ++ struct iattr *attr) ++{ ++ struct dentry *workdir = ovl_workdir(dentry); ++ int err; ++ struct kstat pstat; ++ struct path parentpath; ++ struct dentry *upperdir; ++ struct dentry *upperdentry; ++ const struct cred *old_cred; ++ struct cred *override_cred; ++ char *link = NULL; ++ ++ ovl_path_upper(parent, &parentpath); ++ upperdir = parentpath.dentry; ++ ++ err = vfs_getattr(&parentpath, &pstat); ++ if (err) ++ return err; ++ ++ if (S_ISLNK(stat->mode)) { ++ link = ovl_read_symlink(lowerpath->dentry); ++ if (IS_ERR(link)) ++ return PTR_ERR(link); ++ } ++ ++ err = -ENOMEM; ++ override_cred = prepare_creds(); ++ if (!override_cred) ++ goto out_free_link; ++ ++ override_cred->fsuid = stat->uid; ++ override_cred->fsgid = stat->gid; ++ /* ++ * CAP_SYS_ADMIN for copying up extended attributes ++ * CAP_DAC_OVERRIDE for create ++ * CAP_FOWNER for chmod, timestamp update ++ * CAP_FSETID for chmod ++ * CAP_CHOWN for chown ++ * CAP_MKNOD for mknod ++ */ ++ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); ++ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); ++ cap_raise(override_cred->cap_effective, CAP_FOWNER); ++ cap_raise(override_cred->cap_effective, CAP_FSETID); ++ cap_raise(override_cred->cap_effective, CAP_CHOWN); ++ cap_raise(override_cred->cap_effective, CAP_MKNOD); ++ old_cred = override_creds(override_cred); ++ ++ err = -EIO; ++ if (lock_rename(workdir, upperdir) != NULL) { ++ 
pr_err("overlayfs: failed to lock workdir+upperdir\n"); ++ goto out_unlock; ++ } ++ upperdentry = ovl_dentry_upper(dentry); ++ if (upperdentry) { ++ unlock_rename(workdir, upperdir); ++ err = 0; ++ /* Raced with another copy-up? Do the setattr here */ ++ if (attr) { ++ mutex_lock(&upperdentry->d_inode->i_mutex); ++ err = notify_change(upperdentry, attr, NULL); ++ mutex_unlock(&upperdentry->d_inode->i_mutex); ++ } ++ goto out_put_cred; ++ } ++ ++ err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, ++ stat, attr, link); ++ if (!err) { ++ /* Restore timestamps on parent (best effort) */ ++ ovl_set_timestamps(upperdir, &pstat); ++ } ++out_unlock: ++ unlock_rename(workdir, upperdir); ++out_put_cred: ++ revert_creds(old_cred); ++ put_cred(override_cred); ++ ++out_free_link: ++ if (link) ++ free_page((unsigned long) link); ++ ++ return err; ++} ++ ++int ovl_copy_up(struct dentry *dentry) ++{ ++ int err; ++ ++ err = 0; ++ while (!err) { ++ struct dentry *next; ++ struct dentry *parent; ++ struct path lowerpath; ++ struct kstat stat; ++ enum ovl_path_type type = ovl_path_type(dentry); ++ ++ if (type != OVL_PATH_LOWER) ++ break; ++ ++ next = dget(dentry); ++ /* find the topmost dentry not yet copied up */ ++ for (;;) { ++ parent = dget_parent(next); ++ ++ type = ovl_path_type(parent); ++ if (type != OVL_PATH_LOWER) ++ break; ++ ++ dput(next); ++ next = parent; ++ } ++ ++ ovl_path_lower(next, &lowerpath); ++ err = vfs_getattr(&lowerpath, &stat); ++ if (!err) ++ err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); ++ ++ dput(parent); ++ dput(next); ++ } ++ ++ return err; ++} +diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c +new file mode 100644 +index 0000000..15cd91a +--- /dev/null ++++ b/fs/overlayfs/dir.c +@@ -0,0 +1,921 @@ ++/* ++ * ++ * Copyright (C) 2011 Novell Inc. 
++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "overlayfs.h" ++ ++void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) ++{ ++ int err; ++ ++ dget(wdentry); ++ if (S_ISDIR(wdentry->d_inode->i_mode)) ++ err = ovl_do_rmdir(wdir, wdentry); ++ else ++ err = ovl_do_unlink(wdir, wdentry); ++ dput(wdentry); ++ ++ if (err) { ++ pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", ++ wdentry, err); ++ } ++} ++ ++struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) ++{ ++ struct dentry *temp; ++ char name[20]; ++ ++ snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry); ++ ++ temp = lookup_one_len(name, workdir, strlen(name)); ++ if (!IS_ERR(temp) && temp->d_inode) { ++ pr_err("overlayfs: workdir/%s already exists\n", name); ++ dput(temp); ++ temp = ERR_PTR(-EIO); ++ } ++ ++ return temp; ++} ++ ++/* caller holds i_mutex on workdir */ ++static struct dentry *ovl_whiteout(struct dentry *workdir, ++ struct dentry *dentry) ++{ ++ int err; ++ struct dentry *whiteout; ++ struct inode *wdir = workdir->d_inode; ++ ++ whiteout = ovl_lookup_temp(workdir, dentry); ++ if (IS_ERR(whiteout)) ++ return whiteout; ++ ++ err = ovl_do_whiteout(wdir, whiteout); ++ if (err) { ++ dput(whiteout); ++ whiteout = ERR_PTR(err); ++ } ++ ++ return whiteout; ++} ++ ++int ovl_create_real(struct inode *dir, struct dentry *newdentry, ++ struct kstat *stat, const char *link, ++ struct dentry *hardlink, bool debug) ++{ ++ int err; ++ ++ if (newdentry->d_inode) ++ return -ESTALE; ++ ++ if (hardlink) { ++ err = ovl_do_link(hardlink, dir, newdentry, debug); ++ } else { ++ switch (stat->mode & S_IFMT) { ++ case S_IFREG: ++ err = ovl_do_create(dir, newdentry, stat->mode, debug); ++ break; ++ ++ case S_IFDIR: ++ err = ovl_do_mkdir(dir, newdentry, 
stat->mode, debug); ++ break; ++ ++ case S_IFCHR: ++ case S_IFBLK: ++ case S_IFIFO: ++ case S_IFSOCK: ++ err = ovl_do_mknod(dir, newdentry, ++ stat->mode, stat->rdev, debug); ++ break; ++ ++ case S_IFLNK: ++ err = ovl_do_symlink(dir, newdentry, link, debug); ++ break; ++ ++ default: ++ err = -EPERM; ++ } ++ } ++ if (!err && WARN_ON(!newdentry->d_inode)) { ++ /* ++ * Not quite sure if non-instantiated dentry is legal or not. ++ * VFS doesn't seem to care so check and warn here. ++ */ ++ err = -ENOENT; ++ } ++ return err; ++} ++ ++static int ovl_set_opaque(struct dentry *upperdentry) ++{ ++ return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0); ++} ++ ++static void ovl_remove_opaque(struct dentry *upperdentry) ++{ ++ int err; ++ ++ err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr); ++ if (err) { ++ pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", ++ upperdentry->d_name.name, err); ++ } ++} ++ ++static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ int err; ++ enum ovl_path_type type; ++ struct path realpath; ++ ++ type = ovl_path_real(dentry, &realpath); ++ err = vfs_getattr(&realpath, stat); ++ if (err) ++ return err; ++ ++ stat->dev = dentry->d_sb->s_dev; ++ stat->ino = dentry->d_inode->i_ino; ++ ++ /* ++ * It's probably not worth it to count subdirs to get the ++ * correct link count. nlink=1 seems to pacify 'find' and ++ * other utilities. 
++ */ ++ if (type == OVL_PATH_MERGE) ++ stat->nlink = 1; ++ ++ return 0; ++} ++ ++static int ovl_create_upper(struct dentry *dentry, struct inode *inode, ++ struct kstat *stat, const char *link, ++ struct dentry *hardlink) ++{ ++ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); ++ struct inode *udir = upperdir->d_inode; ++ struct dentry *newdentry; ++ int err; ++ ++ mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT); ++ newdentry = lookup_one_len(dentry->d_name.name, upperdir, ++ dentry->d_name.len); ++ err = PTR_ERR(newdentry); ++ if (IS_ERR(newdentry)) ++ goto out_unlock; ++ err = ovl_create_real(udir, newdentry, stat, link, hardlink, false); ++ if (err) ++ goto out_dput; ++ ++ ovl_dentry_version_inc(dentry->d_parent); ++ ovl_dentry_update(dentry, newdentry); ++ ovl_copyattr(newdentry->d_inode, inode); ++ d_instantiate(dentry, inode); ++ newdentry = NULL; ++out_dput: ++ dput(newdentry); ++out_unlock: ++ mutex_unlock(&udir->i_mutex); ++ return err; ++} ++ ++static int ovl_lock_rename_workdir(struct dentry *workdir, ++ struct dentry *upperdir) ++{ ++ /* Workdir should not be the same as upperdir */ ++ if (workdir == upperdir) ++ goto err; ++ ++ /* Workdir should not be subdir of upperdir and vice versa */ ++ if (lock_rename(workdir, upperdir) != NULL) ++ goto err_unlock; ++ ++ return 0; ++ ++err_unlock: ++ unlock_rename(workdir, upperdir); ++err: ++ pr_err("overlayfs: failed to lock workdir+upperdir\n"); ++ return -EIO; ++} ++ ++static struct dentry *ovl_clear_empty(struct dentry *dentry, ++ struct list_head *list) ++{ ++ struct dentry *workdir = ovl_workdir(dentry); ++ struct inode *wdir = workdir->d_inode; ++ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); ++ struct inode *udir = upperdir->d_inode; ++ struct path upperpath; ++ struct dentry *upper; ++ struct dentry *opaquedir; ++ struct kstat stat; ++ int err; ++ ++ err = ovl_lock_rename_workdir(workdir, upperdir); ++ if (err) ++ goto out; ++ ++ ovl_path_upper(dentry, &upperpath); ++ err 
= vfs_getattr(&upperpath, &stat); ++ if (err) ++ goto out_unlock; ++ ++ err = -ESTALE; ++ if (!S_ISDIR(stat.mode)) ++ goto out_unlock; ++ upper = upperpath.dentry; ++ if (upper->d_parent->d_inode != udir) ++ goto out_unlock; ++ ++ opaquedir = ovl_lookup_temp(workdir, dentry); ++ err = PTR_ERR(opaquedir); ++ if (IS_ERR(opaquedir)) ++ goto out_unlock; ++ ++ err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true); ++ if (err) ++ goto out_dput; ++ ++ err = ovl_copy_xattr(upper, opaquedir); ++ if (err) ++ goto out_cleanup; ++ ++ err = ovl_set_opaque(opaquedir); ++ if (err) ++ goto out_cleanup; ++ ++ mutex_lock(&opaquedir->d_inode->i_mutex); ++ err = ovl_set_attr(opaquedir, &stat); ++ mutex_unlock(&opaquedir->d_inode->i_mutex); ++ if (err) ++ goto out_cleanup; ++ ++ err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE); ++ if (err) ++ goto out_cleanup; ++ ++ ovl_cleanup_whiteouts(upper, list); ++ ovl_cleanup(wdir, upper); ++ unlock_rename(workdir, upperdir); ++ ++ /* dentry's upper doesn't match now, get rid of it */ ++ d_drop(dentry); ++ ++ return opaquedir; ++ ++out_cleanup: ++ ovl_cleanup(wdir, opaquedir); ++out_dput: ++ dput(opaquedir); ++out_unlock: ++ unlock_rename(workdir, upperdir); ++out: ++ return ERR_PTR(err); ++} ++ ++static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, ++ enum ovl_path_type type) ++{ ++ int err; ++ struct dentry *ret = NULL; ++ LIST_HEAD(list); ++ ++ err = ovl_check_empty_dir(dentry, &list); ++ if (err) ++ ret = ERR_PTR(err); ++ else if (type == OVL_PATH_MERGE) ++ ret = ovl_clear_empty(dentry, &list); ++ ++ ovl_cache_free(&list); ++ ++ return ret; ++} ++ ++static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, ++ struct kstat *stat, const char *link, ++ struct dentry *hardlink) ++{ ++ struct dentry *workdir = ovl_workdir(dentry); ++ struct inode *wdir = workdir->d_inode; ++ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); ++ struct inode *udir = upperdir->d_inode; 
++ struct dentry *upper; ++ struct dentry *newdentry; ++ int err; ++ ++ err = ovl_lock_rename_workdir(workdir, upperdir); ++ if (err) ++ goto out; ++ ++ newdentry = ovl_lookup_temp(workdir, dentry); ++ err = PTR_ERR(newdentry); ++ if (IS_ERR(newdentry)) ++ goto out_unlock; ++ ++ upper = lookup_one_len(dentry->d_name.name, upperdir, ++ dentry->d_name.len); ++ err = PTR_ERR(upper); ++ if (IS_ERR(upper)) ++ goto out_dput; ++ ++ err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true); ++ if (err) ++ goto out_dput2; ++ ++ if (S_ISDIR(stat->mode)) { ++ err = ovl_set_opaque(newdentry); ++ if (err) ++ goto out_cleanup; ++ ++ err = ovl_do_rename(wdir, newdentry, udir, upper, ++ RENAME_EXCHANGE); ++ if (err) ++ goto out_cleanup; ++ ++ ovl_cleanup(wdir, upper); ++ } else { ++ err = ovl_do_rename(wdir, newdentry, udir, upper, 0); ++ if (err) ++ goto out_cleanup; ++ } ++ ovl_dentry_version_inc(dentry->d_parent); ++ ovl_dentry_update(dentry, newdentry); ++ ovl_copyattr(newdentry->d_inode, inode); ++ d_instantiate(dentry, inode); ++ newdentry = NULL; ++out_dput2: ++ dput(upper); ++out_dput: ++ dput(newdentry); ++out_unlock: ++ unlock_rename(workdir, upperdir); ++out: ++ return err; ++ ++out_cleanup: ++ ovl_cleanup(wdir, newdentry); ++ goto out_dput2; ++} ++ ++static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, ++ const char *link, struct dentry *hardlink) ++{ ++ int err; ++ struct inode *inode; ++ struct kstat stat = { ++ .mode = mode, ++ .rdev = rdev, ++ }; ++ ++ err = -ENOMEM; ++ inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata); ++ if (!inode) ++ goto out; ++ ++ err = ovl_copy_up(dentry->d_parent); ++ if (err) ++ goto out_iput; ++ ++ if (!ovl_dentry_is_opaque(dentry)) { ++ err = ovl_create_upper(dentry, inode, &stat, link, hardlink); ++ } else { ++ const struct cred *old_cred; ++ struct cred *override_cred; ++ ++ err = -ENOMEM; ++ override_cred = prepare_creds(); ++ if (!override_cred) ++ goto out_iput; ++ ++ /* ++ * CAP_SYS_ADMIN 
for setting opaque xattr ++ * CAP_DAC_OVERRIDE for create in workdir, rename ++ * CAP_FOWNER for removing whiteout from sticky dir ++ */ ++ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); ++ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); ++ cap_raise(override_cred->cap_effective, CAP_FOWNER); ++ old_cred = override_creds(override_cred); ++ ++ err = ovl_create_over_whiteout(dentry, inode, &stat, link, ++ hardlink); ++ ++ revert_creds(old_cred); ++ put_cred(override_cred); ++ } ++ ++ if (!err) ++ inode = NULL; ++out_iput: ++ iput(inode); ++out: ++ return err; ++} ++ ++static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, ++ const char *link) ++{ ++ int err; ++ ++ err = ovl_want_write(dentry); ++ if (!err) { ++ err = ovl_create_or_link(dentry, mode, rdev, link, NULL); ++ ovl_drop_write(dentry); ++ } ++ ++ return err; ++} ++ ++static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode, ++ bool excl) ++{ ++ return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); ++} ++ ++static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ++{ ++ return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); ++} ++ ++static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t rdev) ++{ ++ /* Don't allow creation of "whiteout" on overlay */ ++ if (S_ISCHR(mode) && rdev == WHITEOUT_DEV) ++ return -EPERM; ++ ++ return ovl_create_object(dentry, mode, rdev, NULL); ++} ++ ++static int ovl_symlink(struct inode *dir, struct dentry *dentry, ++ const char *link) ++{ ++ return ovl_create_object(dentry, S_IFLNK, 0, link); ++} ++ ++static int ovl_link(struct dentry *old, struct inode *newdir, ++ struct dentry *new) ++{ ++ int err; ++ struct dentry *upper; ++ ++ err = ovl_want_write(old); ++ if (err) ++ goto out; ++ ++ err = ovl_copy_up(old); ++ if (err) ++ goto out_drop_write; ++ ++ upper = ovl_dentry_upper(old); ++ err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, 
NULL, upper); ++ ++out_drop_write: ++ ovl_drop_write(old); ++out: ++ return err; ++} ++ ++static int ovl_remove_and_whiteout(struct dentry *dentry, ++ enum ovl_path_type type, bool is_dir) ++{ ++ struct dentry *workdir = ovl_workdir(dentry); ++ struct inode *wdir = workdir->d_inode; ++ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); ++ struct inode *udir = upperdir->d_inode; ++ struct dentry *whiteout; ++ struct dentry *upper; ++ struct dentry *opaquedir = NULL; ++ int err; ++ ++ if (is_dir) { ++ opaquedir = ovl_check_empty_and_clear(dentry, type); ++ err = PTR_ERR(opaquedir); ++ if (IS_ERR(opaquedir)) ++ goto out; ++ } ++ ++ err = ovl_lock_rename_workdir(workdir, upperdir); ++ if (err) ++ goto out_dput; ++ ++ whiteout = ovl_whiteout(workdir, dentry); ++ err = PTR_ERR(whiteout); ++ if (IS_ERR(whiteout)) ++ goto out_unlock; ++ ++ if (type == OVL_PATH_LOWER) { ++ upper = lookup_one_len(dentry->d_name.name, upperdir, ++ dentry->d_name.len); ++ err = PTR_ERR(upper); ++ if (IS_ERR(upper)) ++ goto kill_whiteout; ++ ++ err = ovl_do_rename(wdir, whiteout, udir, upper, 0); ++ dput(upper); ++ if (err) ++ goto kill_whiteout; ++ } else { ++ int flags = 0; ++ ++ upper = ovl_dentry_upper(dentry); ++ if (opaquedir) ++ upper = opaquedir; ++ err = -ESTALE; ++ if (upper->d_parent != upperdir) ++ goto kill_whiteout; ++ ++ if (is_dir) ++ flags |= RENAME_EXCHANGE; ++ ++ err = ovl_do_rename(wdir, whiteout, udir, upper, flags); ++ if (err) ++ goto kill_whiteout; ++ ++ if (is_dir) ++ ovl_cleanup(wdir, upper); ++ } ++ ovl_dentry_version_inc(dentry->d_parent); ++out_d_drop: ++ d_drop(dentry); ++ dput(whiteout); ++out_unlock: ++ unlock_rename(workdir, upperdir); ++out_dput: ++ dput(opaquedir); ++out: ++ return err; ++ ++kill_whiteout: ++ ovl_cleanup(wdir, whiteout); ++ goto out_d_drop; ++} ++ ++static int ovl_remove_upper(struct dentry *dentry, bool is_dir) ++{ ++ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); ++ struct inode *dir = upperdir->d_inode; ++ struct 
dentry *upper = ovl_dentry_upper(dentry);
++	int err;
++
++	/* Removal happens in the upper parent; hold its i_mutex throughout. */
++	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
++	err = -ESTALE;
++	if (upper->d_parent == upperdir) {
++		/* Don't let d_delete() think it can reset d_inode */
++		dget(upper);
++		if (is_dir)
++			err = vfs_rmdir(dir, upper);
++		else
++			err = vfs_unlink(dir, upper, NULL);
++		dput(upper);
++		ovl_dentry_version_inc(dentry->d_parent);
++	}
++
++	/*
++	 * Keeping this dentry hashed would mean having to release
++	 * upperpath/lowerpath, which could only be done if we are the
++	 * sole user of this dentry.  Too tricky...  Just unhash for
++	 * now.
++	 *
++	 * Only do so when the removal actually succeeded: on failure
++	 * (including the -ESTALE case above) nothing was removed, so the
++	 * overlay dentry must stay hashed.
++	 */
++	if (!err)
++		d_drop(dentry);
++	mutex_unlock(&dir->i_mutex);
++
++	return err;
++}
++
++/*
++ * Apply the sticky-bit check to the real parent directory and the real
++ * inode behind @dentry.  Returns -EPERM when check_sticky() objects,
++ * 0 otherwise.
++ */
++static inline int ovl_check_sticky(struct dentry *dentry)
++{
++	struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
++	struct inode *inode = ovl_dentry_real(dentry)->d_inode;
++
++	if (check_sticky(dir, inode))
++		return -EPERM;
++
++	return 0;
++}
++
++/*
++ * Common implementation of unlink (@is_dir == false) and rmdir
++ * (@is_dir == true).  After the sticky check, write access is taken and
++ * the parent is copied up.  A pure-upper entry is removed directly;
++ * anything else goes through the whiteout path with temporarily raised
++ * capabilities.
++ */
++static int ovl_do_remove(struct dentry *dentry, bool is_dir)
++{
++	enum ovl_path_type type;
++	int err;
++
++	err = ovl_check_sticky(dentry);
++	if (err)
++		goto out;
++
++	err = ovl_want_write(dentry);
++	if (err)
++		goto out;
++
++	err = ovl_copy_up(dentry->d_parent);
++	if (err)
++		goto out_drop_write;
++
++	type = ovl_path_type(dentry);
++	if (type == OVL_PATH_PURE_UPPER) {
++		err = ovl_remove_upper(dentry, is_dir);
++	} else {
++		const struct cred *old_cred;
++		struct cred *override_cred;
++
++		err = -ENOMEM;
++		override_cred = prepare_creds();
++		if (!override_cred)
++			goto out_drop_write;
++
++		/*
++		 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
++		 * CAP_DAC_OVERRIDE for create in workdir, rename
++		 * CAP_FOWNER for removing whiteout from sticky dir
++		 * CAP_FSETID for chmod of opaque dir
++		 * CAP_CHOWN for chown of opaque dir
++		 */
++		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
++		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
++		cap_raise(override_cred->cap_effective, CAP_FOWNER);
++ cap_raise(override_cred->cap_effective, CAP_FSETID); ++ cap_raise(override_cred->cap_effective, CAP_CHOWN); ++ old_cred = override_creds(override_cred); ++ ++ err = ovl_remove_and_whiteout(dentry, type, is_dir); ++ ++ revert_creds(old_cred); ++ put_cred(override_cred); ++ } ++out_drop_write: ++ ovl_drop_write(dentry); ++out: ++ return err; ++} ++ ++static int ovl_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ return ovl_do_remove(dentry, false); ++} ++ ++static int ovl_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ return ovl_do_remove(dentry, true); ++} ++ ++static int ovl_rename2(struct inode *olddir, struct dentry *old, ++ struct inode *newdir, struct dentry *new, ++ unsigned int flags) ++{ ++ int err; ++ enum ovl_path_type old_type; ++ enum ovl_path_type new_type; ++ struct dentry *old_upperdir; ++ struct dentry *new_upperdir; ++ struct dentry *olddentry; ++ struct dentry *newdentry; ++ struct dentry *trap; ++ bool old_opaque; ++ bool new_opaque; ++ bool new_create = false; ++ bool cleanup_whiteout = false; ++ bool overwrite = !(flags & RENAME_EXCHANGE); ++ bool is_dir = S_ISDIR(old->d_inode->i_mode); ++ bool new_is_dir = false; ++ struct dentry *opaquedir = NULL; ++ const struct cred *old_cred = NULL; ++ struct cred *override_cred = NULL; ++ ++ err = -EINVAL; ++ if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) ++ goto out; ++ ++ flags &= ~RENAME_NOREPLACE; ++ ++ err = ovl_check_sticky(old); ++ if (err) ++ goto out; ++ ++ /* Don't copy up directory trees */ ++ old_type = ovl_path_type(old); ++ err = -EXDEV; ++ if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir) ++ goto out; ++ ++ if (new->d_inode) { ++ err = ovl_check_sticky(new); ++ if (err) ++ goto out; ++ ++ if (S_ISDIR(new->d_inode->i_mode)) ++ new_is_dir = true; ++ ++ new_type = ovl_path_type(new); ++ err = -EXDEV; ++ if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) ++ goto out; ++ ++ err = 0; ++ if (new_type == 
OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { ++ if (ovl_dentry_lower(old)->d_inode == ++ ovl_dentry_lower(new)->d_inode) ++ goto out; ++ } ++ if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { ++ if (ovl_dentry_upper(old)->d_inode == ++ ovl_dentry_upper(new)->d_inode) ++ goto out; ++ } ++ } else { ++ if (ovl_dentry_is_opaque(new)) ++ new_type = OVL_PATH_UPPER; ++ else ++ new_type = OVL_PATH_PURE_UPPER; ++ } ++ ++ err = ovl_want_write(old); ++ if (err) ++ goto out; ++ ++ err = ovl_copy_up(old); ++ if (err) ++ goto out_drop_write; ++ ++ err = ovl_copy_up(new->d_parent); ++ if (err) ++ goto out_drop_write; ++ if (!overwrite) { ++ err = ovl_copy_up(new); ++ if (err) ++ goto out_drop_write; ++ } ++ ++ old_opaque = old_type != OVL_PATH_PURE_UPPER; ++ new_opaque = new_type != OVL_PATH_PURE_UPPER; ++ ++ if (old_opaque || new_opaque) { ++ err = -ENOMEM; ++ override_cred = prepare_creds(); ++ if (!override_cred) ++ goto out_drop_write; ++ ++ /* ++ * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir ++ * CAP_DAC_OVERRIDE for create in workdir ++ * CAP_FOWNER for removing whiteout from sticky dir ++ * CAP_FSETID for chmod of opaque dir ++ * CAP_CHOWN for chown of opaque dir ++ */ ++ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); ++ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); ++ cap_raise(override_cred->cap_effective, CAP_FOWNER); ++ cap_raise(override_cred->cap_effective, CAP_FSETID); ++ cap_raise(override_cred->cap_effective, CAP_CHOWN); ++ old_cred = override_creds(override_cred); ++ } ++ ++ if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { ++ opaquedir = ovl_check_empty_and_clear(new, new_type); ++ err = PTR_ERR(opaquedir); ++ if (IS_ERR(opaquedir)) { ++ opaquedir = NULL; ++ goto out_revert_creds; ++ } ++ } ++ ++ if (overwrite) { ++ if (old_opaque) { ++ if (new->d_inode || !new_opaque) { ++ /* Whiteout source */ ++ flags |= RENAME_WHITEOUT; ++ } else { ++ /* Switch whiteouts */ ++ flags 
|= RENAME_EXCHANGE; ++ } ++ } else if (is_dir && !new->d_inode && new_opaque) { ++ flags |= RENAME_EXCHANGE; ++ cleanup_whiteout = true; ++ } ++ } ++ ++ old_upperdir = ovl_dentry_upper(old->d_parent); ++ new_upperdir = ovl_dentry_upper(new->d_parent); ++ ++ trap = lock_rename(new_upperdir, old_upperdir); ++ ++ olddentry = ovl_dentry_upper(old); ++ newdentry = ovl_dentry_upper(new); ++ if (newdentry) { ++ if (opaquedir) { ++ newdentry = opaquedir; ++ opaquedir = NULL; ++ } else { ++ dget(newdentry); ++ } ++ } else { ++ new_create = true; ++ newdentry = lookup_one_len(new->d_name.name, new_upperdir, ++ new->d_name.len); ++ err = PTR_ERR(newdentry); ++ if (IS_ERR(newdentry)) ++ goto out_unlock; ++ } ++ ++ err = -ESTALE; ++ if (olddentry->d_parent != old_upperdir) ++ goto out_dput; ++ if (newdentry->d_parent != new_upperdir) ++ goto out_dput; ++ if (olddentry == trap) ++ goto out_dput; ++ if (newdentry == trap) ++ goto out_dput; ++ ++ if (is_dir && !old_opaque && new_opaque) { ++ err = ovl_set_opaque(olddentry); ++ if (err) ++ goto out_dput; ++ } ++ if (!overwrite && new_is_dir && old_opaque && !new_opaque) { ++ err = ovl_set_opaque(newdentry); ++ if (err) ++ goto out_dput; ++ } ++ ++ if (old_opaque || new_opaque) { ++ err = ovl_do_rename(old_upperdir->d_inode, olddentry, ++ new_upperdir->d_inode, newdentry, ++ flags); ++ } else { ++ /* No debug for the plain case */ ++ BUG_ON(flags & ~RENAME_EXCHANGE); ++ err = vfs_rename(old_upperdir->d_inode, olddentry, ++ new_upperdir->d_inode, newdentry, ++ NULL, flags); ++ } ++ ++ if (err) { ++ if (is_dir && !old_opaque && new_opaque) ++ ovl_remove_opaque(olddentry); ++ if (!overwrite && new_is_dir && old_opaque && !new_opaque) ++ ovl_remove_opaque(newdentry); ++ goto out_dput; ++ } ++ ++ if (is_dir && old_opaque && !new_opaque) ++ ovl_remove_opaque(olddentry); ++ if (!overwrite && new_is_dir && !old_opaque && new_opaque) ++ ovl_remove_opaque(newdentry); ++ ++ if (old_opaque != new_opaque) { ++ ovl_dentry_set_opaque(old, 
new_opaque); ++ if (!overwrite) ++ ovl_dentry_set_opaque(new, old_opaque); ++ } ++ ++ if (cleanup_whiteout) ++ ovl_cleanup(old_upperdir->d_inode, newdentry); ++ ++ ovl_dentry_version_inc(old->d_parent); ++ ovl_dentry_version_inc(new->d_parent); ++ ++out_dput: ++ dput(newdentry); ++out_unlock: ++ unlock_rename(new_upperdir, old_upperdir); ++out_revert_creds: ++ if (old_opaque || new_opaque) { ++ revert_creds(old_cred); ++ put_cred(override_cred); ++ } ++out_drop_write: ++ ovl_drop_write(old); ++out: ++ dput(opaquedir); ++ return err; ++} ++ ++const struct inode_operations ovl_dir_inode_operations = { ++ .lookup = ovl_lookup, ++ .mkdir = ovl_mkdir, ++ .symlink = ovl_symlink, ++ .unlink = ovl_unlink, ++ .rmdir = ovl_rmdir, ++ .rename2 = ovl_rename2, ++ .link = ovl_link, ++ .setattr = ovl_setattr, ++ .create = ovl_create, ++ .mknod = ovl_mknod, ++ .permission = ovl_permission, ++ .getattr = ovl_dir_getattr, ++ .setxattr = ovl_setxattr, ++ .getxattr = ovl_getxattr, ++ .listxattr = ovl_listxattr, ++ .removexattr = ovl_removexattr, ++}; +diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c +new file mode 100644 +index 0000000..af2d18c +--- /dev/null ++++ b/fs/overlayfs/inode.c +@@ -0,0 +1,425 @@ ++/* ++ * ++ * Copyright (C) 2011 Novell Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. 
++ */
++
++#include <linux/fs.h>
++#include <linux/slab.h>
++#include <linux/xattr.h>
++#include "overlayfs.h"
++
++/*
++ * Copy up @dentry itself, after making sure its parent is copied up.
++ * The lower path is stat'ed to obtain the attributes handed to
++ * ovl_copy_up_one(); with @no_data the size is forced to 0 first.
++ * @attr is passed through unchanged.  Returns 0 or the first error from
++ * ovl_copy_up(), vfs_getattr() or ovl_copy_up_one().
++ */
++static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
++			    bool no_data)
++{
++	int err;
++	struct dentry *parent;
++	struct kstat stat;
++	struct path lowerpath;
++
++	parent = dget_parent(dentry);
++	err = ovl_copy_up(parent);
++	if (err)
++		goto out_dput_parent;
++
++	ovl_path_lower(dentry, &lowerpath);
++	err = vfs_getattr(&lowerpath, &stat);
++	if (err)
++		goto out_dput_parent;
++
++	if (no_data)
++		stat.size = 0;
++
++	err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
++
++out_dput_parent:
++	dput(parent);
++	return err;
++}
++
++/*
++ * setattr: when an upper dentry exists, apply @attr to it with
++ * notify_change() under its i_mutex; otherwise copy the object up,
++ * passing @attr along.  Write access is held for the duration.
++ */
++int ovl_setattr(struct dentry *dentry, struct iattr *attr)
++{
++	int err;
++	struct dentry *upperdentry;
++
++	err = ovl_want_write(dentry);
++	if (err)
++		goto out;
++
++	upperdentry = ovl_dentry_upper(dentry);
++	if (upperdentry) {
++		mutex_lock(&upperdentry->d_inode->i_mutex);
++		err = notify_change(upperdentry, attr, NULL);
++		mutex_unlock(&upperdentry->d_inode->i_mutex);
++	} else {
++		err = ovl_copy_up_last(dentry, attr, false);
++	}
++	ovl_drop_write(dentry);
++out:
++	return err;
++}
++
++/* getattr: report the attributes of the real path behind @dentry. */
++static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
++			 struct kstat *stat)
++{
++	struct path realpath;
++
++	ovl_path_real(dentry, &realpath);
++	return vfs_getattr(&realpath, stat);
++}
++
++/*
++ * permission: run the check against the real inode.
++ *
++ * Directories carry their ovl_entry in i_private.  For anything else an
++ * alias dentry is looked up to reach d_fsdata, which is not attempted
++ * when MAY_NOT_BLOCK is set (-ECHILD is returned instead).  The alias
++ * reference, if any, is dropped on the way out.
++ */
++int ovl_permission(struct inode *inode, int mask)
++{
++	struct ovl_entry *oe;
++	struct dentry *alias = NULL;
++	struct inode *realinode;
++	struct dentry *realdentry;
++	bool is_upper;
++	int err;
++
++	if (S_ISDIR(inode->i_mode)) {
++		oe = inode->i_private;
++	} else if (mask & MAY_NOT_BLOCK) {
++		return -ECHILD;
++	} else {
++		/*
++		 * For non-directories find an alias and get the info
++		 * from there.
++		 */
++		alias = d_find_any_alias(inode);
++		if (WARN_ON(!alias))
++			return -ENOENT;
++
++		oe = alias->d_fsdata;
++	}
++
++	realdentry = ovl_entry_real(oe, &is_upper);
++
++	/* Careful in RCU walk mode */
++	realinode = ACCESS_ONCE(realdentry->d_inode);
++	if (!realinode) {
++		WARN_ON(!(mask & MAY_NOT_BLOCK));
++		err = -ENOENT;
++		goto out_dput;
++	}
++
++	if (mask & MAY_WRITE) {
++		umode_t mode = realinode->i_mode;
++
++		/*
++		 * Writes will always be redirected to upper layer, so
++		 * ignore lower layer being read-only.
++		 *
++		 * If the overlay itself is read-only then proceed
++		 * with the permission check, don't return EROFS.
++		 * This will only happen if this is the lower layer of
++		 * another overlayfs.
++		 *
++		 * If upper fs becomes read-only after the overlay was
++		 * constructed return EROFS to prevent modification of
++		 * upper layer.
++		 */
++		err = -EROFS;
++		if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
++		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
++			goto out_dput;
++	}
++
++	err = __inode_permission(realinode, mask);
++out_dput:
++	dput(alias);
++	return err;
++}
++
++
++/*
++ * Cookie handed from ovl_follow_link() to ovl_put_link(): the real
++ * dentry whose ->put_link must be called, and the real cookie for it.
++ */
++struct ovl_link_data {
++	struct dentry *realdentry;
++	void *cookie;
++};
++
++/*
++ * follow_link: delegate to the real inode.  When the real inode also
++ * has ->put_link, its cookie is wrapped in an ovl_link_data so that
++ * ovl_put_link() can forward the release; otherwise NULL is returned.
++ */
++static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
++{
++	void *ret;
++	struct dentry *realdentry;
++	struct inode *realinode;
++
++	realdentry = ovl_dentry_real(dentry);
++	realinode = realdentry->d_inode;
++
++	if (WARN_ON(!realinode->i_op->follow_link))
++		return ERR_PTR(-EPERM);
++
++	ret = realinode->i_op->follow_link(realdentry, nd);
++	if (IS_ERR(ret))
++		return ret;
++
++	if (realinode->i_op->put_link) {
++		struct ovl_link_data *data;
++
++		data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
++		if (!data) {
++			/* Undo the real follow_link before failing. */
++			realinode->i_op->put_link(realdentry, nd, ret);
++			return ERR_PTR(-ENOMEM);
++		}
++		data->realdentry = realdentry;
++		data->cookie = ret;
++
++		return data;
++	} else {
++		return NULL;
++	}
++}
++
++/*
++ * put_link: forward to the real inode's ->put_link using the data saved
++ * by ovl_follow_link(), then free it.  A NULL cookie means there is
++ * nothing to release.
++ */
++static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
++{
++	struct inode *realinode;
++	struct ovl_link_data *data = c;
++
++	if (!data)
++		return;
++
++	realinode = data->realdentry->d_inode;
++	realinode->i_op->put_link(data->realdentry, nd, data->cookie);
++	kfree(data);
++}
++
++/*
++ * readlink: delegate to the real inode's ->readlink after touching
++ * atime on the real path.  -EINVAL if the real inode has no ->readlink.
++ */
++static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
++{
++	struct path realpath;
++	struct inode *realinode;
++
++	ovl_path_real(dentry, &realpath);
++	realinode = realpath.dentry->d_inode;
++
++	if (!realinode->i_op->readlink)
++		return -EINVAL;
++
++	touch_atime(&realpath);
++
++	return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
++}
++
++
++/*
++ * True when @name lies in overlayfs' private "trusted.overlay."
++ * namespace.  The whole prefix, trailing dot included, must match; the
++ * length is derived from the literal so the two cannot drift apart.
++ */
++static bool ovl_is_private_xattr(const char *name)
++{
++	static const char prefix[] = "trusted.overlay.";
++
++	return strncmp(name, prefix, sizeof(prefix) - 1) == 0;
++}
++
++/*
++ * setxattr: private xattrs are refused with -EPERM; anything else is
++ * set on the upper dentry after copy-up.
++ */
++int ovl_setxattr(struct dentry *dentry, const char *name,
++		 const void *value, size_t size, int flags)
++{
++	int err;
++	struct dentry *upperdentry;
++
++	err = ovl_want_write(dentry);
++	if (err)
++		goto out;
++
++	err = -EPERM;
++	if (ovl_is_private_xattr(name))
++		goto out_drop_write;
++
++	err = ovl_copy_up(dentry);
++	if (err)
++		goto out_drop_write;
++
++	upperdentry = ovl_dentry_upper(dentry);
++	err = vfs_setxattr(upperdentry, name, value, size, flags);
++
++out_drop_write:
++	ovl_drop_write(dentry);
++out:
++	return err;
++}
++
++/*
++ * getxattr: read from the real dentry.  Private xattrs are hidden
++ * (-ENODATA) when the parent is a merged directory.
++ */
++ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
++		     void *value, size_t size)
++{
++	if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
++	    ovl_is_private_xattr(name))
++		return -ENODATA;
++
++	return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
++}
++
++/*
++ * listxattr: list the real dentry's xattrs.  When the parent is a
++ * merged directory, private names are removed from @list in place and
++ * the returned length shrinks accordingly.  Size queries (@size == 0)
++ * and errors are returned unfiltered.
++ */
++ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
++{
++	ssize_t res;
++	int off;
++
++	res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
++	if (res <= 0 || size == 0)
++		return res;
++
++	if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
++		return res;
++
++	/* filter out private xattrs */
++	for (off = 0; off < res;) {
++		char *s = list + off;
++		size_t slen = strlen(s) + 1;
++
++		BUG_ON(off + slen > res);
++
++		if (ovl_is_private_xattr(s)) {
++			/* Close the gap; the next name now sits at @off. */
++			res -= slen;
++			memmove(s, s + slen, res - off);
++		} else {
++			off += slen;
++		}
++	}
++
++	return res;
++}
++
++/*
++ * removexattr: private xattrs under a merged parent are reported as
++ * absent (-ENODATA), matching ovl_getxattr().  For a lower-only object
++ * the xattr is probed first so that a missing attribute does not
++ * trigger a copy-up; the removal itself is done on the real (by then
++ * possibly upper) dentry.
++ */
++int ovl_removexattr(struct dentry *dentry, const char *name)
++{
++	int err;
++	struct path realpath;
++	enum ovl_path_type type;
++
++	err = ovl_want_write(dentry);
++	if (err)
++		goto out;
++
++	/* Nothing is removed on this path, so it must not return 0. */
++	err = -ENODATA;
++	if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
++	    ovl_is_private_xattr(name))
++		goto out_drop_write;
++
++	type = ovl_path_real(dentry, &realpath);
++	if (type == OVL_PATH_LOWER) {
++		err = vfs_getxattr(realpath.dentry, name, NULL, 0);
++		if (err < 0)
++			goto out_drop_write;
++
++		err = ovl_copy_up(dentry);
++		if (err)
++			goto out_drop_write;
++
++		ovl_path_upper(dentry, &realpath);
++	}
++
++	err = vfs_removexattr(realpath.dentry, name);
++out_drop_write:
++	ovl_drop_write(dentry);
++out:
++	return err;
++}
++
++/*
++ * Decide whether an open must copy the file up first: only lower-only,
++ * non-special files opened for writing or with O_TRUNC qualify.
++ */
++static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
++				  struct dentry *realdentry)
++{
++	if (type != OVL_PATH_LOWER)
++		return false;
++
++	if (special_file(realdentry->d_inode->i_mode))
++		return false;
++
++	if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
++		return false;
++
++	return true;
++}
++
++/*
++ * dentry_open: open the real path, copying the file up first when
++ * ovl_open_need_copy_up() says so.  With O_TRUNC the copy-up skips the
++ * data (no_data == true).  Write access is only taken, and later
++ * dropped, on the copy-up path.
++ */
++static int ovl_dentry_open(struct dentry *dentry, struct file *file,
++		    const struct cred *cred)
++{
++	int err;
++	struct path realpath;
++	enum ovl_path_type type;
++	bool want_write = false;
++
++	type = ovl_path_real(dentry, &realpath);
++	if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
++		want_write = true;
++		err = ovl_want_write(dentry);
++		if (err)
++			goto out;
++
++		if (file->f_flags & O_TRUNC)
++			err = ovl_copy_up_last(dentry, NULL, true);
++		else
++			err = ovl_copy_up(dentry);
++		if (err)
++			goto out_drop_write;
++
++		ovl_path_upper(dentry, &realpath);
++	}
++
++	err = vfs_open(&realpath, file, cred);
++out_drop_write:
++	if (want_write)
++		ovl_drop_write(dentry);
++out:
++	return err;
++}
++ ++static const struct inode_operations ovl_file_inode_operations = { ++ .setattr = ovl_setattr, ++ .permission = ovl_permission, ++ .getattr = ovl_getattr, ++ .setxattr = ovl_setxattr, ++ .getxattr = ovl_getxattr, ++ .listxattr = ovl_listxattr, ++ .removexattr = ovl_removexattr, ++ .dentry_open = ovl_dentry_open, ++}; ++ ++static const struct inode_operations ovl_symlink_inode_operations = { ++ .setattr = ovl_setattr, ++ .follow_link = ovl_follow_link, ++ .put_link = ovl_put_link, ++ .readlink = ovl_readlink, ++ .getattr = ovl_getattr, ++ .setxattr = ovl_setxattr, ++ .getxattr = ovl_getxattr, ++ .listxattr = ovl_listxattr, ++ .removexattr = ovl_removexattr, ++}; ++ ++struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, ++ struct ovl_entry *oe) ++{ ++ struct inode *inode; ++ ++ inode = new_inode(sb); ++ if (!inode) ++ return NULL; ++ ++ mode &= S_IFMT; ++ ++ inode->i_ino = get_next_ino(); ++ inode->i_mode = mode; ++ inode->i_flags |= S_NOATIME | S_NOCMTIME; ++ ++ switch (mode) { ++ case S_IFDIR: ++ inode->i_private = oe; ++ inode->i_op = &ovl_dir_inode_operations; ++ inode->i_fop = &ovl_dir_operations; ++ break; ++ ++ case S_IFLNK: ++ inode->i_op = &ovl_symlink_inode_operations; ++ break; ++ ++ case S_IFREG: ++ case S_IFSOCK: ++ case S_IFBLK: ++ case S_IFCHR: ++ case S_IFIFO: ++ inode->i_op = &ovl_file_inode_operations; ++ break; ++ ++ default: ++ WARN(1, "illegal file type: %i\n", mode); ++ iput(inode); ++ inode = NULL; ++ } ++ ++ return inode; ++ ++} +diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h +new file mode 100644 +index 0000000..814bed3 +--- /dev/null ++++ b/fs/overlayfs/overlayfs.h +@@ -0,0 +1,191 @@ ++/* ++ * ++ * Copyright (C) 2011 Novell Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. 
++ */ ++ ++#include ++ ++struct ovl_entry; ++ ++enum ovl_path_type { ++ OVL_PATH_PURE_UPPER, ++ OVL_PATH_UPPER, ++ OVL_PATH_MERGE, ++ OVL_PATH_LOWER, ++}; ++ ++extern const char *ovl_opaque_xattr; ++ ++static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ int err = vfs_rmdir(dir, dentry); ++ pr_debug("rmdir(%pd2) = %i\n", dentry, err); ++ return err; ++} ++ ++static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err = vfs_unlink(dir, dentry, NULL); ++ pr_debug("unlink(%pd2) = %i\n", dentry, err); ++ return err; ++} ++ ++static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *new_dentry, bool debug) ++{ ++ int err = vfs_link(old_dentry, dir, new_dentry, NULL); ++ if (debug) { ++ pr_debug("link(%pd2, %pd2) = %i\n", ++ old_dentry, new_dentry, err); ++ } ++ return err; ++} ++ ++static inline int ovl_do_create(struct inode *dir, struct dentry *dentry, ++ umode_t mode, bool debug) ++{ ++ int err = vfs_create(dir, dentry, mode, true); ++ if (debug) ++ pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); ++ return err; ++} ++ ++static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry, ++ umode_t mode, bool debug) ++{ ++ int err = vfs_mkdir(dir, dentry, mode); ++ if (debug) ++ pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); ++ return err; ++} ++ ++static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry, ++ umode_t mode, dev_t dev, bool debug) ++{ ++ int err = vfs_mknod(dir, dentry, mode, dev); ++ if (debug) { ++ pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", ++ dentry, mode, dev, err); ++ } ++ return err; ++} ++ ++static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry, ++ const char *oldname, bool debug) ++{ ++ int err = vfs_symlink(dir, dentry, oldname); ++ if (debug) ++ pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); ++ return err; ++} ++ ++static inline int ovl_do_setxattr(struct dentry *dentry, const char 
*name,
++				  const void *value, size_t size, int flags)
++{
++	int err = vfs_setxattr(dentry, name, value, size, flags);
++	/*
++	 * @value is a @size-byte blob with no NUL terminator guaranteed,
++	 * so bound the print with a precision ("%.*s"); a field width
++	 * ("%*s") would only pad and let the formatter run past the end.
++	 */
++	pr_debug("setxattr(%pd2, \"%s\", \"%.*s\", 0x%x) = %i\n",
++		 dentry, name, (int) size, (char *) value, flags, err);
++	return err;
++}
++
++/* vfs_removexattr() wrapper that logs the call and its result. */
++static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
++{
++	int err = vfs_removexattr(dentry, name);
++	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
++	return err;
++}
++
++/*
++ * vfs_rename() wrapper.  The request is logged before the call; the
++ * result is logged only on failure.
++ */
++static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
++				struct inode *newdir, struct dentry *newdentry,
++				unsigned int flags)
++{
++	int err;
++
++	pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
++		 olddentry, newdentry, flags);
++
++	err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
++
++	if (err) {
++		pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
++			 olddentry, newdentry, err);
++	}
++	return err;
++}
++
++/* vfs_whiteout() wrapper that logs the call and its result. */
++static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
++{
++	int err = vfs_whiteout(dir, dentry);
++	pr_debug("whiteout(%pd2) = %i\n", dentry, err);
++	return err;
++}
++
++enum ovl_path_type ovl_path_type(struct dentry *dentry);
++u64 ovl_dentry_version_get(struct dentry *dentry);
++void ovl_dentry_version_inc(struct dentry *dentry);
++void ovl_path_upper(struct dentry *dentry, struct path *path);
++void ovl_path_lower(struct dentry *dentry, struct path *path);
++enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
++struct dentry *ovl_dentry_upper(struct dentry *dentry);
++struct dentry *ovl_dentry_lower(struct dentry *dentry);
++struct dentry *ovl_dentry_real(struct dentry *dentry);
++struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
++struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
++void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
++struct dentry *ovl_workdir(struct dentry *dentry);
++int ovl_want_write(struct dentry *dentry);
++void
ovl_drop_write(struct dentry *dentry); ++bool ovl_dentry_is_opaque(struct dentry *dentry); ++void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); ++bool ovl_is_whiteout(struct dentry *dentry); ++void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); ++struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, ++ unsigned int flags); ++struct file *ovl_path_open(struct path *path, int flags); ++ ++struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, ++ struct kstat *stat, const char *link); ++ ++/* readdir.c */ ++extern const struct file_operations ovl_dir_operations; ++int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); ++void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); ++void ovl_cache_free(struct list_head *list); ++ ++/* inode.c */ ++int ovl_setattr(struct dentry *dentry, struct iattr *attr); ++int ovl_permission(struct inode *inode, int mask); ++int ovl_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags); ++ssize_t ovl_getxattr(struct dentry *dentry, const char *name, ++ void *value, size_t size); ++ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); ++int ovl_removexattr(struct dentry *dentry, const char *name); ++ ++struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, ++ struct ovl_entry *oe); ++static inline void ovl_copyattr(struct inode *from, struct inode *to) ++{ ++ to->i_uid = from->i_uid; ++ to->i_gid = from->i_gid; ++} ++ ++/* dir.c */ ++extern const struct inode_operations ovl_dir_inode_operations; ++struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); ++int ovl_create_real(struct inode *dir, struct dentry *newdentry, ++ struct kstat *stat, const char *link, ++ struct dentry *hardlink, bool debug); ++void ovl_cleanup(struct inode *dir, struct dentry *dentry); ++ ++/* copy_up.c */ ++int ovl_copy_up(struct dentry *dentry); ++int 
ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, ++ struct path *lowerpath, struct kstat *stat, ++ struct iattr *attr); ++int ovl_copy_xattr(struct dentry *old, struct dentry *new); ++int ovl_set_attr(struct dentry *upper, struct kstat *stat); +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +new file mode 100644 +index 0000000..c6787f8 +--- /dev/null ++++ b/fs/overlayfs/readdir.c +@@ -0,0 +1,587 @@ ++/* ++ * ++ * Copyright (C) 2011 Novell Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "overlayfs.h" ++ ++struct ovl_cache_entry { ++ const char *name; ++ unsigned int len; ++ unsigned int type; ++ u64 ino; ++ bool is_whiteout; ++ struct list_head l_node; ++ struct rb_node node; ++}; ++ ++struct ovl_dir_cache { ++ long refcount; ++ u64 version; ++ struct list_head entries; ++}; ++ ++struct ovl_readdir_data { ++ struct dir_context ctx; ++ bool is_merge; ++ struct rb_root *root; ++ struct list_head *list; ++ struct list_head *middle; ++ int count; ++ int err; ++}; ++ ++struct ovl_dir_file { ++ bool is_real; ++ bool is_upper; ++ struct ovl_dir_cache *cache; ++ struct ovl_cache_entry cursor; ++ struct file *realfile; ++ struct file *upperfile; ++}; ++ ++static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) ++{ ++ return container_of(n, struct ovl_cache_entry, node); ++} ++ ++static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, ++ const char *name, int len) ++{ ++ struct rb_node *node = root->rb_node; ++ int cmp; ++ ++ while (node) { ++ struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); ++ ++ cmp = strncmp(name, p->name, len); ++ if (cmp > 0) ++ node = p->node.rb_right; ++ else if (cmp < 0 || len < p->len) ++ node = p->node.rb_left; ++ 
else ++ return p; ++ } ++ ++ return NULL; ++} ++ ++static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, ++ u64 ino, unsigned int d_type) ++{ ++ struct ovl_cache_entry *p; ++ ++ p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL); ++ if (p) { ++ char *name_copy = (char *) (p + 1); ++ memcpy(name_copy, name, len); ++ name_copy[len] = '\0'; ++ p->name = name_copy; ++ p->len = len; ++ p->type = d_type; ++ p->ino = ino; ++ p->is_whiteout = false; ++ } ++ ++ return p; ++} ++ ++static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, ++ const char *name, int len, u64 ino, ++ unsigned int d_type) ++{ ++ struct rb_node **newp = &rdd->root->rb_node; ++ struct rb_node *parent = NULL; ++ struct ovl_cache_entry *p; ++ ++ while (*newp) { ++ int cmp; ++ struct ovl_cache_entry *tmp; ++ ++ parent = *newp; ++ tmp = ovl_cache_entry_from_node(*newp); ++ cmp = strncmp(name, tmp->name, len); ++ if (cmp > 0) ++ newp = &tmp->node.rb_right; ++ else if (cmp < 0 || len < tmp->len) ++ newp = &tmp->node.rb_left; ++ else ++ return 0; ++ } ++ ++ p = ovl_cache_entry_new(name, len, ino, d_type); ++ if (p == NULL) ++ return -ENOMEM; ++ ++ list_add_tail(&p->l_node, rdd->list); ++ rb_link_node(&p->node, parent, newp); ++ rb_insert_color(&p->node, rdd->root); ++ ++ return 0; ++} ++ ++static int ovl_fill_lower(struct ovl_readdir_data *rdd, ++ const char *name, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) ++{ ++ struct ovl_cache_entry *p; ++ ++ p = ovl_cache_entry_find(rdd->root, name, namelen); ++ if (p) { ++ list_move_tail(&p->l_node, rdd->middle); ++ } else { ++ p = ovl_cache_entry_new(name, namelen, ino, d_type); ++ if (p == NULL) ++ rdd->err = -ENOMEM; ++ else ++ list_add_tail(&p->l_node, rdd->middle); ++ } ++ ++ return rdd->err; ++} ++ ++void ovl_cache_free(struct list_head *list) ++{ ++ struct ovl_cache_entry *p; ++ struct ovl_cache_entry *n; ++ ++ list_for_each_entry_safe(p, n, list, l_node) ++ kfree(p); ++ ++ INIT_LIST_HEAD(list); ++} ++ ++static void 
ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) ++{ ++ struct ovl_dir_cache *cache = od->cache; ++ ++ list_del(&od->cursor.l_node); ++ WARN_ON(cache->refcount <= 0); ++ cache->refcount--; ++ if (!cache->refcount) { ++ if (ovl_dir_cache(dentry) == cache) ++ ovl_set_dir_cache(dentry, NULL); ++ ++ ovl_cache_free(&cache->entries); ++ kfree(cache); ++ } ++} ++ ++static int ovl_fill_merge(void *buf, const char *name, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) ++{ ++ struct ovl_readdir_data *rdd = buf; ++ ++ rdd->count++; ++ if (!rdd->is_merge) ++ return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); ++ else ++ return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); ++} ++ ++static inline int ovl_dir_read(struct path *realpath, ++ struct ovl_readdir_data *rdd) ++{ ++ struct file *realfile; ++ int err; ++ ++ realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); ++ if (IS_ERR(realfile)) ++ return PTR_ERR(realfile); ++ ++ rdd->ctx.pos = 0; ++ do { ++ rdd->count = 0; ++ rdd->err = 0; ++ err = iterate_dir(realfile, &rdd->ctx); ++ if (err >= 0) ++ err = rdd->err; ++ } while (!err && rdd->count); ++ fput(realfile); ++ ++ return err; ++} ++ ++static void ovl_dir_reset(struct file *file) ++{ ++ struct ovl_dir_file *od = file->private_data; ++ struct ovl_dir_cache *cache = od->cache; ++ struct dentry *dentry = file->f_path.dentry; ++ enum ovl_path_type type = ovl_path_type(dentry); ++ ++ if (cache && ovl_dentry_version_get(dentry) != cache->version) { ++ ovl_cache_put(od, dentry); ++ od->cache = NULL; ++ } ++ WARN_ON(!od->is_real && type != OVL_PATH_MERGE); ++ if (od->is_real && type == OVL_PATH_MERGE) ++ od->is_real = false; ++} ++ ++static int ovl_dir_mark_whiteouts(struct dentry *dir, ++ struct ovl_readdir_data *rdd) ++{ ++ struct ovl_cache_entry *p; ++ struct dentry *dentry; ++ const struct cred *old_cred; ++ struct cred *override_cred; ++ ++ override_cred = prepare_creds(); ++ if (!override_cred) { ++ 
ovl_cache_free(rdd->list); ++ return -ENOMEM; ++ } ++ ++ /* ++ * CAP_DAC_OVERRIDE for lookup ++ */ ++ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); ++ old_cred = override_creds(override_cred); ++ ++ mutex_lock(&dir->d_inode->i_mutex); ++ list_for_each_entry(p, rdd->list, l_node) { ++ if (!p->name) ++ continue; ++ ++ if (p->type != DT_CHR) ++ continue; ++ ++ dentry = lookup_one_len(p->name, dir, p->len); ++ if (IS_ERR(dentry)) ++ continue; ++ ++ p->is_whiteout = ovl_is_whiteout(dentry); ++ dput(dentry); ++ } ++ mutex_unlock(&dir->d_inode->i_mutex); ++ ++ revert_creds(old_cred); ++ put_cred(override_cred); ++ ++ return 0; ++} ++ ++static inline int ovl_dir_read_merged(struct path *upperpath, ++ struct path *lowerpath, ++ struct list_head *list) ++{ ++ int err; ++ struct rb_root root = RB_ROOT; ++ struct list_head middle; ++ struct ovl_readdir_data rdd = { ++ .ctx.actor = ovl_fill_merge, ++ .list = list, ++ .root = &root, ++ .is_merge = false, ++ }; ++ ++ if (upperpath->dentry) { ++ err = ovl_dir_read(upperpath, &rdd); ++ if (err) ++ goto out; ++ ++ if (lowerpath->dentry) { ++ err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); ++ if (err) ++ goto out; ++ } ++ } ++ if (lowerpath->dentry) { ++ /* ++ * Insert lowerpath entries before upperpath ones, this allows ++ * offsets to be reasonably constant ++ */ ++ list_add(&middle, rdd.list); ++ rdd.middle = &middle; ++ rdd.is_merge = true; ++ err = ovl_dir_read(lowerpath, &rdd); ++ list_del(&middle); ++ } ++out: ++ return err; ++ ++} ++ ++static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) ++{ ++ struct ovl_cache_entry *p; ++ loff_t off = 0; ++ ++ list_for_each_entry(p, &od->cache->entries, l_node) { ++ if (!p->name) ++ continue; ++ if (off >= pos) ++ break; ++ off++; ++ } ++ list_move_tail(&od->cursor.l_node, &p->l_node); ++} ++ ++static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) ++{ ++ int res; ++ struct path lowerpath; ++ struct path upperpath; ++ struct ovl_dir_cache *cache; 
++ ++ cache = ovl_dir_cache(dentry); ++ if (cache && ovl_dentry_version_get(dentry) == cache->version) { ++ cache->refcount++; ++ return cache; ++ } ++ ovl_set_dir_cache(dentry, NULL); ++ ++ cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); ++ if (!cache) ++ return ERR_PTR(-ENOMEM); ++ ++ cache->refcount = 1; ++ INIT_LIST_HEAD(&cache->entries); ++ ++ ovl_path_lower(dentry, &lowerpath); ++ ovl_path_upper(dentry, &upperpath); ++ ++ res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); ++ if (res) { ++ ovl_cache_free(&cache->entries); ++ kfree(cache); ++ return ERR_PTR(res); ++ } ++ ++ cache->version = ovl_dentry_version_get(dentry); ++ ovl_set_dir_cache(dentry, cache); ++ ++ return cache; ++} ++ ++static int ovl_iterate(struct file *file, struct dir_context *ctx) ++{ ++ struct ovl_dir_file *od = file->private_data; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ if (!ctx->pos) ++ ovl_dir_reset(file); ++ ++ if (od->is_real) ++ return iterate_dir(od->realfile, ctx); ++ ++ if (!od->cache) { ++ struct ovl_dir_cache *cache; ++ ++ cache = ovl_cache_get(dentry); ++ if (IS_ERR(cache)) ++ return PTR_ERR(cache); ++ ++ od->cache = cache; ++ ovl_seek_cursor(od, ctx->pos); ++ } ++ ++ while (od->cursor.l_node.next != &od->cache->entries) { ++ struct ovl_cache_entry *p; ++ ++ p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); ++ /* Skip cursors */ ++ if (p->name) { ++ if (!p->is_whiteout) { ++ if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) ++ break; ++ } ++ ctx->pos++; ++ } ++ list_move(&od->cursor.l_node, &p->l_node); ++ } ++ return 0; ++} ++ ++static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) ++{ ++ loff_t res; ++ struct ovl_dir_file *od = file->private_data; ++ ++ mutex_lock(&file_inode(file)->i_mutex); ++ if (!file->f_pos) ++ ovl_dir_reset(file); ++ ++ if (od->is_real) { ++ res = vfs_llseek(od->realfile, offset, origin); ++ file->f_pos = od->realfile->f_pos; ++ } else { ++ res = -EINVAL; ++ ++ 
switch (origin) { ++ case SEEK_CUR: ++ offset += file->f_pos; ++ break; ++ case SEEK_SET: ++ break; ++ default: ++ goto out_unlock; ++ } ++ if (offset < 0) ++ goto out_unlock; ++ ++ if (offset != file->f_pos) { ++ file->f_pos = offset; ++ if (od->cache) ++ ovl_seek_cursor(od, offset); ++ } ++ res = offset; ++ } ++out_unlock: ++ mutex_unlock(&file_inode(file)->i_mutex); ++ ++ return res; ++} ++ ++static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ struct ovl_dir_file *od = file->private_data; ++ struct dentry *dentry = file->f_path.dentry; ++ struct file *realfile = od->realfile; ++ ++ /* ++ * Need to check if we started out being a lower dir, but got copied up ++ */ ++ if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { ++ struct inode *inode = file_inode(file); ++ ++ mutex_lock(&inode->i_mutex); ++ realfile = od->upperfile; ++ if (!realfile) { ++ struct path upperpath; ++ ++ ovl_path_upper(dentry, &upperpath); ++ realfile = ovl_path_open(&upperpath, O_RDONLY); ++ if (IS_ERR(realfile)) { ++ mutex_unlock(&inode->i_mutex); ++ return PTR_ERR(realfile); ++ } ++ od->upperfile = realfile; ++ } ++ mutex_unlock(&inode->i_mutex); ++ } ++ ++ return vfs_fsync_range(realfile, start, end, datasync); ++} ++ ++static int ovl_dir_release(struct inode *inode, struct file *file) ++{ ++ struct ovl_dir_file *od = file->private_data; ++ ++ if (od->cache) { ++ mutex_lock(&inode->i_mutex); ++ ovl_cache_put(od, file->f_path.dentry); ++ mutex_unlock(&inode->i_mutex); ++ } ++ fput(od->realfile); ++ if (od->upperfile) ++ fput(od->upperfile); ++ kfree(od); ++ ++ return 0; ++} ++ ++static int ovl_dir_open(struct inode *inode, struct file *file) ++{ ++ struct path realpath; ++ struct file *realfile; ++ struct ovl_dir_file *od; ++ enum ovl_path_type type; ++ ++ od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); ++ if (!od) ++ return -ENOMEM; ++ ++ type = ovl_path_real(file->f_path.dentry, &realpath); ++ realfile = ovl_path_open(&realpath, 
file->f_flags); ++ if (IS_ERR(realfile)) { ++ kfree(od); ++ return PTR_ERR(realfile); ++ } ++ INIT_LIST_HEAD(&od->cursor.l_node); ++ od->realfile = realfile; ++ od->is_real = (type != OVL_PATH_MERGE); ++ od->is_upper = (type != OVL_PATH_LOWER); ++ file->private_data = od; ++ ++ return 0; ++} ++ ++const struct file_operations ovl_dir_operations = { ++ .read = generic_read_dir, ++ .open = ovl_dir_open, ++ .iterate = ovl_iterate, ++ .llseek = ovl_dir_llseek, ++ .fsync = ovl_dir_fsync, ++ .release = ovl_dir_release, ++}; ++ ++int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) ++{ ++ int err; ++ struct path lowerpath; ++ struct path upperpath; ++ struct ovl_cache_entry *p; ++ ++ ovl_path_upper(dentry, &upperpath); ++ ovl_path_lower(dentry, &lowerpath); ++ ++ err = ovl_dir_read_merged(&upperpath, &lowerpath, list); ++ if (err) ++ return err; ++ ++ err = 0; ++ ++ list_for_each_entry(p, list, l_node) { ++ if (p->is_whiteout) ++ continue; ++ ++ if (p->name[0] == '.') { ++ if (p->len == 1) ++ continue; ++ if (p->len == 2 && p->name[1] == '.') ++ continue; ++ } ++ err = -ENOTEMPTY; ++ break; ++ } ++ ++ return err; ++} ++ ++void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) ++{ ++ struct ovl_cache_entry *p; ++ ++ mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT); ++ list_for_each_entry(p, list, l_node) { ++ struct dentry *dentry; ++ ++ if (!p->is_whiteout) ++ continue; ++ ++ dentry = lookup_one_len(p->name, upper, p->len); ++ if (IS_ERR(dentry)) { ++ pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", ++ upper->d_name.name, p->len, p->name, ++ (int) PTR_ERR(dentry)); ++ continue; ++ } ++ ovl_cleanup(upper->d_inode, dentry); ++ dput(dentry); ++ } ++ mutex_unlock(&upper->d_inode->i_mutex); ++} +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +new file mode 100644 +index 0000000..227710a +--- /dev/null ++++ b/fs/overlayfs/super.c +@@ -0,0 +1,727 @@ ++/* ++ * ++ * Copyright (C) 2011 Novell Inc. 
++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "overlayfs.h" ++ ++MODULE_AUTHOR("Miklos Szeredi "); ++MODULE_DESCRIPTION("Overlay filesystem"); ++MODULE_LICENSE("GPL"); ++ ++/* private information held for overlayfs's superblock */ ++struct ovl_fs { ++ struct vfsmount *upper_mnt; ++ struct vfsmount *lower_mnt; ++ struct dentry *workdir; ++}; ++ ++struct ovl_dir_cache; ++ ++/* private information held for every overlayfs dentry */ ++struct ovl_entry { ++ struct dentry *__upperdentry; ++ struct dentry *lowerdentry; ++ struct ovl_dir_cache *cache; ++ union { ++ struct { ++ u64 version; ++ bool opaque; ++ }; ++ struct rcu_head rcu; ++ }; ++}; ++ ++const char *ovl_opaque_xattr = "trusted.overlay.opaque"; ++ ++ ++enum ovl_path_type ovl_path_type(struct dentry *dentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ if (oe->__upperdentry) { ++ if (oe->lowerdentry) { ++ if (S_ISDIR(dentry->d_inode->i_mode)) ++ return OVL_PATH_MERGE; ++ else ++ return OVL_PATH_UPPER; ++ } else { ++ if (oe->opaque) ++ return OVL_PATH_UPPER; ++ else ++ return OVL_PATH_PURE_UPPER; ++ } ++ } else { ++ return OVL_PATH_LOWER; ++ } ++} ++ ++static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) ++{ ++ struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); ++ /* ++ * Make sure to order reads to upperdentry wrt ovl_dentry_update() ++ */ ++ smp_read_barrier_depends(); ++ return upperdentry; ++} ++ ++void ovl_path_upper(struct dentry *dentry, struct path *path) ++{ ++ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ path->mnt = ofs->upper_mnt; ++ path->dentry = ovl_upperdentry_dereference(oe); ++} ++ ++enum ovl_path_type ovl_path_real(struct dentry *dentry, 
struct path *path) ++{ ++ ++ enum ovl_path_type type = ovl_path_type(dentry); ++ ++ if (type == OVL_PATH_LOWER) ++ ovl_path_lower(dentry, path); ++ else ++ ovl_path_upper(dentry, path); ++ ++ return type; ++} ++ ++struct dentry *ovl_dentry_upper(struct dentry *dentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ return ovl_upperdentry_dereference(oe); ++} ++ ++struct dentry *ovl_dentry_lower(struct dentry *dentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ return oe->lowerdentry; ++} ++ ++struct dentry *ovl_dentry_real(struct dentry *dentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ struct dentry *realdentry; ++ ++ realdentry = ovl_upperdentry_dereference(oe); ++ if (!realdentry) ++ realdentry = oe->lowerdentry; ++ ++ return realdentry; ++} ++ ++struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) ++{ ++ struct dentry *realdentry; ++ ++ realdentry = ovl_upperdentry_dereference(oe); ++ if (realdentry) { ++ *is_upper = true; ++ } else { ++ realdentry = oe->lowerdentry; ++ *is_upper = false; ++ } ++ return realdentry; ++} ++ ++struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ return oe->cache; ++} ++ ++void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ oe->cache = cache; ++} ++ ++void ovl_path_lower(struct dentry *dentry, struct path *path) ++{ ++ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ path->mnt = ofs->lower_mnt; ++ path->dentry = oe->lowerdentry; ++} ++ ++int ovl_want_write(struct dentry *dentry) ++{ ++ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; ++ return mnt_want_write(ofs->upper_mnt); ++} ++ ++void ovl_drop_write(struct dentry *dentry) ++{ ++ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; ++ mnt_drop_write(ofs->upper_mnt); ++} ++ ++struct dentry *ovl_workdir(struct dentry *dentry) ++{ ++ struct ovl_fs *ofs = 
dentry->d_sb->s_fs_info; ++ return ofs->workdir; ++} ++ ++bool ovl_dentry_is_opaque(struct dentry *dentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ return oe->opaque; ++} ++ ++void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ oe->opaque = opaque; ++} ++ ++void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); ++ WARN_ON(oe->__upperdentry); ++ BUG_ON(!upperdentry->d_inode); ++ /* ++ * Make sure upperdentry is consistent before making it visible to ++ * ovl_upperdentry_dereference(). ++ */ ++ smp_wmb(); ++ oe->__upperdentry = upperdentry; ++} ++ ++void ovl_dentry_version_inc(struct dentry *dentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); ++ oe->version++; ++} ++ ++u64 ovl_dentry_version_get(struct dentry *dentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); ++ return oe->version; ++} ++ ++bool ovl_is_whiteout(struct dentry *dentry) ++{ ++ struct inode *inode = dentry->d_inode; ++ ++ return inode && IS_WHITEOUT(inode); ++} ++ ++static bool ovl_is_opaquedir(struct dentry *dentry) ++{ ++ int res; ++ char val; ++ struct inode *inode = dentry->d_inode; ++ ++ if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) ++ return false; ++ ++ res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1); ++ if (res == 1 && val == 'y') ++ return true; ++ ++ return false; ++} ++ ++static void ovl_dentry_release(struct dentry *dentry) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ if (oe) { ++ dput(oe->__upperdentry); ++ dput(oe->lowerdentry); ++ kfree_rcu(oe, rcu); ++ } ++} ++ ++static const struct dentry_operations ovl_dentry_operations = { ++ .d_release = ovl_dentry_release, ++}; ++ ++static struct ovl_entry *ovl_alloc_entry(void) 
++{ ++ return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); ++} ++ ++static inline struct dentry *ovl_lookup_real(struct dentry *dir, ++ struct qstr *name) ++{ ++ struct dentry *dentry; ++ ++ mutex_lock(&dir->d_inode->i_mutex); ++ dentry = lookup_one_len(name->name, dir, name->len); ++ mutex_unlock(&dir->d_inode->i_mutex); ++ ++ if (IS_ERR(dentry)) { ++ if (PTR_ERR(dentry) == -ENOENT) ++ dentry = NULL; ++ } else if (!dentry->d_inode) { ++ dput(dentry); ++ dentry = NULL; ++ } ++ return dentry; ++} ++ ++struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, ++ unsigned int flags) ++{ ++ struct ovl_entry *oe; ++ struct dentry *upperdir; ++ struct dentry *lowerdir; ++ struct dentry *upperdentry = NULL; ++ struct dentry *lowerdentry = NULL; ++ struct inode *inode = NULL; ++ int err; ++ ++ err = -ENOMEM; ++ oe = ovl_alloc_entry(); ++ if (!oe) ++ goto out; ++ ++ upperdir = ovl_dentry_upper(dentry->d_parent); ++ lowerdir = ovl_dentry_lower(dentry->d_parent); ++ ++ if (upperdir) { ++ upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); ++ err = PTR_ERR(upperdentry); ++ if (IS_ERR(upperdentry)) ++ goto out_put_dir; ++ ++ if (lowerdir && upperdentry) { ++ if (ovl_is_whiteout(upperdentry)) { ++ dput(upperdentry); ++ upperdentry = NULL; ++ oe->opaque = true; ++ } else if (ovl_is_opaquedir(upperdentry)) { ++ oe->opaque = true; ++ } ++ } ++ } ++ if (lowerdir && !oe->opaque) { ++ lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name); ++ err = PTR_ERR(lowerdentry); ++ if (IS_ERR(lowerdentry)) ++ goto out_dput_upper; ++ } ++ ++ if (lowerdentry && upperdentry && ++ (!S_ISDIR(upperdentry->d_inode->i_mode) || ++ !S_ISDIR(lowerdentry->d_inode->i_mode))) { ++ dput(lowerdentry); ++ lowerdentry = NULL; ++ oe->opaque = true; ++ } ++ ++ if (lowerdentry || upperdentry) { ++ struct dentry *realdentry; ++ ++ realdentry = upperdentry ? 
upperdentry : lowerdentry; ++ err = -ENOMEM; ++ inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode, ++ oe); ++ if (!inode) ++ goto out_dput; ++ ovl_copyattr(realdentry->d_inode, inode); ++ } ++ ++ oe->__upperdentry = upperdentry; ++ oe->lowerdentry = lowerdentry; ++ ++ dentry->d_fsdata = oe; ++ d_add(dentry, inode); ++ ++ return NULL; ++ ++out_dput: ++ dput(lowerdentry); ++out_dput_upper: ++ dput(upperdentry); ++out_put_dir: ++ kfree(oe); ++out: ++ return ERR_PTR(err); ++} ++ ++struct file *ovl_path_open(struct path *path, int flags) ++{ ++ return dentry_open(path, flags, current_cred()); ++} ++ ++static void ovl_put_super(struct super_block *sb) ++{ ++ struct ovl_fs *ufs = sb->s_fs_info; ++ ++ dput(ufs->workdir); ++ mntput(ufs->upper_mnt); ++ mntput(ufs->lower_mnt); ++ ++ kfree(ufs); ++} ++ ++static const struct super_operations ovl_super_operations = { ++ .put_super = ovl_put_super, ++}; ++ ++struct ovl_config { ++ char *lowerdir; ++ char *upperdir; ++ char *workdir; ++}; ++ ++enum { ++ OPT_LOWERDIR, ++ OPT_UPPERDIR, ++ OPT_WORKDIR, ++ OPT_ERR, ++}; ++ ++static const match_table_t ovl_tokens = { ++ {OPT_LOWERDIR, "lowerdir=%s"}, ++ {OPT_UPPERDIR, "upperdir=%s"}, ++ {OPT_WORKDIR, "workdir=%s"}, ++ {OPT_ERR, NULL} ++}; ++ ++static int ovl_parse_opt(char *opt, struct ovl_config *config) ++{ ++ char *p; ++ ++ config->upperdir = NULL; ++ config->lowerdir = NULL; ++ config->workdir = NULL; ++ ++ while ((p = strsep(&opt, ",")) != NULL) { ++ int token; ++ substring_t args[MAX_OPT_ARGS]; ++ ++ if (!*p) ++ continue; ++ ++ token = match_token(p, ovl_tokens, args); ++ switch (token) { ++ case OPT_UPPERDIR: ++ kfree(config->upperdir); ++ config->upperdir = match_strdup(&args[0]); ++ if (!config->upperdir) ++ return -ENOMEM; ++ break; ++ ++ case OPT_LOWERDIR: ++ kfree(config->lowerdir); ++ config->lowerdir = match_strdup(&args[0]); ++ if (!config->lowerdir) ++ return -ENOMEM; ++ break; ++ ++ case OPT_WORKDIR: ++ kfree(config->workdir); ++ config->workdir = 
match_strdup(&args[0]); ++ if (!config->workdir) ++ return -ENOMEM; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ } ++ return 0; ++} ++ ++#define OVL_WORKDIR_NAME "work" ++ ++static struct dentry *ovl_workdir_create(struct vfsmount *mnt, ++ struct dentry *dentry) ++{ ++ struct inode *dir = dentry->d_inode; ++ struct dentry *work; ++ int err; ++ bool retried = false; ++ ++ err = mnt_want_write(mnt); ++ if (err) ++ return ERR_PTR(err); ++ ++ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); ++retry: ++ work = lookup_one_len(OVL_WORKDIR_NAME, dentry, ++ strlen(OVL_WORKDIR_NAME)); ++ ++ if (!IS_ERR(work)) { ++ struct kstat stat = { ++ .mode = S_IFDIR | 0, ++ }; ++ ++ if (work->d_inode) { ++ err = -EEXIST; ++ if (retried) ++ goto out_dput; ++ ++ retried = true; ++ ovl_cleanup(dir, work); ++ dput(work); ++ goto retry; ++ } ++ ++ err = ovl_create_real(dir, work, &stat, NULL, NULL, true); ++ if (err) ++ goto out_dput; ++ } ++out_unlock: ++ mutex_unlock(&dir->i_mutex); ++ mnt_drop_write(mnt); ++ ++ return work; ++ ++out_dput: ++ dput(work); ++ work = ERR_PTR(err); ++ goto out_unlock; ++} ++ ++static int ovl_mount_dir(const char *name, struct path *path) ++{ ++ int err; ++ ++ err = kern_path(name, LOOKUP_FOLLOW, path); ++ if (err) { ++ pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); ++ err = -EINVAL; ++ } ++ return err; ++} ++ ++static bool ovl_is_allowed_fs_type(struct dentry *root) ++{ ++ const struct dentry_operations *dop = root->d_op; ++ ++ /* ++ * We don't support: ++ * - automount filesystems ++ * - filesystems with revalidate (FIXME for lower layer) ++ * - filesystems with case insensitive names ++ */ ++ if (dop && ++ (dop->d_manage || dop->d_automount || ++ dop->d_revalidate || dop->d_weak_revalidate || ++ dop->d_compare || dop->d_hash)) { ++ return false; ++ } ++ return true; ++} ++ ++/* Workdir should not be subdir of upperdir and vice versa */ ++static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) ++{ ++ bool ok = false; 
++ ++ if (workdir != upperdir) { ++ ok = (lock_rename(workdir, upperdir) == NULL); ++ unlock_rename(workdir, upperdir); ++ } ++ return ok; ++} ++ ++static int ovl_fill_super(struct super_block *sb, void *data, int silent) ++{ ++ struct path lowerpath; ++ struct path upperpath; ++ struct path workpath; ++ struct inode *root_inode; ++ struct dentry *root_dentry; ++ struct ovl_entry *oe; ++ struct ovl_fs *ufs; ++ struct ovl_config config; ++ int err; ++ ++ err = ovl_parse_opt((char *) data, &config); ++ if (err) ++ goto out; ++ ++ /* FIXME: workdir is not needed for a R/O mount */ ++ err = -EINVAL; ++ if (!config.upperdir || !config.lowerdir || !config.workdir) { ++ pr_err("overlayfs: missing upperdir or lowerdir or workdir\n"); ++ goto out_free_config; ++ } ++ ++ err = -ENOMEM; ++ ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL); ++ if (!ufs) ++ goto out_free_config; ++ ++ oe = ovl_alloc_entry(); ++ if (oe == NULL) ++ goto out_free_ufs; ++ ++ err = ovl_mount_dir(config.upperdir, &upperpath); ++ if (err) ++ goto out_free_oe; ++ ++ err = ovl_mount_dir(config.lowerdir, &lowerpath); ++ if (err) ++ goto out_put_upperpath; ++ ++ err = ovl_mount_dir(config.workdir, &workpath); ++ if (err) ++ goto out_put_lowerpath; ++ ++ err = -EINVAL; ++ if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || ++ !S_ISDIR(lowerpath.dentry->d_inode->i_mode) || ++ !S_ISDIR(workpath.dentry->d_inode->i_mode)) { ++ pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n"); ++ goto out_put_workpath; ++ } ++ ++ if (upperpath.mnt != workpath.mnt) { ++ pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); ++ goto out_put_workpath; ++ } ++ if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { ++ pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); ++ goto out_put_workpath; ++ } ++ ++ if (!ovl_is_allowed_fs_type(upperpath.dentry)) { ++ pr_err("overlayfs: filesystem of upperdir is not supported\n"); ++ goto out_put_workpath; ++ } ++ ++ if 
(!ovl_is_allowed_fs_type(lowerpath.dentry)) { ++ pr_err("overlayfs: filesystem of lowerdir is not supported\n"); ++ goto out_put_workpath; ++ } ++ ++ ufs->upper_mnt = clone_private_mount(&upperpath); ++ err = PTR_ERR(ufs->upper_mnt); ++ if (IS_ERR(ufs->upper_mnt)) { ++ pr_err("overlayfs: failed to clone upperpath\n"); ++ goto out_put_workpath; ++ } ++ ++ ufs->lower_mnt = clone_private_mount(&lowerpath); ++ err = PTR_ERR(ufs->lower_mnt); ++ if (IS_ERR(ufs->lower_mnt)) { ++ pr_err("overlayfs: failed to clone lowerpath\n"); ++ goto out_put_upper_mnt; ++ } ++ ++ ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); ++ err = PTR_ERR(ufs->workdir); ++ if (IS_ERR(ufs->workdir)) { ++ pr_err("overlayfs: failed to create directory %s/%s\n", ++ config.workdir, OVL_WORKDIR_NAME); ++ goto out_put_lower_mnt; ++ } ++ ++ /* ++ * Make lower_mnt R/O. That way fchmod/fchown on lower file ++ * will fail instead of modifying lower fs. ++ */ ++ ufs->lower_mnt->mnt_flags |= MNT_READONLY; ++ ++ /* If the upper fs is r/o, we mark overlayfs r/o too */ ++ if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) ++ sb->s_flags |= MS_RDONLY; ++ ++ sb->s_d_op = &ovl_dentry_operations; ++ ++ err = -ENOMEM; ++ root_inode = ovl_new_inode(sb, S_IFDIR, oe); ++ if (!root_inode) ++ goto out_put_workdir; ++ ++ root_dentry = d_make_root(root_inode); ++ if (!root_dentry) ++ goto out_put_workdir; ++ ++ mntput(upperpath.mnt); ++ mntput(lowerpath.mnt); ++ path_put(&workpath); ++ ++ oe->__upperdentry = upperpath.dentry; ++ oe->lowerdentry = lowerpath.dentry; ++ ++ root_dentry->d_fsdata = oe; ++ ++ sb->s_op = &ovl_super_operations; ++ sb->s_root = root_dentry; ++ sb->s_fs_info = ufs; ++ ++ return 0; ++ ++out_put_workdir: ++ dput(ufs->workdir); ++out_put_lower_mnt: ++ mntput(ufs->lower_mnt); ++out_put_upper_mnt: ++ mntput(ufs->upper_mnt); ++out_put_workpath: ++ path_put(&workpath); ++out_put_lowerpath: ++ path_put(&lowerpath); ++out_put_upperpath: ++ path_put(&upperpath); ++out_free_oe: ++ kfree(oe); 
++out_free_ufs: ++ kfree(ufs); ++out_free_config: ++ kfree(config.lowerdir); ++ kfree(config.upperdir); ++ kfree(config.workdir); ++out: ++ return err; ++} ++ ++static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, ++ const char *dev_name, void *raw_data) ++{ ++ return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); ++} ++ ++static struct file_system_type ovl_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "overlayfs", ++ .mount = ovl_mount, ++ .kill_sb = kill_anon_super, ++}; ++MODULE_ALIAS_FS("overlayfs"); ++ ++static int __init ovl_init(void) ++{ ++ return register_filesystem(&ovl_fs_type); ++} ++ ++static void __exit ovl_exit(void) ++{ ++ unregister_filesystem(&ovl_fs_type); ++} ++ ++module_init(ovl_init); ++module_exit(ovl_exit); +-- +2.7.4 + + +From a38696f51b951dddbba71e7a3cabb47d9347fa06 Mon Sep 17 00:00:00 2001 +From: Andy Whitcroft +Date: Fri, 24 Oct 2014 00:14:38 +0200 +Subject: [PATCH 21/73] overlayfs: add statfs support + +Add support for statfs to the overlayfs filesystem. As the upper layer +is the target of all write operations assume that the space in that +filesystem is the space in the overlayfs. There will be some inaccuracy as +overwriting a file will copy it up and consume space we were not expecting, +but it is better than nothing. + +Use the upper layer dentry and mount from the overlayfs root inode, +passing the statfs call to that filesystem. 
+ +Signed-off-by: Andy Whitcroft +Signed-off-by: Miklos Szeredi +(cherry picked from commit cc2596392af3b1404421aaef828a255303c46f93) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 227710a..aaf562b 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -16,17 +16,21 @@ + #include + #include + #include ++#include + #include "overlayfs.h" + + MODULE_AUTHOR("Miklos Szeredi "); + MODULE_DESCRIPTION("Overlay filesystem"); + MODULE_LICENSE("GPL"); + ++#define OVERLAYFS_SUPER_MAGIC 0x794c764f ++ + /* private information held for overlayfs's superblock */ + struct ovl_fs { + struct vfsmount *upper_mnt; + struct vfsmount *lower_mnt; + struct dentry *workdir; ++ long lower_namelen; + }; + + struct ovl_dir_cache; +@@ -383,8 +387,35 @@ static void ovl_put_super(struct super_block *sb) + kfree(ufs); + } + ++/** ++ * ovl_statfs ++ * @sb: The overlayfs super block ++ * @buf: The struct kstatfs to fill in with stats ++ * ++ * Get the filesystem statistics. As writes always target the upper layer ++ * filesystem pass the statfs to the same filesystem. 
++ */ ++static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; ++ struct dentry *root_dentry = dentry->d_sb->s_root; ++ struct path path; ++ int err; ++ ++ ovl_path_upper(root_dentry, &path); ++ ++ err = vfs_statfs(&path, buf); ++ if (!err) { ++ buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); ++ buf->f_type = OVERLAYFS_SUPER_MAGIC; ++ } ++ ++ return err; ++} ++ + static const struct super_operations ovl_super_operations = { + .put_super = ovl_put_super, ++ .statfs = ovl_statfs, + }; + + struct ovl_config { +@@ -556,6 +587,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + struct ovl_entry *oe; + struct ovl_fs *ufs; + struct ovl_config config; ++ struct kstatfs statfs; + int err; + + err = ovl_parse_opt((char *) data, &config); +@@ -617,6 +649,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + goto out_put_workpath; + } + ++ err = vfs_statfs(&lowerpath, &statfs); ++ if (err) { ++ pr_err("overlayfs: statfs failed on lowerpath\n"); ++ goto out_put_workpath; ++ } ++ ufs->lower_namelen = statfs.f_namelen; ++ + ufs->upper_mnt = clone_private_mount(&upperpath); + err = PTR_ERR(ufs->upper_mnt); + if (IS_ERR(ufs->upper_mnt)) { +@@ -669,6 +708,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + + root_dentry->d_fsdata = oe; + ++ sb->s_magic = OVERLAYFS_SUPER_MAGIC; + sb->s_op = &ovl_super_operations; + sb->s_root = root_dentry; + sb->s_fs_info = ufs; +-- +2.7.4 + + +From 2428e59706e6221ccebeee9c79870988485e2b06 Mon Sep 17 00:00:00 2001 +From: Erez Zadok +Date: Fri, 24 Oct 2014 00:14:38 +0200 +Subject: [PATCH 22/73] overlayfs: implement show_options + +This is useful because of the stacking nature of overlayfs. Users like to +find out (via /proc/mounts) which lower/upper directory were used at mount +time. 
+ +AV: even failing ovl_parse_opt() could've done some kstrdup() +AV: failure of ovl_alloc_entry() should end up with ENOMEM, not EINVAL + +Signed-off-by: Erez Zadok +Signed-off-by: Miklos Szeredi +(cherry picked from commit f45827e84186af152492c6d0dcf4105b4a605f9b) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 76 +++++++++++++++++++++++++++++++++------------------- + 1 file changed, 48 insertions(+), 28 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index aaf562b..7dcc24e 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include "overlayfs.h" + + MODULE_AUTHOR("Miklos Szeredi "); +@@ -25,12 +26,20 @@ MODULE_LICENSE("GPL"); + + #define OVERLAYFS_SUPER_MAGIC 0x794c764f + ++struct ovl_config { ++ char *lowerdir; ++ char *upperdir; ++ char *workdir; ++}; ++ + /* private information held for overlayfs's superblock */ + struct ovl_fs { + struct vfsmount *upper_mnt; + struct vfsmount *lower_mnt; + struct dentry *workdir; + long lower_namelen; ++ /* pathnames of lower and upper dirs, for show_options */ ++ struct ovl_config config; + }; + + struct ovl_dir_cache; +@@ -384,6 +393,9 @@ static void ovl_put_super(struct super_block *sb) + mntput(ufs->upper_mnt); + mntput(ufs->lower_mnt); + ++ kfree(ufs->config.lowerdir); ++ kfree(ufs->config.upperdir); ++ kfree(ufs->config.workdir); + kfree(ufs); + } + +@@ -413,15 +425,27 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) + return err; + } + ++/** ++ * ovl_show_options ++ * ++ * Prints the mount options for a given superblock. ++ * Returns zero; does not fail. 
++ */ ++static int ovl_show_options(struct seq_file *m, struct dentry *dentry) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct ovl_fs *ufs = sb->s_fs_info; ++ ++ seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); ++ seq_printf(m, ",upperdir=%s", ufs->config.upperdir); ++ seq_printf(m, ",workdir=%s", ufs->config.workdir); ++ return 0; ++} ++ + static const struct super_operations ovl_super_operations = { + .put_super = ovl_put_super, + .statfs = ovl_statfs, +-}; +- +-struct ovl_config { +- char *lowerdir; +- char *upperdir; +- char *workdir; ++ .show_options = ovl_show_options, + }; + + enum { +@@ -442,10 +466,6 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) + { + char *p; + +- config->upperdir = NULL; +- config->lowerdir = NULL; +- config->workdir = NULL; +- + while ((p = strsep(&opt, ",")) != NULL) { + int token; + substring_t args[MAX_OPT_ARGS]; +@@ -586,39 +606,40 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + struct dentry *root_dentry; + struct ovl_entry *oe; + struct ovl_fs *ufs; +- struct ovl_config config; + struct kstatfs statfs; + int err; + +- err = ovl_parse_opt((char *) data, &config); +- if (err) ++ err = -ENOMEM; ++ ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); ++ if (!ufs) + goto out; + ++ err = ovl_parse_opt((char *) data, &ufs->config); ++ if (err) ++ goto out_free_config; ++ + /* FIXME: workdir is not needed for a R/O mount */ + err = -EINVAL; +- if (!config.upperdir || !config.lowerdir || !config.workdir) { ++ if (!ufs->config.upperdir || !ufs->config.lowerdir || ++ !ufs->config.workdir) { + pr_err("overlayfs: missing upperdir or lowerdir or workdir\n"); + goto out_free_config; + } + + err = -ENOMEM; +- ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL); +- if (!ufs) +- goto out_free_config; +- + oe = ovl_alloc_entry(); + if (oe == NULL) +- goto out_free_ufs; ++ goto out_free_config; + +- err = ovl_mount_dir(config.upperdir, &upperpath); ++ err = ovl_mount_dir(ufs->config.upperdir, 
&upperpath); + if (err) + goto out_free_oe; + +- err = ovl_mount_dir(config.lowerdir, &lowerpath); ++ err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); + if (err) + goto out_put_upperpath; + +- err = ovl_mount_dir(config.workdir, &workpath); ++ err = ovl_mount_dir(ufs->config.workdir, &workpath); + if (err) + goto out_put_lowerpath; + +@@ -674,7 +695,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + err = PTR_ERR(ufs->workdir); + if (IS_ERR(ufs->workdir)) { + pr_err("overlayfs: failed to create directory %s/%s\n", +- config.workdir, OVL_WORKDIR_NAME); ++ ufs->config.workdir, OVL_WORKDIR_NAME); + goto out_put_lower_mnt; + } + +@@ -729,12 +750,11 @@ out_put_upperpath: + path_put(&upperpath); + out_free_oe: + kfree(oe); +-out_free_ufs: +- kfree(ufs); + out_free_config: +- kfree(config.lowerdir); +- kfree(config.upperdir); +- kfree(config.workdir); ++ kfree(ufs->config.lowerdir); ++ kfree(ufs->config.upperdir); ++ kfree(ufs->config.workdir); ++ kfree(ufs); + out: + return err; + } +-- +2.7.4 + + +From 8f963c31b87794a5c16a6ea155f8001cdd1d93c7 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 24 Oct 2014 00:14:39 +0200 +Subject: [PATCH 23/73] fs: limit filesystem stacking depth + +Add a simple read-only counter to super_block that indicates how deep this +is in the stack of filesystems. Previously ecryptfs was the only stackable +filesystem and it explicitly disallowed multiple layers of itself. + +Overlayfs, however, can be stacked recursively and also may be stacked +on top of ecryptfs or vice versa. + +To limit the kernel stack usage we must limit the depth of the +filesystem stack. Initially the limit is set to 2. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 69c433ed2ecd2d3264efd7afec4439524b319121) +Signed-off-by: Alex Shi +--- + fs/ecryptfs/main.c | 7 +++++++ + fs/overlayfs/super.c | 9 +++++++++ + include/linux/fs.h | 11 +++++++++++ + 3 files changed, 27 insertions(+) + +diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c +index 34eb843..d9eb84b 100644 +--- a/fs/ecryptfs/main.c ++++ b/fs/ecryptfs/main.c +@@ -576,6 +576,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags + s->s_maxbytes = path.dentry->d_sb->s_maxbytes; + s->s_blocksize = path.dentry->d_sb->s_blocksize; + s->s_magic = ECRYPTFS_SUPER_MAGIC; ++ s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; ++ ++ rc = -EINVAL; ++ if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { ++ pr_err("eCryptfs: maximum fs stacking depth exceeded\n"); ++ goto out_free; ++ } + + inode = ecryptfs_get_inode(path.dentry->d_inode, s); + rc = PTR_ERR(inode); +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 7dcc24e..08b704c 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -677,6 +677,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + } + ufs->lower_namelen = statfs.f_namelen; + ++ sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth, ++ lowerpath.mnt->mnt_sb->s_stack_depth) + 1; ++ ++ err = -EINVAL; ++ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { ++ pr_err("overlayfs: maximum fs stacking depth exceeded\n"); ++ goto out_put_workpath; ++ } ++ + ufs->upper_mnt = clone_private_mount(&upperpath); + err = PTR_ERR(ufs->upper_mnt); + if (IS_ERR(ufs->upper_mnt)) { +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 18492c8..429af8d 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -256,6 +256,12 @@ struct iattr { + */ + #include + ++/* ++ * Maximum number of layers of fs stack. 
Needs to be limited to ++ * prevent kernel stack overflow ++ */ ++#define FILESYSTEM_MAX_STACK_DEPTH 2 ++ + /** + * enum positive_aop_returns - aop return codes with specific semantics + * +@@ -1336,6 +1342,11 @@ struct super_block { + struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; + struct list_lru s_inode_lru ____cacheline_aligned_in_smp; + struct rcu_head rcu; ++ ++ /* ++ * Indicates how deep in a filesystem stack this SB is ++ */ ++ int s_stack_depth; + }; + + extern struct timespec current_fs_time(struct super_block *sb); +-- +2.7.4 + + +From a5613f10c5230eb50a863caf596774c50ec8f3e9 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Thu, 23 Oct 2014 22:56:05 -0400 +Subject: [PATCH 24/73] overlayfs: don't hold ->i_mutex over opening the real + directory + +just use it to serialize the assignment + +Signed-off-by: Al Viro +(cherry picked from commit 3d268c9b136f51385f9d041f3f2424501b257388) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index c6787f8..b7d9fb0 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -458,20 +458,27 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, + if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { + struct inode *inode = file_inode(file); + +- mutex_lock(&inode->i_mutex); + realfile = od->upperfile; + if (!realfile) { + struct path upperpath; + + ovl_path_upper(dentry, &upperpath); + realfile = ovl_path_open(&upperpath, O_RDONLY); +- if (IS_ERR(realfile)) { +- mutex_unlock(&inode->i_mutex); +- return PTR_ERR(realfile); ++ mutex_lock(&inode->i_mutex); ++ if (!od->upperfile) { ++ if (IS_ERR(realfile)) { ++ mutex_unlock(&inode->i_mutex); ++ return PTR_ERR(realfile); ++ } ++ od->upperfile = realfile; ++ } else { ++ /* somebody has beaten us to it */ ++ if (!IS_ERR(realfile)) ++ fput(realfile); ++ realfile = od->upperfile; + } +- 
od->upperfile = realfile; ++ mutex_unlock(&inode->i_mutex); + } +- mutex_unlock(&inode->i_mutex); + } + + return vfs_fsync_range(realfile, start, end, datasync); +-- +2.7.4 + + +From c0913e8f810834e7f4645050bfcb5e1c9f06fd53 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Thu, 23 Oct 2014 22:58:56 -0400 +Subject: [PATCH 25/73] overlayfs: make ovl_cache_entry->name an array instead + of pointer + +Signed-off-by: Al Viro +(cherry picked from commit 68bf8611076a8e4bee8bc8d03ff28bd1e9a9c631) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index b7d9fb0..9c9872b 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -18,13 +18,13 @@ + #include "overlayfs.h" + + struct ovl_cache_entry { +- const char *name; + unsigned int len; + unsigned int type; + u64 ino; + bool is_whiteout; + struct list_head l_node; + struct rb_node node; ++ char name[]; + }; + + struct ovl_dir_cache { +@@ -82,13 +82,12 @@ static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, + u64 ino, unsigned int d_type) + { + struct ovl_cache_entry *p; ++ size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); + +- p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL); ++ p = kmalloc(size, GFP_KERNEL); + if (p) { +- char *name_copy = (char *) (p + 1); +- memcpy(name_copy, name, len); +- name_copy[len] = '\0'; +- p->name = name_copy; ++ memcpy(p->name, name, len); ++ p->name[len] = '\0'; + p->len = len; + p->type = d_type; + p->ino = ino; +-- +2.7.4 + + +From 90887f5c1bb98d0d51933e4d2238b5cc888b56cf Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Thu, 23 Oct 2014 23:00:53 -0400 +Subject: [PATCH 26/73] overlayfs: embed root into overlay_readdir_data + +no sense having it a pointer - all instances have it pointing to +local variable in the same stack frame + +Signed-off-by: Al Viro +(cherry picked from commit 
49be4fb9cc3431fc4ebc71c764db848483b2a16c) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 9c9872b..a9ee2c1 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -36,7 +36,7 @@ struct ovl_dir_cache { + struct ovl_readdir_data { + struct dir_context ctx; + bool is_merge; +- struct rb_root *root; ++ struct rb_root root; + struct list_head *list; + struct list_head *middle; + int count; +@@ -101,7 +101,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, + const char *name, int len, u64 ino, + unsigned int d_type) + { +- struct rb_node **newp = &rdd->root->rb_node; ++ struct rb_node **newp = &rdd->root.rb_node; + struct rb_node *parent = NULL; + struct ovl_cache_entry *p; + +@@ -126,7 +126,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, + + list_add_tail(&p->l_node, rdd->list); + rb_link_node(&p->node, parent, newp); +- rb_insert_color(&p->node, rdd->root); ++ rb_insert_color(&p->node, &rdd->root); + + return 0; + } +@@ -137,7 +137,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, + { + struct ovl_cache_entry *p; + +- p = ovl_cache_entry_find(rdd->root, name, namelen); ++ p = ovl_cache_entry_find(&rdd->root, name, namelen); + if (p) { + list_move_tail(&p->l_node, rdd->middle); + } else { +@@ -277,12 +277,11 @@ static inline int ovl_dir_read_merged(struct path *upperpath, + struct list_head *list) + { + int err; +- struct rb_root root = RB_ROOT; + struct list_head middle; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .list = list, +- .root = &root, ++ .root = RB_ROOT, + .is_merge = false, + }; + +-- +2.7.4 + + +From 258e15586ba135f8534ab416c647d2f195e12df1 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Thu, 23 Oct 2014 23:03:03 -0400 +Subject: [PATCH 27/73] overlayfs: embed middle into overlay_readdir_data + +same story... 
+ +Signed-off-by: Al Viro +(cherry picked from commit db6ec212b53abc29a5bb6ac8c810010fc28d5191) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index a9ee2c1..910553f 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -38,7 +38,7 @@ struct ovl_readdir_data { + bool is_merge; + struct rb_root root; + struct list_head *list; +- struct list_head *middle; ++ struct list_head middle; + int count; + int err; + }; +@@ -139,13 +139,13 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, + + p = ovl_cache_entry_find(&rdd->root, name, namelen); + if (p) { +- list_move_tail(&p->l_node, rdd->middle); ++ list_move_tail(&p->l_node, &rdd->middle); + } else { + p = ovl_cache_entry_new(name, namelen, ino, d_type); + if (p == NULL) + rdd->err = -ENOMEM; + else +- list_add_tail(&p->l_node, rdd->middle); ++ list_add_tail(&p->l_node, &rdd->middle); + } + + return rdd->err; +@@ -277,7 +277,6 @@ static inline int ovl_dir_read_merged(struct path *upperpath, + struct list_head *list) + { + int err; +- struct list_head middle; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .list = list, +@@ -301,11 +300,10 @@ static inline int ovl_dir_read_merged(struct path *upperpath, + * Insert lowerpath entries before upperpath ones, this allows + * offsets to be reasonably constant + */ +- list_add(&middle, rdd.list); +- rdd.middle = &middle; ++ list_add(&rdd.middle, rdd.list); + rdd.is_merge = true; + err = ovl_dir_read(lowerpath, &rdd); +- list_del(&middle); ++ list_del(&rdd.middle); + } + out: + return err; +-- +2.7.4 + + +From 313adf61d46e2fc1092a03540f33b1f1345b8c44 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Tue, 28 Oct 2014 18:27:28 -0400 +Subject: [PATCH 28/73] overlayfs: barriers for opening upper-layer directory + +make sure that + a) all stores done by opening struct file don't leak past storing +the 
reference in od->upperfile + b) the lockless side has read dependency barrier + +Signed-off-by: Al Viro +(cherry picked from commit d45f00ae43e63eff1b3d79df20610ae1ef645ebd) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 910553f..8c8ce9d 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -454,12 +454,13 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, + if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { + struct inode *inode = file_inode(file); + +- realfile = od->upperfile; ++ realfile =lockless_dereference(od->upperfile); + if (!realfile) { + struct path upperpath; + + ovl_path_upper(dentry, &upperpath); + realfile = ovl_path_open(&upperpath, O_RDONLY); ++ smp_mb__before_spinlock(); + mutex_lock(&inode->i_mutex); + if (!od->upperfile) { + if (IS_ERR(realfile)) { +-- +2.7.4 + + +From 2b3b22f12809d5406745005ea0c20c4945dfe789 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Mon, 27 Oct 2014 13:48:48 +0100 +Subject: [PATCH 29/73] ovl: fix check for cursor + +ovl_cache_entry.name is now an array not a pointer, so it makes no sense +test for it being NULL. + +Detected by coverity. 
+ +From: Miklos Szeredi +Fixes: 68bf8611076a ("overlayfs: make ovl_cache_entry->name an array instead of ++pointer") +Signed-off-by: Miklos Szeredi +Signed-off-by: Al Viro + +(cherry picked from commit c2096537d40f026672c4c6adfcd7247ce5799604) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 8c8ce9d..3fbf0d3 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -21,9 +21,10 @@ struct ovl_cache_entry { + unsigned int len; + unsigned int type; + u64 ino; +- bool is_whiteout; + struct list_head l_node; + struct rb_node node; ++ bool is_whiteout; ++ bool is_cursor; + char name[]; + }; + +@@ -251,7 +252,7 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, + + mutex_lock(&dir->d_inode->i_mutex); + list_for_each_entry(p, rdd->list, l_node) { +- if (!p->name) ++ if (p->is_cursor) + continue; + + if (p->type != DT_CHR) +@@ -307,7 +308,6 @@ static inline int ovl_dir_read_merged(struct path *upperpath, + } + out: + return err; +- + } + + static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) +@@ -316,7 +316,7 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) + loff_t off = 0; + + list_for_each_entry(p, &od->cache->entries, l_node) { +- if (!p->name) ++ if (p->is_cursor) + continue; + if (off >= pos) + break; +@@ -389,7 +389,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) + + p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); + /* Skip cursors */ +- if (p->name) { ++ if (!p->is_cursor) { + if (!p->is_whiteout) { + if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) + break; +@@ -519,6 +519,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file) + od->realfile = realfile; + od->is_real = (type != OVL_PATH_MERGE); + od->is_upper = (type != OVL_PATH_LOWER); ++ od->cursor.is_cursor = true; + file->private_data = od; + + return 0; 
+-- +2.7.4 + + +From 9ba0656673b59bcae46ce2a7849005a07d494fac Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Mon, 27 Oct 2014 15:42:01 +0100 +Subject: [PATCH 30/73] overlayfs: fix lockdep misannotation + +In an overlay directory that shadows an empty lower directory, say +/mnt/a/empty102, do: + + touch /mnt/a/empty102/x + unlink /mnt/a/empty102/x + rmdir /mnt/a/empty102 + +It's actually harmless, but needs another level of nesting between +I_MUTEX_CHILD and I_MUTEX_NORMAL. + +Signed-off-by: Miklos Szeredi +Tested-by: David Howells +Signed-off-by: Al Viro +(cherry picked from commit d1b72cc6d8cb766c802fdc70a5edc2f0ba8a2b57) +Signed-off-by: Alex Shi +--- + fs/namei.c | 2 +- + fs/overlayfs/readdir.c | 2 +- + include/linux/fs.h | 9 ++++++--- + 3 files changed, 8 insertions(+), 5 deletions(-) + +diff --git a/fs/namei.c b/fs/namei.c +index a047265e..0fbf150 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2473,7 +2473,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) + } + + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); +- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); ++ mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2); + return NULL; + } + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 3fbf0d3..401f084 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -571,7 +571,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) + { + struct ovl_cache_entry *p; + +- mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT); ++ mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD); + list_for_each_entry(p, list, l_node) { + struct dentry *dentry; + +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 429af8d..1ec29cc 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -641,11 +641,13 @@ static inline int inode_unhashed(struct inode *inode) + * 2: child/target + * 3: xattr + * 4: second non-directory +- * The last is for certain 
operations (such as rename) which lock two ++ * 5: second parent (when locking independent directories in rename) ++ * ++ * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two + * non-directories at once. + * + * The locking order between these classes is +- * parent -> child -> normal -> xattr -> second non-directory ++ * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory + */ + enum inode_i_mutex_lock_class + { +@@ -653,7 +655,8 @@ enum inode_i_mutex_lock_class + I_MUTEX_PARENT, + I_MUTEX_CHILD, + I_MUTEX_XATTR, +- I_MUTEX_NONDIR2 ++ I_MUTEX_NONDIR2, ++ I_MUTEX_PARENT2, + }; + + void lock_two_nondirectories(struct inode *, struct inode*); +-- +2.7.4 + + +From a996f76df429a061985a5e7b1904c6574315a818 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Fri, 31 Oct 2014 20:02:42 +0100 +Subject: [PATCH 31/73] ovl: initialize ->is_cursor + +Signed-off-by: Miklos Szeredi +Signed-off-by: Al Viro +(cherry picked from commit 9f2f7d4c8dfcf4617af5de6ea381b91deac3db48) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 401f084..4e9d7c1 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -93,6 +93,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, + p->type = d_type; + p->ino = ino; + p->is_whiteout = false; ++ p->is_cursor = false; + } + + return p; +-- +2.7.4 + + +From 30d1df13eef6be298b5f783a2b36b82cf1c0f033 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 30 Oct 2014 17:37:34 +0100 +Subject: [PATCH 32/73] vfs: make first argument of dir_context.actor typed + +Signed-off-by: Miklos Szeredi +Signed-off-by: Al Viro +(cherry picked from commit ac7576f4b1da8c9c6bc1ae026c2b9e86ae617ba5) +Signed-off-by: Alex Shi +--- + arch/alpha/kernel/osf_sys.c | 7 ++++--- + arch/parisc/hpux/fs.c | 7 ++++--- + drivers/staging/lustre/lustre/llite/llite_nfs.c | 8 +++++--- + 
fs/afs/dir.c | 9 +++++---- + fs/compat.c | 21 +++++++++++++-------- + fs/ecryptfs/file.c | 6 +++--- + fs/exportfs/expfs.c | 5 +++-- + fs/fat/dir.c | 5 +++-- + fs/gfs2/export.c | 8 +++++--- + fs/hppfs/hppfs.c | 5 +++-- + fs/nfsd/nfs4recover.c | 5 +++-- + fs/nfsd/vfs.c | 12 +++++++----- + fs/nfsd/vfs.h | 4 ++-- + fs/ocfs2/dir.c | 8 +++++--- + fs/ocfs2/journal.c | 8 +++++--- + fs/overlayfs/readdir.c | 8 +++++--- + fs/readdir.c | 21 ++++++++++++--------- + fs/reiserfs/xattr.c | 15 +++++++++------ + include/linux/fs.h | 5 ++++- + 19 files changed, 100 insertions(+), 67 deletions(-) + +diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c +index 1402fcc..98f57d3 100644 +--- a/arch/alpha/kernel/osf_sys.c ++++ b/arch/alpha/kernel/osf_sys.c +@@ -104,11 +104,12 @@ struct osf_dirent_callback { + }; + + static int +-osf_filldir(void *__buf, const char *name, int namlen, loff_t offset, +- u64 ino, unsigned int d_type) ++osf_filldir(struct dir_context *ctx, const char *name, int namlen, ++ loff_t offset, u64 ino, unsigned int d_type) + { + struct osf_dirent __user *dirent; +- struct osf_dirent_callback *buf = (struct osf_dirent_callback *) __buf; ++ struct osf_dirent_callback *buf = ++ container_of(ctx, struct osf_dirent_callback, ctx); + unsigned int reclen = ALIGN(NAME_OFFSET + namlen + 1, sizeof(u32)); + unsigned int d_ino; + +diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c +index 2bedafe..97a7bf8 100644 +--- a/arch/parisc/hpux/fs.c ++++ b/arch/parisc/hpux/fs.c +@@ -56,11 +56,12 @@ struct getdents_callback { + + #define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) + +-static int filldir(void * __buf, const char * name, int namlen, loff_t offset, +- u64 ino, unsigned d_type) ++static int filldir(struct dir_context *ctx, const char *name, int namlen, ++ loff_t offset, u64 ino, unsigned d_type) + { + struct hpux_dirent __user * dirent; +- struct getdents_callback * buf = (struct getdents_callback *) __buf; ++ struct getdents_callback 
*buf = ++ container_of(ctx, struct getdents_callback, ctx); + ino_t d_ino; + int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(long)); + +diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c +index 1767c74..ed35a88 100644 +--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c ++++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c +@@ -205,13 +205,15 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen, + return LUSTRE_NFS_FID; + } + +-static int ll_nfs_get_name_filldir(void *cookie, const char *name, int namelen, +- loff_t hash, u64 ino, unsigned type) ++static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name, ++ int namelen, loff_t hash, u64 ino, ++ unsigned type) + { + /* It is hack to access lde_fid for comparison with lgd_fid. + * So the input 'name' must be part of the 'lu_dirent'. */ + struct lu_dirent *lde = container_of0(name, struct lu_dirent, lde_name); +- struct ll_getname_data *lgd = cookie; ++ struct ll_getname_data *lgd = ++ container_of(ctx, struct ll_getname_data, ctx); + struct lu_fid fid; + + fid_le_to_cpu(&fid, &lde->lde_fid); +diff --git a/fs/afs/dir.c b/fs/afs/dir.c +index 5293003..5479af6 100644 +--- a/fs/afs/dir.c ++++ b/fs/afs/dir.c +@@ -26,7 +26,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx); + static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); + static int afs_d_delete(const struct dentry *dentry); + static void afs_d_release(struct dentry *dentry); +-static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, ++static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, + loff_t fpos, u64 ino, unsigned dtype); + static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl); +@@ -391,10 +391,11 @@ static int afs_readdir(struct file *file, struct dir_context *ctx) + * - if afs_dir_iterate_block() spots this function, it'll pass 
the FID + * uniquifier through dtype + */ +-static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, +- loff_t fpos, u64 ino, unsigned dtype) ++static int afs_lookup_filldir(struct dir_context *ctx, const char *name, ++ int nlen, loff_t fpos, u64 ino, unsigned dtype) + { +- struct afs_lookup_cookie *cookie = _cookie; ++ struct afs_lookup_cookie *cookie = ++ container_of(ctx, struct afs_lookup_cookie, ctx); + + _enter("{%s,%u},%s,%u,,%llu,%u", + cookie->name.name, cookie->name.len, name, nlen, +diff --git a/fs/compat.c b/fs/compat.c +index 6af20de..14da9b3 100644 +--- a/fs/compat.c ++++ b/fs/compat.c +@@ -837,10 +837,12 @@ struct compat_readdir_callback { + int result; + }; + +-static int compat_fillonedir(void *__buf, const char *name, int namlen, +- loff_t offset, u64 ino, unsigned int d_type) ++static int compat_fillonedir(struct dir_context *ctx, const char *name, ++ int namlen, loff_t offset, u64 ino, ++ unsigned int d_type) + { +- struct compat_readdir_callback *buf = __buf; ++ struct compat_readdir_callback *buf = ++ container_of(ctx, struct compat_readdir_callback, ctx); + struct compat_old_linux_dirent __user *dirent; + compat_ulong_t d_ino; + +@@ -905,11 +907,12 @@ struct compat_getdents_callback { + int error; + }; + +-static int compat_filldir(void *__buf, const char *name, int namlen, ++static int compat_filldir(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) + { + struct compat_linux_dirent __user * dirent; +- struct compat_getdents_callback *buf = __buf; ++ struct compat_getdents_callback *buf = ++ container_of(ctx, struct compat_getdents_callback, ctx); + compat_ulong_t d_ino; + int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + + namlen + 2, sizeof(compat_long_t)); +@@ -991,11 +994,13 @@ struct compat_getdents_callback64 { + int error; + }; + +-static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, +- u64 ino, unsigned int d_type) 
++static int compat_filldir64(struct dir_context *ctx, const char *name, ++ int namlen, loff_t offset, u64 ino, ++ unsigned int d_type) + { + struct linux_dirent64 __user *dirent; +- struct compat_getdents_callback64 *buf = __buf; ++ struct compat_getdents_callback64 *buf = ++ container_of(ctx, struct compat_getdents_callback64, ctx); + int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, + sizeof(u64)); + u64 off; +diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c +index 03df502..121a948 100644 +--- a/fs/ecryptfs/file.c ++++ b/fs/ecryptfs/file.c +@@ -76,11 +76,11 @@ struct ecryptfs_getdents_callback { + + /* Inspired by generic filldir in fs/readdir.c */ + static int +-ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, +- loff_t offset, u64 ino, unsigned int d_type) ++ecryptfs_filldir(struct dir_context *ctx, const char *lower_name, ++ int lower_namelen, loff_t offset, u64 ino, unsigned int d_type) + { + struct ecryptfs_getdents_callback *buf = +- (struct ecryptfs_getdents_callback *)dirent; ++ container_of(ctx, struct ecryptfs_getdents_callback, ctx); + size_t name_size; + char *name; + int rc; +diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c +index 48a359d..59d339c 100644 +--- a/fs/exportfs/expfs.c ++++ b/fs/exportfs/expfs.c +@@ -241,10 +241,11 @@ struct getdents_callback { + * A rather strange filldir function to capture + * the name matching the specified inode number. 
+ */ +-static int filldir_one(void * __buf, const char * name, int len, ++static int filldir_one(struct dir_context *ctx, const char *name, int len, + loff_t pos, u64 ino, unsigned int d_type) + { +- struct getdents_callback *buf = __buf; ++ struct getdents_callback *buf = ++ container_of(ctx, struct getdents_callback, ctx); + int result = 0; + + buf->sequence++; +diff --git a/fs/fat/dir.c b/fs/fat/dir.c +index 3963ede..c5d6bb9 100644 +--- a/fs/fat/dir.c ++++ b/fs/fat/dir.c +@@ -702,10 +702,11 @@ static int fat_readdir(struct file *file, struct dir_context *ctx) + } + + #define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \ +-static int func(void *__buf, const char *name, int name_len, \ ++static int func(struct dir_context *ctx, const char *name, int name_len, \ + loff_t offset, u64 ino, unsigned int d_type) \ + { \ +- struct fat_ioctl_filldir_callback *buf = __buf; \ ++ struct fat_ioctl_filldir_callback *buf = \ ++ container_of(ctx, struct fat_ioctl_filldir_callback, ctx); \ + struct dirent_type __user *d1 = buf->dirent; \ + struct dirent_type __user *d2 = d1 + 1; \ + \ +diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c +index 8b9b377..c41d255 100644 +--- a/fs/gfs2/export.c ++++ b/fs/gfs2/export.c +@@ -69,10 +69,12 @@ struct get_name_filldir { + char *name; + }; + +-static int get_name_filldir(void *opaque, const char *name, int length, +- loff_t offset, u64 inum, unsigned int type) ++static int get_name_filldir(struct dir_context *ctx, const char *name, ++ int length, loff_t offset, u64 inum, ++ unsigned int type) + { +- struct get_name_filldir *gnfd = opaque; ++ struct get_name_filldir *gnfd = ++ container_of(ctx, struct get_name_filldir, ctx); + + if (inum != gnfd->inum.no_addr) + return 0; +diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c +index 4338ff3..5f27551 100644 +--- a/fs/hppfs/hppfs.c ++++ b/fs/hppfs/hppfs.c +@@ -548,10 +548,11 @@ struct hppfs_dirent { + struct dentry *dentry; + }; + +-static int hppfs_filldir(void *d, const char *name, int size, ++static 
int hppfs_filldir(struct dir_context *ctx, const char *name, int size, + loff_t offset, u64 inode, unsigned int type) + { +- struct hppfs_dirent *dirent = d; ++ struct hppfs_dirent *dirent = ++ container_of(ctx, struct hppfs_dirent, ctx); + + if (file_removed(dirent->dentry, name)) + return 0; +diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c +index 9c271f4..674a5d5 100644 +--- a/fs/nfsd/nfs4recover.c ++++ b/fs/nfsd/nfs4recover.c +@@ -244,10 +244,11 @@ struct nfs4_dir_ctx { + }; + + static int +-nfsd4_build_namelist(void *arg, const char *name, int namlen, ++nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) + { +- struct nfs4_dir_ctx *ctx = arg; ++ struct nfs4_dir_ctx *ctx = ++ container_of(__ctx, struct nfs4_dir_ctx, ctx); + struct name_list *entry; + + if (namlen != HEXDIR_LEN - 1) +diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c +index 464f813..3953a20 100644 +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -1808,10 +1808,12 @@ struct readdir_data { + int full; + }; + +-static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, +- loff_t offset, u64 ino, unsigned int d_type) ++static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, ++ int namlen, loff_t offset, u64 ino, ++ unsigned int d_type) + { +- struct readdir_data *buf = __buf; ++ struct readdir_data *buf = ++ container_of(ctx, struct readdir_data, ctx); + struct buffered_dirent *de = (void *)(buf->dirent + buf->used); + unsigned int reclen; + +@@ -1831,7 +1833,7 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, + return 0; + } + +-static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, ++static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, + struct readdir_cd *cdp, loff_t *offsetp) + { + struct buffered_dirent *de; +@@ -1915,7 +1917,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, + */ + __be32 + 
nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, +- struct readdir_cd *cdp, filldir_t func) ++ struct readdir_cd *cdp, nfsd_filldir_t func) + { + __be32 err; + struct file *file; +diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h +index fbe90bd..ea760b1 100644 +--- a/fs/nfsd/vfs.h ++++ b/fs/nfsd/vfs.h +@@ -36,7 +36,7 @@ + /* + * Callback function for readdir + */ +-typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); ++typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); + + /* nfsd/vfs.c */ + int nfsd_racache_init(int); +@@ -89,7 +89,7 @@ __be32 nfsd_rename(struct svc_rqst *, + __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, + char *name, int len); + __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, +- loff_t *, struct readdir_cd *, filldir_t); ++ loff_t *, struct readdir_cd *, nfsd_filldir_t); + __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, + struct kstatfs *, int access); + +diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c +index 91a7e85..478e14d 100644 +--- a/fs/ocfs2/dir.c ++++ b/fs/ocfs2/dir.c +@@ -2073,10 +2073,12 @@ struct ocfs2_empty_dir_priv { + unsigned seen_other; + unsigned dx_dir; + }; +-static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, +- loff_t pos, u64 ino, unsigned type) ++static int ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name, ++ int name_len, loff_t pos, u64 ino, ++ unsigned type) + { +- struct ocfs2_empty_dir_priv *p = priv; ++ struct ocfs2_empty_dir_priv *p = ++ container_of(ctx, struct ocfs2_empty_dir_priv, ctx); + + /* + * Check the positions of "." and ".." 
records to be sure +diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c +index 44fc3e5..a1b7dca 100644 +--- a/fs/ocfs2/journal.c ++++ b/fs/ocfs2/journal.c +@@ -1981,10 +1981,12 @@ struct ocfs2_orphan_filldir_priv { + struct ocfs2_super *osb; + }; + +-static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, +- loff_t pos, u64 ino, unsigned type) ++static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name, ++ int name_len, loff_t pos, u64 ino, ++ unsigned type) + { +- struct ocfs2_orphan_filldir_priv *p = priv; ++ struct ocfs2_orphan_filldir_priv *p = ++ container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx); + struct inode *iter; + + if (name_len == 1 && !strncmp(".", name, 1)) +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 4e9d7c1..301f64a 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -180,10 +180,12 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) + } + } + +-static int ovl_fill_merge(void *buf, const char *name, int namelen, +- loff_t offset, u64 ino, unsigned int d_type) ++static int ovl_fill_merge(struct dir_context *ctx, const char *name, ++ int namelen, loff_t offset, u64 ino, ++ unsigned int d_type) + { +- struct ovl_readdir_data *rdd = buf; ++ struct ovl_readdir_data *rdd = ++ container_of(ctx, struct ovl_readdir_data, ctx); + + rdd->count++; + if (!rdd->is_merge) +diff --git a/fs/readdir.c b/fs/readdir.c +index 5b53d99..e21af53 100644 +--- a/fs/readdir.c ++++ b/fs/readdir.c +@@ -72,10 +72,11 @@ struct readdir_callback { + int result; + }; + +-static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, +- u64 ino, unsigned int d_type) ++static int fillonedir(struct dir_context *ctx, const char *name, int namlen, ++ loff_t offset, u64 ino, unsigned int d_type) + { +- struct readdir_callback *buf = (struct readdir_callback *) __buf; ++ struct readdir_callback *buf = ++ container_of(ctx, struct readdir_callback, ctx); + struct 
old_linux_dirent __user * dirent; + unsigned long d_ino; + +@@ -146,11 +147,12 @@ struct getdents_callback { + int error; + }; + +-static int filldir(void * __buf, const char * name, int namlen, loff_t offset, +- u64 ino, unsigned int d_type) ++static int filldir(struct dir_context *ctx, const char *name, int namlen, ++ loff_t offset, u64 ino, unsigned int d_type) + { + struct linux_dirent __user * dirent; +- struct getdents_callback * buf = (struct getdents_callback *) __buf; ++ struct getdents_callback *buf = ++ container_of(ctx, struct getdents_callback, ctx); + unsigned long d_ino; + int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, + sizeof(long)); +@@ -230,11 +232,12 @@ struct getdents_callback64 { + int error; + }; + +-static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, +- u64 ino, unsigned int d_type) ++static int filldir64(struct dir_context *ctx, const char *name, int namlen, ++ loff_t offset, u64 ino, unsigned int d_type) + { + struct linux_dirent64 __user *dirent; +- struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; ++ struct getdents_callback64 *buf = ++ container_of(ctx, struct getdents_callback64, ctx); + int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, + sizeof(u64)); + +diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c +index 5cdfbd6..d8b7acf 100644 +--- a/fs/reiserfs/xattr.c ++++ b/fs/reiserfs/xattr.c +@@ -177,10 +177,11 @@ struct reiserfs_dentry_buf { + }; + + static int +-fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, +- u64 ino, unsigned int d_type) ++fill_with_dentries(struct dir_context *ctx, const char *name, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) + { +- struct reiserfs_dentry_buf *dbuf = buf; ++ struct reiserfs_dentry_buf *dbuf = ++ container_of(ctx, struct reiserfs_dentry_buf, ctx); + struct dentry *dentry; + WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex)); + +@@ -794,10 
+795,12 @@ struct listxattr_buf { + struct dentry *dentry; + }; + +-static int listxattr_filler(void *buf, const char *name, int namelen, +- loff_t offset, u64 ino, unsigned int d_type) ++static int listxattr_filler(struct dir_context *ctx, const char *name, ++ int namelen, loff_t offset, u64 ino, ++ unsigned int d_type) + { +- struct listxattr_buf *b = (struct listxattr_buf *)buf; ++ struct listxattr_buf *b = ++ container_of(ctx, struct listxattr_buf, ctx); + size_t size; + if (name[0] != '.' || + (namelen != 1 && (name[1] != '.' || namelen != 2))) { +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 1ec29cc..e1cee8b 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1530,7 +1530,10 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); + * This allows the kernel to read directories into kernel space or + * to have different dirent layouts depending on the binary type. + */ +-typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); ++struct dir_context; ++typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, ++ unsigned); ++ + struct dir_context { + const filldir_t actor; + loff_t pos; +-- +2.7.4 + + +From 68a4dee66f6231bb2a5058c58e9e9ebea8149b5c Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Tue, 4 Nov 2014 16:11:03 +0100 +Subject: [PATCH 33/73] ovl: don't poison cursor + +ovl_cache_put() can be called from ovl_dir_reset() if the cache needs to be +rebuilt. We did list_del() on the cursor, which results in an Oops on the +poisoned pointer in ovl_seek_cursor(). 
+ +Reported-by: Jordi Pujol Palomer +Signed-off-by: Miklos Szeredi +Tested-by: Jordi Pujol Palomer +Signed-off-by: Al Viro +(cherry picked from commit 3f822c6264954660babce757fb45792fd3af273e) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 301f64a..fdb63de 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -168,7 +168,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) + { + struct ovl_dir_cache *cache = od->cache; + +- list_del(&od->cursor.l_node); ++ list_del_init(&od->cursor.l_node); + WARN_ON(cache->refcount <= 0); + cache->refcount--; + if (!cache->refcount) { +-- +2.7.4 + + +From 5c5ce110a498fc025c7ca85165ba2f5fa1ddef43 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 20 Nov 2014 16:39:59 +0100 +Subject: [PATCH 34/73] ovl: rename filesystem type to "overlay" + +Some distributions carry an "old" format of overlayfs while mainline has a +"new" format. + +The distros will possibly want to keep the old overlayfs alongside the new +for compatibility reasons. + +To make it possible to differentiate the two versions change the name of +the new one from "overlayfs" to "overlay". 
+ +Signed-off-by: Miklos Szeredi +Reported-by: Serge Hallyn +Cc: Andy Whitcroft +(cherry picked from commit ef94b1864d1ed5be54376404bb23d22ed0481feb) +Signed-off-by: Alex Shi +--- + Documentation/filesystems/overlayfs.txt | 198 ++++++++++++++++++++++++++++++++ + MAINTAINERS | 7 ++ + fs/Makefile | 2 +- + fs/overlayfs/Kconfig | 2 +- + fs/overlayfs/Makefile | 4 +- + fs/overlayfs/super.c | 6 +- + 6 files changed, 212 insertions(+), 7 deletions(-) + create mode 100644 Documentation/filesystems/overlayfs.txt + +diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt +new file mode 100644 +index 0000000..a27c950 +--- /dev/null ++++ b/Documentation/filesystems/overlayfs.txt +@@ -0,0 +1,198 @@ ++Written by: Neil Brown ++ ++Overlay Filesystem ++================== ++ ++This document describes a prototype for a new approach to providing ++overlay-filesystem functionality in Linux (sometimes referred to as ++union-filesystems). An overlay-filesystem tries to present a ++filesystem which is the result over overlaying one filesystem on top ++of the other. ++ ++The result will inevitably fail to look exactly like a normal ++filesystem for various technical reasons. The expectation is that ++many use cases will be able to ignore these differences. ++ ++This approach is 'hybrid' because the objects that appear in the ++filesystem do not all appear to belong to that filesystem. In many ++cases an object accessed in the union will be indistinguishable ++from accessing the corresponding object from the original filesystem. ++This is most obvious from the 'st_dev' field returned by stat(2). ++ ++While directories will report an st_dev from the overlay-filesystem, ++all non-directory objects will report an st_dev from the lower or ++upper filesystem that is providing the object. Similarly st_ino will ++only be unique when combined with st_dev, and both of these can change ++over the lifetime of a non-directory object. 
Many applications and ++tools ignore these values and will not be affected. ++ ++Upper and Lower ++--------------- ++ ++An overlay filesystem combines two filesystems - an 'upper' filesystem ++and a 'lower' filesystem. When a name exists in both filesystems, the ++object in the 'upper' filesystem is visible while the object in the ++'lower' filesystem is either hidden or, in the case of directories, ++merged with the 'upper' object. ++ ++It would be more correct to refer to an upper and lower 'directory ++tree' rather than 'filesystem' as it is quite possible for both ++directory trees to be in the same filesystem and there is no ++requirement that the root of a filesystem be given for either upper or ++lower. ++ ++The lower filesystem can be any filesystem supported by Linux and does ++not need to be writable. The lower filesystem can even be another ++overlayfs. The upper filesystem will normally be writable and if it ++is it must support the creation of trusted.* extended attributes, and ++must provide valid d_type in readdir responses, so NFS is not suitable. ++ ++A read-only overlay of two read-only filesystems may use any ++filesystem type. ++ ++Directories ++----------- ++ ++Overlaying mainly involves directories. If a given name appears in both ++upper and lower filesystems and refers to a non-directory in either, ++then the lower object is hidden - the name refers only to the upper ++object. ++ ++Where both upper and lower objects are directories, a merged directory ++is formed. ++ ++At mount time, the two directories given as mount options "lowerdir" and ++"upperdir" are combined into a merged directory: ++ ++ mount -t overlay overlay -olowerdir=/lower,upperdir=/upper,\ ++workdir=/work /merged ++ ++The "workdir" needs to be an empty directory on the same filesystem ++as upperdir. 
++ ++Then whenever a lookup is requested in such a merged directory, the ++lookup is performed in each actual directory and the combined result ++is cached in the dentry belonging to the overlay filesystem. If both ++actual lookups find directories, both are stored and a merged ++directory is created, otherwise only one is stored: the upper if it ++exists, else the lower. ++ ++Only the lists of names from directories are merged. Other content ++such as metadata and extended attributes are reported for the upper ++directory only. These attributes of the lower directory are hidden. ++ ++whiteouts and opaque directories ++-------------------------------- ++ ++In order to support rm and rmdir without changing the lower ++filesystem, an overlay filesystem needs to record in the upper filesystem ++that files have been removed. This is done using whiteouts and opaque ++directories (non-directories are always opaque). ++ ++A whiteout is created as a character device with 0/0 device number. ++When a whiteout is found in the upper level of a merged directory, any ++matching name in the lower level is ignored, and the whiteout itself ++is also hidden. ++ ++A directory is made opaque by setting the xattr "trusted.overlay.opaque" ++to "y". Where the upper filesystem contains an opaque directory, any ++directory in the lower filesystem with the same name is ignored. ++ ++readdir ++------- ++ ++When a 'readdir' request is made on a merged directory, the upper and ++lower directories are each read and the name lists merged in the ++obvious way (upper is read first, then lower - entries that already ++exist are not re-added). This merged name list is cached in the ++'struct file' and so remains as long as the file is kept open. If the ++directory is opened and read by two processes at the same time, they ++will each have separate caches. A seekdir to the start of the ++directory (offset 0) followed by a readdir will cause the cache to be ++discarded and rebuilt. 
++ ++This means that changes to the merged directory do not appear while a ++directory is being read. This is unlikely to be noticed by many ++programs. ++ ++seek offsets are assigned sequentially when the directories are read. ++Thus if ++ - read part of a directory ++ - remember an offset, and close the directory ++ - re-open the directory some time later ++ - seek to the remembered offset ++ ++there may be little correlation between the old and new locations in ++the list of filenames, particularly if anything has changed in the ++directory. ++ ++Readdir on directories that are not merged is simply handled by the ++underlying directory (upper or lower). ++ ++ ++Non-directories ++--------------- ++ ++Objects that are not directories (files, symlinks, device-special ++files etc.) are presented either from the upper or lower filesystem as ++appropriate. When a file in the lower filesystem is accessed in a way ++the requires write-access, such as opening for write access, changing ++some metadata etc., the file is first copied from the lower filesystem ++to the upper filesystem (copy_up). Note that creating a hard-link ++also requires copy_up, though of course creation of a symlink does ++not. ++ ++The copy_up may turn out to be unnecessary, for example if the file is ++opened for read-write but the data is not modified. ++ ++The copy_up process first makes sure that the containing directory ++exists in the upper filesystem - creating it and any parents as ++necessary. It then creates the object with the same metadata (owner, ++mode, mtime, symlink-target etc.) and then if the object is a file, the ++data is copied from the lower to the upper filesystem. Finally any ++extended attributes are copied up. 
++ ++Once the copy_up is complete, the overlay filesystem simply ++provides direct access to the newly created file in the upper ++filesystem - future operations on the file are barely noticed by the ++overlay filesystem (though an operation on the name of the file such as ++rename or unlink will of course be noticed and handled). ++ ++ ++Non-standard behavior ++--------------------- ++ ++The copy_up operation essentially creates a new, identical file and ++moves it over to the old name. The new file may be on a different ++filesystem, so both st_dev and st_ino of the file may change. ++ ++Any open files referring to this inode will access the old data and ++metadata. Similarly any file locks obtained before copy_up will not ++apply to the copied up file. ++ ++On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and ++fsetxattr(2) will fail with EROFS. ++ ++If a file with multiple hard links is copied up, then this will ++"break" the link. Changes will not be propagated to other names ++referring to the same inode. ++ ++Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory ++object in overlayfs will not contain valid absolute paths, only ++relative paths leading up to the filesystem's root. This will be ++fixed in the future. ++ ++Some operations are not atomic, for example a crash during copy_up or ++rename will leave the filesystem in an inconsistent state. This will ++be addressed in the future. ++ ++Changes to underlying filesystems ++--------------------------------- ++ ++Offline changes, when the overlay is not mounted, are allowed to either ++the upper or the lower trees. ++ ++Changes to the underlying filesystems while part of a mounted overlay ++filesystem are not allowed. If the underlying filesystem is changed, ++the behavior of the overlay is undefined, though it will not result in ++a crash or deadlock. 
+diff --git a/MAINTAINERS b/MAINTAINERS +index 900d98e..976dae0 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -6452,6 +6452,13 @@ F: drivers/scsi/osd/ + F: include/scsi/osd_* + F: fs/exofs/ + ++OVERLAY FILESYSTEM ++M: Miklos Szeredi ++L: linux-fsdevel@vger.kernel.org ++S: Supported ++F: fs/overlayfs/* ++F: Documentation/filesystems/overlayfs.txt ++ + P54 WIRELESS DRIVER + M: Christian Lamparter + L: linux-wireless@vger.kernel.org +diff --git a/fs/Makefile b/fs/Makefile +index 5eb9caf..b595440 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -104,7 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ + obj-$(CONFIG_AUTOFS4_FS) += autofs4/ + obj-$(CONFIG_ADFS_FS) += adfs/ + obj-$(CONFIG_FUSE_FS) += fuse/ +-obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ ++obj-$(CONFIG_OVERLAY_FS) += overlayfs/ + obj-$(CONFIG_UDF_FS) += udf/ + obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ + obj-$(CONFIG_OMFS_FS) += omfs/ +diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig +index e601259..3435581 100644 +--- a/fs/overlayfs/Kconfig ++++ b/fs/overlayfs/Kconfig +@@ -1,4 +1,4 @@ +-config OVERLAYFS_FS ++config OVERLAY_FS + tristate "Overlay filesystem support" + help + An overlay filesystem combines two filesystems - an 'upper' filesystem +diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile +index 8f91889..900daed 100644 +--- a/fs/overlayfs/Makefile ++++ b/fs/overlayfs/Makefile +@@ -2,6 +2,6 @@ + # Makefile for the overlay filesystem. 
+ # + +-obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o ++obj-$(CONFIG_OVERLAY_FS) += overlay.o + +-overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o ++overlay-objs := super.o inode.o dir.o readdir.o copy_up.o +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 08b704c..b92bd18 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -24,7 +24,7 @@ MODULE_AUTHOR("Miklos Szeredi "); + MODULE_DESCRIPTION("Overlay filesystem"); + MODULE_LICENSE("GPL"); + +-#define OVERLAYFS_SUPER_MAGIC 0x794c764f ++#define OVERLAYFS_SUPER_MAGIC 0x794c7630 + + struct ovl_config { + char *lowerdir; +@@ -776,11 +776,11 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, + + static struct file_system_type ovl_fs_type = { + .owner = THIS_MODULE, +- .name = "overlayfs", ++ .name = "overlay", + .mount = ovl_mount, + .kill_sb = kill_anon_super, + }; +-MODULE_ALIAS_FS("overlayfs"); ++MODULE_ALIAS_FS("overlay"); + + static int __init ovl_init(void) + { +-- +2.7.4 + + +From 67b24788bb53d0b36bad84205ab940718003bffc Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 20 Nov 2014 16:39:59 +0100 +Subject: [PATCH 35/73] ovl: fix remove/copy-up race + +ovl_remove_and_whiteout() needs to check if upper dentry exists or not +after having locked upper parent directory. + +Previously we used a "type" value computed before locking the upper parent +directory, which is susceptible to racing with copy-up. + +There's a similar check in ovl_check_empty_and_clear(). This one is not +actually racy, since copy-up doesn't change the "emptyness" property of a +directory. Add a comment to this effect, and check the existence of upper +dentry locally to make the code cleaner. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit a105d685a8483985a01776411de191a726b48132) +Signed-off-by: Alex Shi +--- + fs/overlayfs/dir.c | 31 +++++++++++++++++++------------ + 1 file changed, 19 insertions(+), 12 deletions(-) + +diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c +index 15cd91a..8ffc4b9 100644 +--- a/fs/overlayfs/dir.c ++++ b/fs/overlayfs/dir.c +@@ -284,8 +284,7 @@ out: + return ERR_PTR(err); + } + +-static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, +- enum ovl_path_type type) ++static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) + { + int err; + struct dentry *ret = NULL; +@@ -294,8 +293,17 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, + err = ovl_check_empty_dir(dentry, &list); + if (err) + ret = ERR_PTR(err); +- else if (type == OVL_PATH_MERGE) +- ret = ovl_clear_empty(dentry, &list); ++ else { ++ /* ++ * If no upperdentry then skip clearing whiteouts. ++ * ++ * Can race with copy-up, since we don't hold the upperdir ++ * mutex. Doesn't matter, since copy-up can't create a ++ * non-empty directory from an empty one. 
++ */ ++ if (ovl_dentry_upper(dentry)) ++ ret = ovl_clear_empty(dentry, &list); ++ } + + ovl_cache_free(&list); + +@@ -487,8 +495,7 @@ out: + return err; + } + +-static int ovl_remove_and_whiteout(struct dentry *dentry, +- enum ovl_path_type type, bool is_dir) ++static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) + { + struct dentry *workdir = ovl_workdir(dentry); + struct inode *wdir = workdir->d_inode; +@@ -500,7 +507,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, + int err; + + if (is_dir) { +- opaquedir = ovl_check_empty_and_clear(dentry, type); ++ opaquedir = ovl_check_empty_and_clear(dentry); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) + goto out; +@@ -515,9 +522,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, + if (IS_ERR(whiteout)) + goto out_unlock; + +- if (type == OVL_PATH_LOWER) { ++ upper = ovl_dentry_upper(dentry); ++ if (!upper) { + upper = lookup_one_len(dentry->d_name.name, upperdir, +- dentry->d_name.len); ++ dentry->d_name.len); + err = PTR_ERR(upper); + if (IS_ERR(upper)) + goto kill_whiteout; +@@ -529,7 +537,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, + } else { + int flags = 0; + +- upper = ovl_dentry_upper(dentry); + if (opaquedir) + upper = opaquedir; + err = -ESTALE; +@@ -648,7 +655,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) + cap_raise(override_cred->cap_effective, CAP_CHOWN); + old_cred = override_creds(override_cred); + +- err = ovl_remove_and_whiteout(dentry, type, is_dir); ++ err = ovl_remove_and_whiteout(dentry, is_dir); + + revert_creds(old_cred); + put_cred(override_cred); +@@ -781,7 +788,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, + } + + if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { +- opaquedir = ovl_check_empty_and_clear(new, new_type); ++ opaquedir = ovl_check_empty_and_clear(new); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) { + opaquedir = NULL; 
+-- +2.7.4 + + +From ee3679843c9d6dec75e1d260cb6f041aa36ccbed Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 20 Nov 2014 16:40:00 +0100 +Subject: [PATCH 36/73] ovl: fix race in private xattr checks + +Xattr operations can race with copy up. This does not matter as long as +we consistently filter out "trusted.overlay.opaque" attribute on upper +directories. + +Previously we checked parent against OVL_PATH_MERGE. This is too general, +and prone to race with copy-up. I.e. we found the parent to be on the +lower layer but ovl_dentry_real() would return the copied-up dentry, +possibly with the "opaque" attribute. + +So instead use ovl_path_real() and decide to filter the attributes based on +the actual type of the dentry we'll use. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit 521484639ec19a6f1ed56de6993feb255f5f676c) +Signed-off-by: Alex Shi +--- + fs/overlayfs/inode.c | 27 ++++++++++++++++++--------- + 1 file changed, 18 insertions(+), 9 deletions(-) + +diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c +index af2d18c..07d74b2 100644 +--- a/fs/overlayfs/inode.c ++++ b/fs/overlayfs/inode.c +@@ -235,26 +235,36 @@ out: + return err; + } + ++static bool ovl_need_xattr_filter(struct dentry *dentry, ++ enum ovl_path_type type) ++{ ++ return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); ++} ++ + ssize_t ovl_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size) + { +- if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && +- ovl_is_private_xattr(name)) ++ struct path realpath; ++ enum ovl_path_type type = ovl_path_real(dentry, &realpath); ++ ++ if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + return -ENODATA; + +- return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); ++ return vfs_getxattr(realpath.dentry, name, value, size); + } + + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) + { ++ struct path realpath; ++ enum ovl_path_type type = 
ovl_path_real(dentry, &realpath); + ssize_t res; + int off; + +- res = vfs_listxattr(ovl_dentry_real(dentry), list, size); ++ res = vfs_listxattr(realpath.dentry, list, size); + if (res <= 0 || size == 0) + return res; + +- if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) ++ if (!ovl_need_xattr_filter(dentry, type)) + return res; + + /* filter out private xattrs */ +@@ -279,17 +289,16 @@ int ovl_removexattr(struct dentry *dentry, const char *name) + { + int err; + struct path realpath; +- enum ovl_path_type type; ++ enum ovl_path_type type = ovl_path_real(dentry, &realpath); + + err = ovl_want_write(dentry); + if (err) + goto out; + +- if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && +- ovl_is_private_xattr(name)) ++ err = -ENODATA; ++ if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + goto out_drop_write; + +- type = ovl_path_real(dentry, &realpath); + if (type == OVL_PATH_LOWER) { + err = vfs_getxattr(realpath.dentry, name, NULL, 0); + if (err < 0) +-- +2.7.4 + + +From 0ca1a2c9b8357ebf224d1714a6a1e3635e8e5862 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 20 Nov 2014 16:40:00 +0100 +Subject: [PATCH 37/73] ovl: allow filenames with comma + +Allow option separator (comma) to be escaped with backslash. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 91c77947133f7aef851b625701e182d3f99d14a9) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 45 insertions(+), 3 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index b92bd18..eee7a62 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -462,11 +462,34 @@ static const match_table_t ovl_tokens = { + {OPT_ERR, NULL} + }; + ++static char *ovl_next_opt(char **s) ++{ ++ char *sbegin = *s; ++ char *p; ++ ++ if (sbegin == NULL) ++ return NULL; ++ ++ for (p = sbegin; *p; p++) { ++ if (*p == '\\') { ++ p++; ++ if (!*p) ++ break; ++ } else if (*p == ',') { ++ *p = '\0'; ++ *s = p + 1; ++ return sbegin; ++ } ++ } ++ *s = NULL; ++ return sbegin; ++} ++ + static int ovl_parse_opt(char *opt, struct ovl_config *config) + { + char *p; + +- while ((p = strsep(&opt, ",")) != NULL) { ++ while ((p = ovl_next_opt(&opt)) != NULL) { + int token; + substring_t args[MAX_OPT_ARGS]; + +@@ -554,15 +577,34 @@ out_dput: + goto out_unlock; + } + ++static void ovl_unescape(char *s) ++{ ++ char *d = s; ++ ++ for (;; s++, d++) { ++ if (*s == '\\') ++ s++; ++ *d = *s; ++ if (!*s) ++ break; ++ } ++} ++ + static int ovl_mount_dir(const char *name, struct path *path) + { + int err; ++ char *tmp = kstrdup(name, GFP_KERNEL); ++ ++ if (!tmp) ++ return -ENOMEM; + +- err = kern_path(name, LOOKUP_FOLLOW, path); ++ ovl_unescape(tmp); ++ err = kern_path(tmp, LOOKUP_FOLLOW, path); + if (err) { +- pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); ++ pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); + err = -EINVAL; + } ++ kfree(tmp); + return err; + } + +-- +2.7.4 + + +From a4180dfef4dc3c948d524dfe0c168cb59317df56 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 20 Nov 2014 16:40:01 +0100 +Subject: [PATCH 38/73] ovl: use lockless_dereference() for upperdentry + +Don't open code lockless_dereference() in 
ovl_upperdentry_dereference(). + +Signed-off-by: Miklos Szeredi +(cherry picked from commit 71d509280f7e92eb60ae6b7c78c20afafff060c7) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index eee7a62..f16d318 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -84,12 +84,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) + + static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) + { +- struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); +- /* +- * Make sure to order reads to upperdentry wrt ovl_dentry_update() +- */ +- smp_read_barrier_depends(); +- return upperdentry; ++ return lockless_dereference(oe->__upperdentry); + } + + void ovl_path_upper(struct dentry *dentry, struct path *path) +-- +2.7.4 + + +From d3a45e42dd711ce4e2095f14d77865b78c749190 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 20 Nov 2014 16:40:01 +0100 +Subject: [PATCH 39/73] ovl: pass dentry into ovl_dir_read_merged() + +Pass dentry into ovl_dir_read_merged() instead of upperpath and lowerpath. +This cleans up callers and paves the way for multi-layer directory reads. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit c9f00fdb9ab3999cb2fb582ad82a5db9e70c82f5) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 35 ++++++++++++++--------------------- + 1 file changed, 14 insertions(+), 21 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index fdb63de..6d5d9fd 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -276,11 +276,11 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, + return 0; + } + +-static inline int ovl_dir_read_merged(struct path *upperpath, +- struct path *lowerpath, +- struct list_head *list) ++static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) + { + int err; ++ struct path lowerpath; ++ struct path upperpath; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .list = list, +@@ -288,25 +288,28 @@ static inline int ovl_dir_read_merged(struct path *upperpath, + .is_merge = false, + }; + +- if (upperpath->dentry) { +- err = ovl_dir_read(upperpath, &rdd); ++ ovl_path_lower(dentry, &lowerpath); ++ ovl_path_upper(dentry, &upperpath); ++ ++ if (upperpath.dentry) { ++ err = ovl_dir_read(&upperpath, &rdd); + if (err) + goto out; + +- if (lowerpath->dentry) { +- err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); ++ if (lowerpath.dentry) { ++ err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); + if (err) + goto out; + } + } +- if (lowerpath->dentry) { ++ if (lowerpath.dentry) { + /* + * Insert lowerpath entries before upperpath ones, this allows + * offsets to be reasonably constant + */ + list_add(&rdd.middle, rdd.list); + rdd.is_merge = true; +- err = ovl_dir_read(lowerpath, &rdd); ++ err = ovl_dir_read(&lowerpath, &rdd); + list_del(&rdd.middle); + } + out: +@@ -331,8 +334,6 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) + static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) + { + int res; +- struct path lowerpath; +- struct path upperpath; + struct ovl_dir_cache *cache; + 
+ cache = ovl_dir_cache(dentry); +@@ -349,10 +350,7 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) + cache->refcount = 1; + INIT_LIST_HEAD(&cache->entries); + +- ovl_path_lower(dentry, &lowerpath); +- ovl_path_upper(dentry, &upperpath); +- +- res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); ++ res = ovl_dir_read_merged(dentry, &cache->entries); + if (res) { + ovl_cache_free(&cache->entries); + kfree(cache); +@@ -540,14 +538,9 @@ const struct file_operations ovl_dir_operations = { + int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) + { + int err; +- struct path lowerpath; +- struct path upperpath; + struct ovl_cache_entry *p; + +- ovl_path_upper(dentry, &upperpath); +- ovl_path_lower(dentry, &lowerpath); +- +- err = ovl_dir_read_merged(&upperpath, &lowerpath, list); ++ err = ovl_dir_read_merged(dentry, list); + if (err) + return err; + +-- +2.7.4 + + +From effb810d716f07d643385ba8206ab981a4b20dd9 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 20 Nov 2014 16:40:02 +0100 +Subject: [PATCH 40/73] ovl: ovl_dir_fsync() cleanup + +Check against !OVL_PATH_LOWER instead of OVL_PATH_MERGE. For a copied up +directory the two are currently equivalent. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 7676895f4736421ebafc48de5078e25ea69e88ee) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 6d5d9fd..c020599 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -452,10 +452,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, + /* + * Need to check if we started out being a lower dir, but got copied up + */ +- if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { ++ if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { + struct inode *inode = file_inode(file); + +- realfile =lockless_dereference(od->upperfile); ++ realfile = lockless_dereference(od->upperfile); + if (!realfile) { + struct path upperpath; + +-- +2.7.4 + + +From 8f7458c9c66418c8b9af323ba29c8f023ffe559e Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:42 +0100 +Subject: [PATCH 41/73] ovl: check whiteout while reading directory + +Don't make a separate pass for checking whiteouts, since we can do it while +reading the upper directory. + +This will make it easier to handle multiple layers. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 49c21e1cacd74a8c83407c70ad860c994e606e25) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 77 ++++++++++++++++++-------------------------------- + 1 file changed, 28 insertions(+), 49 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index c020599..b361719 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -40,6 +40,7 @@ struct ovl_readdir_data { + struct rb_root root; + struct list_head *list; + struct list_head middle; ++ struct dentry *dir; + int count; + int err; + }; +@@ -126,6 +127,32 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, + if (p == NULL) + return -ENOMEM; + ++ if (d_type == DT_CHR) { ++ struct dentry *dentry; ++ const struct cred *old_cred; ++ struct cred *override_cred; ++ ++ override_cred = prepare_creds(); ++ if (!override_cred) { ++ kfree(p); ++ return -ENOMEM; ++ } ++ ++ /* ++ * CAP_DAC_OVERRIDE for lookup ++ */ ++ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); ++ old_cred = override_creds(override_cred); ++ ++ dentry = lookup_one_len(name, rdd->dir, len); ++ if (!IS_ERR(dentry)) { ++ p->is_whiteout = ovl_is_whiteout(dentry); ++ dput(dentry); ++ } ++ revert_creds(old_cred); ++ put_cred(override_cred); ++ } ++ + list_add_tail(&p->l_node, rdd->list); + rb_link_node(&p->node, parent, newp); + rb_insert_color(&p->node, &rdd->root); +@@ -233,49 +260,6 @@ static void ovl_dir_reset(struct file *file) + od->is_real = false; + } + +-static int ovl_dir_mark_whiteouts(struct dentry *dir, +- struct ovl_readdir_data *rdd) +-{ +- struct ovl_cache_entry *p; +- struct dentry *dentry; +- const struct cred *old_cred; +- struct cred *override_cred; +- +- override_cred = prepare_creds(); +- if (!override_cred) { +- ovl_cache_free(rdd->list); +- return -ENOMEM; +- } +- +- /* +- * CAP_DAC_OVERRIDE for lookup +- */ +- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); +- old_cred = override_creds(override_cred); 
+- +- mutex_lock(&dir->d_inode->i_mutex); +- list_for_each_entry(p, rdd->list, l_node) { +- if (p->is_cursor) +- continue; +- +- if (p->type != DT_CHR) +- continue; +- +- dentry = lookup_one_len(p->name, dir, p->len); +- if (IS_ERR(dentry)) +- continue; +- +- p->is_whiteout = ovl_is_whiteout(dentry); +- dput(dentry); +- } +- mutex_unlock(&dir->d_inode->i_mutex); +- +- revert_creds(old_cred); +- put_cred(override_cred); +- +- return 0; +-} +- + static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) + { + int err; +@@ -292,15 +276,10 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) + ovl_path_upper(dentry, &upperpath); + + if (upperpath.dentry) { ++ rdd.dir = upperpath.dentry; + err = ovl_dir_read(&upperpath, &rdd); + if (err) + goto out; +- +- if (lowerpath.dentry) { +- err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); +- if (err) +- goto out; +- } + } + if (lowerpath.dentry) { + /* +-- +2.7.4 + + +From 8688583b8896b61fb799a3e7a6272b8beb87ec9e Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:42 +0100 +Subject: [PATCH 42/73] ovl: make path-type a bitmap + +OVL_PATH_PURE_UPPER -> __OVL_PATH_UPPER | __OVL_PATH_PURE +OVL_PATH_UPPER -> __OVL_PATH_UPPER +OVL_PATH_MERGE -> __OVL_PATH_UPPER | __OVL_PATH_MERGE +OVL_PATH_LOWER -> 0 + +Multiple R/O layers will allow __OVL_PATH_MERGE without __OVL_PATH_UPPER. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 1afaba1ecb5299cdd0f69b5bad98b0185fe71e79) +Signed-off-by: Alex Shi +--- + fs/overlayfs/copy_up.c | 4 ++-- + fs/overlayfs/dir.c | 22 +++++++++++----------- + fs/overlayfs/inode.c | 9 ++++++--- + fs/overlayfs/overlayfs.h | 13 +++++++++---- + fs/overlayfs/readdir.c | 10 +++++----- + fs/overlayfs/super.c | 19 ++++++++----------- + 6 files changed, 41 insertions(+), 36 deletions(-) + +diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c +index ea10a87..a5bfd60 100644 +--- a/fs/overlayfs/copy_up.c ++++ b/fs/overlayfs/copy_up.c +@@ -385,7 +385,7 @@ int ovl_copy_up(struct dentry *dentry) + struct kstat stat; + enum ovl_path_type type = ovl_path_type(dentry); + +- if (type != OVL_PATH_LOWER) ++ if (OVL_TYPE_UPPER(type)) + break; + + next = dget(dentry); +@@ -394,7 +394,7 @@ int ovl_copy_up(struct dentry *dentry) + parent = dget_parent(next); + + type = ovl_path_type(parent); +- if (type != OVL_PATH_LOWER) ++ if (OVL_TYPE_UPPER(type)) + break; + + dput(next); +diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c +index 8ffc4b9..ab50bd1 100644 +--- a/fs/overlayfs/dir.c ++++ b/fs/overlayfs/dir.c +@@ -152,7 +152,7 @@ static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, + * correct link count. nlink=1 seems to pacify 'find' and + * other utilities. 
+ */ +- if (type == OVL_PATH_MERGE) ++ if (OVL_TYPE_MERGE(type)) + stat->nlink = 1; + + return 0; +@@ -630,7 +630,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) + goto out_drop_write; + + type = ovl_path_type(dentry); +- if (type == OVL_PATH_PURE_UPPER) { ++ if (OVL_TYPE_PURE_UPPER(type)) { + err = ovl_remove_upper(dentry, is_dir); + } else { + const struct cred *old_cred; +@@ -712,7 +712,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, + /* Don't copy up directory trees */ + old_type = ovl_path_type(old); + err = -EXDEV; +- if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir) ++ if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir) + goto out; + + if (new->d_inode) { +@@ -725,25 +725,25 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, + + new_type = ovl_path_type(new); + err = -EXDEV; +- if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) ++ if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) + goto out; + + err = 0; +- if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { ++ if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) { + if (ovl_dentry_lower(old)->d_inode == + ovl_dentry_lower(new)->d_inode) + goto out; + } +- if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { ++ if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) { + if (ovl_dentry_upper(old)->d_inode == + ovl_dentry_upper(new)->d_inode) + goto out; + } + } else { + if (ovl_dentry_is_opaque(new)) +- new_type = OVL_PATH_UPPER; ++ new_type = __OVL_PATH_UPPER; + else +- new_type = OVL_PATH_PURE_UPPER; ++ new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE; + } + + err = ovl_want_write(old); +@@ -763,8 +763,8 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, + goto out_drop_write; + } + +- old_opaque = old_type != OVL_PATH_PURE_UPPER; +- new_opaque = new_type != OVL_PATH_PURE_UPPER; ++ old_opaque = !OVL_TYPE_PURE_UPPER(old_type); 
++ new_opaque = !OVL_TYPE_PURE_UPPER(new_type); + + if (old_opaque || new_opaque) { + err = -ENOMEM; +@@ -787,7 +787,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, + old_cred = override_creds(override_cred); + } + +- if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { ++ if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) { + opaquedir = ovl_check_empty_and_clear(new); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) { +diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c +index 07d74b2..48492f1 100644 +--- a/fs/overlayfs/inode.c ++++ b/fs/overlayfs/inode.c +@@ -238,7 +238,10 @@ out: + static bool ovl_need_xattr_filter(struct dentry *dentry, + enum ovl_path_type type) + { +- return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); ++ if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER) ++ return S_ISDIR(dentry->d_inode->i_mode); ++ else ++ return false; + } + + ssize_t ovl_getxattr(struct dentry *dentry, const char *name, +@@ -299,7 +302,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name) + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + goto out_drop_write; + +- if (type == OVL_PATH_LOWER) { ++ if (!OVL_TYPE_UPPER(type)) { + err = vfs_getxattr(realpath.dentry, name, NULL, 0); + if (err < 0) + goto out_drop_write; +@@ -321,7 +324,7 @@ out: + static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, + struct dentry *realdentry) + { +- if (type != OVL_PATH_LOWER) ++ if (OVL_TYPE_UPPER(type)) + return false; + + if (special_file(realdentry->d_inode->i_mode)) +diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h +index 814bed3..d39eaa8 100644 +--- a/fs/overlayfs/overlayfs.h ++++ b/fs/overlayfs/overlayfs.h +@@ -12,12 +12,17 @@ + struct ovl_entry; + + enum ovl_path_type { +- OVL_PATH_PURE_UPPER, +- OVL_PATH_UPPER, +- OVL_PATH_MERGE, +- OVL_PATH_LOWER, ++ __OVL_PATH_PURE = (1 << 0), ++ 
__OVL_PATH_UPPER = (1 << 1), ++ __OVL_PATH_MERGE = (1 << 2), + }; + ++#define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER) ++#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) ++#define OVL_TYPE_PURE_UPPER(type) ((type) & __OVL_PATH_PURE) ++#define OVL_TYPE_MERGE_OR_LOWER(type) \ ++ (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) ++ + extern const char *ovl_opaque_xattr; + + static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index b361719..fb18287 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -255,8 +255,8 @@ static void ovl_dir_reset(struct file *file) + ovl_cache_put(od, dentry); + od->cache = NULL; + } +- WARN_ON(!od->is_real && type != OVL_PATH_MERGE); +- if (od->is_real && type == OVL_PATH_MERGE) ++ WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type)); ++ if (od->is_real && OVL_TYPE_MERGE(type)) + od->is_real = false; + } + +@@ -431,7 +431,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, + /* + * Need to check if we started out being a lower dir, but got copied up + */ +- if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { ++ if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + struct inode *inode = file_inode(file); + + realfile = lockless_dereference(od->upperfile); +@@ -497,8 +497,8 @@ static int ovl_dir_open(struct inode *inode, struct file *file) + } + INIT_LIST_HEAD(&od->cursor.l_node); + od->realfile = realfile; +- od->is_real = (type != OVL_PATH_MERGE); +- od->is_upper = (type != OVL_PATH_LOWER); ++ od->is_real = !OVL_TYPE_MERGE(type); ++ od->is_upper = OVL_TYPE_UPPER(type); + od->cursor.is_cursor = true; + file->private_data = od; + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index f16d318..821719c 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -64,22 +64,19 @@ const char *ovl_opaque_xattr = "trusted.overlay.opaque"; + enum ovl_path_type 
ovl_path_type(struct dentry *dentry) + { + struct ovl_entry *oe = dentry->d_fsdata; ++ enum ovl_path_type type = 0; + + if (oe->__upperdentry) { ++ type = __OVL_PATH_UPPER; ++ + if (oe->lowerdentry) { + if (S_ISDIR(dentry->d_inode->i_mode)) +- return OVL_PATH_MERGE; +- else +- return OVL_PATH_UPPER; +- } else { +- if (oe->opaque) +- return OVL_PATH_UPPER; +- else +- return OVL_PATH_PURE_UPPER; ++ type |= __OVL_PATH_MERGE; ++ } else if (!oe->opaque) { ++ type |= __OVL_PATH_PURE; + } +- } else { +- return OVL_PATH_LOWER; + } ++ return type; + } + + static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) +@@ -101,7 +98,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) + + enum ovl_path_type type = ovl_path_type(dentry); + +- if (type == OVL_PATH_LOWER) ++ if (!OVL_TYPE_UPPER(type)) + ovl_path_lower(dentry, path); + else + ovl_path_upper(dentry, path); +-- +2.7.4 + + +From 046f00ecd2379495e451658be72234ded7bee154 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:43 +0100 +Subject: [PATCH 43/73] ovl: dont replace opaque dir + +When removing an empty opaque directory, then it makes no sense to replace +it with an exact replica of itself before removal. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 263b4a0fee43f1239c4d6f3c3a62fb5a20d84f2e) +Signed-off-by: Alex Shi +--- + fs/overlayfs/dir.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c +index ab50bd1..dcae3ac 100644 +--- a/fs/overlayfs/dir.c ++++ b/fs/overlayfs/dir.c +@@ -506,7 +506,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) + struct dentry *opaquedir = NULL; + int err; + +- if (is_dir) { ++ if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { + opaquedir = ovl_check_empty_and_clear(dentry); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) +-- +2.7.4 + + +From d669aefff21050073a65d12d8711c7c8bbad65b9 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:43 +0100 +Subject: [PATCH 44/73] ovl: add mutli-layer infrastructure + +Add multiple lower layers to 'struct ovl_fs' and 'struct ovl_entry'. + +ovl_entry will have an array of paths, instead of just the dentry. This +allows a compact array containing just the layers which exist at current +point in the tree (which is expected to be a small number for the majority +of dentries). + +The number of layers is not limited by this infrastructure. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit dd662667e6d3e55b42798a6e6e7f37dddc639460) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 98 ++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 64 insertions(+), 34 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 821719c..460d866 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -35,7 +35,8 @@ struct ovl_config { + /* private information held for overlayfs's superblock */ + struct ovl_fs { + struct vfsmount *upper_mnt; +- struct vfsmount *lower_mnt; ++ unsigned numlower; ++ struct vfsmount **lower_mnt; + struct dentry *workdir; + long lower_namelen; + /* pathnames of lower and upper dirs, for show_options */ +@@ -47,7 +48,6 @@ struct ovl_dir_cache; + /* private information held for every overlayfs dentry */ + struct ovl_entry { + struct dentry *__upperdentry; +- struct dentry *lowerdentry; + struct ovl_dir_cache *cache; + union { + struct { +@@ -56,10 +56,16 @@ struct ovl_entry { + }; + struct rcu_head rcu; + }; ++ unsigned numlower; ++ struct path lowerstack[]; + }; + + const char *ovl_opaque_xattr = "trusted.overlay.opaque"; + ++static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) ++{ ++ return oe->numlower ? 
oe->lowerstack[0].dentry : NULL; ++} + + enum ovl_path_type ovl_path_type(struct dentry *dentry) + { +@@ -69,7 +75,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) + if (oe->__upperdentry) { + type = __OVL_PATH_UPPER; + +- if (oe->lowerdentry) { ++ if (oe->numlower) { + if (S_ISDIR(dentry->d_inode->i_mode)) + type |= __OVL_PATH_MERGE; + } else if (!oe->opaque) { +@@ -117,7 +123,7 @@ struct dentry *ovl_dentry_lower(struct dentry *dentry) + { + struct ovl_entry *oe = dentry->d_fsdata; + +- return oe->lowerdentry; ++ return __ovl_dentry_lower(oe); + } + + struct dentry *ovl_dentry_real(struct dentry *dentry) +@@ -127,7 +133,7 @@ struct dentry *ovl_dentry_real(struct dentry *dentry) + + realdentry = ovl_upperdentry_dereference(oe); + if (!realdentry) +- realdentry = oe->lowerdentry; ++ realdentry = __ovl_dentry_lower(oe); + + return realdentry; + } +@@ -140,7 +146,7 @@ struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) + if (realdentry) { + *is_upper = true; + } else { +- realdentry = oe->lowerdentry; ++ realdentry = __ovl_dentry_lower(oe); + *is_upper = false; + } + return realdentry; +@@ -162,11 +168,9 @@ void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) + + void ovl_path_lower(struct dentry *dentry, struct path *path) + { +- struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + +- path->mnt = ofs->lower_mnt; +- path->dentry = oe->lowerdentry; ++ *path = oe->numlower ? 
oe->lowerstack[0] : (struct path) { NULL, NULL }; + } + + int ovl_want_write(struct dentry *dentry) +@@ -258,8 +262,11 @@ static void ovl_dentry_release(struct dentry *dentry) + struct ovl_entry *oe = dentry->d_fsdata; + + if (oe) { ++ unsigned int i; ++ + dput(oe->__upperdentry); +- dput(oe->lowerdentry); ++ for (i = 0; i < oe->numlower; i++) ++ dput(oe->lowerstack[i].dentry); + kfree_rcu(oe, rcu); + } + } +@@ -268,9 +275,15 @@ static const struct dentry_operations ovl_dentry_operations = { + .d_release = ovl_dentry_release, + }; + +-static struct ovl_entry *ovl_alloc_entry(void) ++static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) + { +- return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); ++ size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); ++ struct ovl_entry *oe = kzalloc(size, GFP_KERNEL); ++ ++ if (oe) ++ oe->numlower = numlower; ++ ++ return oe; + } + + static inline struct dentry *ovl_lookup_real(struct dentry *dir, +@@ -297,19 +310,19 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + { + struct ovl_entry *oe; + struct dentry *upperdir; +- struct dentry *lowerdir; ++ struct path lowerdir; + struct dentry *upperdentry = NULL; + struct dentry *lowerdentry = NULL; + struct inode *inode = NULL; + int err; + + err = -ENOMEM; +- oe = ovl_alloc_entry(); ++ oe = ovl_alloc_entry(1); + if (!oe) + goto out; + + upperdir = ovl_dentry_upper(dentry->d_parent); +- lowerdir = ovl_dentry_lower(dentry->d_parent); ++ ovl_path_lower(dentry->d_parent, &lowerdir); + + if (upperdir) { + upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); +@@ -317,7 +330,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + if (IS_ERR(upperdentry)) + goto out_put_dir; + +- if (lowerdir && upperdentry) { ++ if (lowerdir.dentry && upperdentry) { + if (ovl_is_whiteout(upperdentry)) { + dput(upperdentry); + upperdentry = NULL; +@@ -327,8 +340,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + } + } + 
} +- if (lowerdir && !oe->opaque) { +- lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name); ++ if (lowerdir.dentry && !oe->opaque) { ++ lowerdentry = ovl_lookup_real(lowerdir.dentry, &dentry->d_name); + err = PTR_ERR(lowerdentry); + if (IS_ERR(lowerdentry)) + goto out_dput_upper; +@@ -355,8 +368,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + } + + oe->__upperdentry = upperdentry; +- oe->lowerdentry = lowerdentry; +- ++ if (lowerdentry) { ++ oe->lowerstack[0].dentry = lowerdentry; ++ oe->lowerstack[0].mnt = lowerdir.mnt; ++ } else { ++ oe->numlower = 0; ++ } + dentry->d_fsdata = oe; + d_add(dentry, inode); + +@@ -380,10 +397,12 @@ struct file *ovl_path_open(struct path *path, int flags) + static void ovl_put_super(struct super_block *sb) + { + struct ovl_fs *ufs = sb->s_fs_info; ++ unsigned i; + + dput(ufs->workdir); + mntput(ufs->upper_mnt); +- mntput(ufs->lower_mnt); ++ for (i = 0; i < ufs->numlower; i++) ++ mntput(ufs->lower_mnt[i]); + + kfree(ufs->config.lowerdir); + kfree(ufs->config.upperdir); +@@ -641,6 +660,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + struct ovl_entry *oe; + struct ovl_fs *ufs; + struct kstatfs statfs; ++ struct vfsmount *mnt; ++ unsigned int i; + int err; + + err = -ENOMEM; +@@ -661,7 +682,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + } + + err = -ENOMEM; +- oe = ovl_alloc_entry(); ++ oe = ovl_alloc_entry(1); + if (oe == NULL) + goto out_free_config; + +@@ -727,12 +748,24 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + goto out_put_workpath; + } + +- ufs->lower_mnt = clone_private_mount(&lowerpath); +- err = PTR_ERR(ufs->lower_mnt); +- if (IS_ERR(ufs->lower_mnt)) { +- pr_err("overlayfs: failed to clone lowerpath\n"); ++ ufs->lower_mnt = kcalloc(1, sizeof(struct vfsmount *), GFP_KERNEL); ++ if (ufs->lower_mnt == NULL) + goto out_put_upper_mnt; ++ ++ mnt = clone_private_mount(&lowerpath); ++ err = 
PTR_ERR(mnt); ++ if (IS_ERR(mnt)) { ++ pr_err("overlayfs: failed to clone lowerpath\n"); ++ goto out_put_lower_mnt; + } ++ /* ++ * Make lower_mnt R/O. That way fchmod/fchown on lower file ++ * will fail instead of modifying lower fs. ++ */ ++ mnt->mnt_flags |= MNT_READONLY; ++ ++ ufs->lower_mnt[0] = mnt; ++ ufs->numlower = 1; + + ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); + err = PTR_ERR(ufs->workdir); +@@ -742,12 +775,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + goto out_put_lower_mnt; + } + +- /* +- * Make lower_mnt R/O. That way fchmod/fchown on lower file +- * will fail instead of modifying lower fs. +- */ +- ufs->lower_mnt->mnt_flags |= MNT_READONLY; +- + /* If the upper fs is r/o, we mark overlayfs r/o too */ + if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) + sb->s_flags |= MS_RDONLY; +@@ -768,7 +795,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + path_put(&workpath); + + oe->__upperdentry = upperpath.dentry; +- oe->lowerdentry = lowerpath.dentry; ++ oe->lowerstack[0].dentry = lowerpath.dentry; ++ oe->lowerstack[0].mnt = ufs->lower_mnt[0]; + + root_dentry->d_fsdata = oe; + +@@ -782,7 +810,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + out_put_workdir: + dput(ufs->workdir); + out_put_lower_mnt: +- mntput(ufs->lower_mnt); ++ for (i = 0; i < ufs->numlower; i++) ++ mntput(ufs->lower_mnt[i]); ++ kfree(ufs->lower_mnt); + out_put_upper_mnt: + mntput(ufs->upper_mnt); + out_put_workpath: +-- +2.7.4 + + +From 8216b4cc31e60690afef76cbe1a310064f8727d7 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:43 +0100 +Subject: [PATCH 45/73] ovl: helper to iterate layers + +Add helper to iterate through all the layers, starting from the upper layer +(if exists) and continuing down through the lower layers. 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 5ef88da56a77bfb3b9631f5e5775f3bff86b6219) +Signed-off-by: Alex Shi +--- + fs/overlayfs/overlayfs.h | 1 + + fs/overlayfs/super.c | 21 +++++++++++++++++++++ + 2 files changed, 22 insertions(+) + +diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h +index d39eaa8..d176b67 100644 +--- a/fs/overlayfs/overlayfs.h ++++ b/fs/overlayfs/overlayfs.h +@@ -135,6 +135,7 @@ void ovl_dentry_version_inc(struct dentry *dentry); + void ovl_path_upper(struct dentry *dentry, struct path *path); + void ovl_path_lower(struct dentry *dentry, struct path *path); + enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); ++int ovl_path_next(int idx, struct dentry *dentry, struct path *path); + struct dentry *ovl_dentry_upper(struct dentry *dentry); + struct dentry *ovl_dentry_lower(struct dentry *dentry); + struct dentry *ovl_dentry_real(struct dentry *dentry); +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 460d866..07e4c57 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -305,6 +305,27 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir, + return dentry; + } + ++/* ++ * Returns next layer in stack starting from top. ++ * Returns -1 if this is the last layer. ++ */ ++int ovl_path_next(int idx, struct dentry *dentry, struct path *path) ++{ ++ struct ovl_entry *oe = dentry->d_fsdata; ++ ++ BUG_ON(idx < 0); ++ if (idx == 0) { ++ ovl_path_upper(dentry, path); ++ if (path->dentry) ++ return oe->numlower ? 1 : -1; ++ idx++; ++ } ++ BUG_ON(idx > oe->numlower); ++ *path = oe->lowerstack[idx - 1]; ++ ++ return (idx < oe->numlower) ? 
idx + 1 : -1; ++} ++ + struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) + { +-- +2.7.4 + + +From 3dbcd51370dba076be4dc8c45d2bf19db48201b8 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:44 +0100 +Subject: [PATCH 46/73] ovl: multi-layer readdir + +If multiple lower layers exist, merge them as well in readdir according to +the same rules as merging upper with lower. I.e. take whiteouts and opaque +directories into account on all but the lowers layer. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit 9d7459d834c28f55c82f1737f638a6c90e0c0e0f) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 43 +++++++++++++++++++++---------------------- + fs/overlayfs/super.c | 3 +++ + 2 files changed, 24 insertions(+), 22 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index fb18287..5ef05149 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -263,35 +263,34 @@ static void ovl_dir_reset(struct file *file) + static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) + { + int err; +- struct path lowerpath; +- struct path upperpath; ++ struct path realpath; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .list = list, + .root = RB_ROOT, + .is_merge = false, + }; +- +- ovl_path_lower(dentry, &lowerpath); +- ovl_path_upper(dentry, &upperpath); +- +- if (upperpath.dentry) { +- rdd.dir = upperpath.dentry; +- err = ovl_dir_read(&upperpath, &rdd); +- if (err) +- goto out; +- } +- if (lowerpath.dentry) { +- /* +- * Insert lowerpath entries before upperpath ones, this allows +- * offsets to be reasonably constant +- */ +- list_add(&rdd.middle, rdd.list); +- rdd.is_merge = true; +- err = ovl_dir_read(&lowerpath, &rdd); +- list_del(&rdd.middle); ++ int idx, next; ++ ++ for (idx = 0; idx != -1; idx = next) { ++ next = ovl_path_next(idx, dentry, &realpath); ++ ++ if (next != -1) { ++ rdd.dir = realpath.dentry; ++ err = 
ovl_dir_read(&realpath, &rdd); ++ if (err) ++ break; ++ } else { ++ /* ++ * Insert lowest layer entries before upper ones, this ++ * allows offsets to be reasonably constant ++ */ ++ list_add(&rdd.middle, rdd.list); ++ rdd.is_merge = true; ++ err = ovl_dir_read(&realpath, &rdd); ++ list_del(&rdd.middle); ++ } + } +-out: + return err; + } + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 07e4c57..c245043 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -81,6 +81,9 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) + } else if (!oe->opaque) { + type |= __OVL_PATH_PURE; + } ++ } else { ++ if (oe->numlower > 1) ++ type |= __OVL_PATH_MERGE; + } + return type; + } +-- +2.7.4 + + +From 1dabc18b8b2115693ec868cb84fda26052a8ee83 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:44 +0100 +Subject: [PATCH 47/73] ovl: multi-layer lookup + +Look up dentry in all relevant layers. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit 3d3c6b89399a1b5e8a59ffbb8cb2a7797a9ef154) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 143 +++++++++++++++++++++++++++++++++------------------ + 1 file changed, 94 insertions(+), 49 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index c245043..f72b82f 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -333,82 +333,127 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) + { + struct ovl_entry *oe; +- struct dentry *upperdir; +- struct path lowerdir; +- struct dentry *upperdentry = NULL; +- struct dentry *lowerdentry = NULL; ++ struct ovl_entry *poe = dentry->d_parent->d_fsdata; ++ struct path *stack = NULL; ++ struct dentry *upperdir, *upperdentry = NULL; ++ unsigned int ctr = 0; + struct inode *inode = NULL; ++ bool upperopaque = false; ++ struct dentry *this, *prev = NULL; ++ unsigned int i; + int err; + +- err = -ENOMEM; +- oe = ovl_alloc_entry(1); +- if (!oe) +- goto out; +- +- 
upperdir = ovl_dentry_upper(dentry->d_parent); +- ovl_path_lower(dentry->d_parent, &lowerdir); +- ++ upperdir = ovl_upperdentry_dereference(poe); + if (upperdir) { +- upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); +- err = PTR_ERR(upperdentry); +- if (IS_ERR(upperdentry)) +- goto out_put_dir; +- +- if (lowerdir.dentry && upperdentry) { +- if (ovl_is_whiteout(upperdentry)) { +- dput(upperdentry); +- upperdentry = NULL; +- oe->opaque = true; +- } else if (ovl_is_opaquedir(upperdentry)) { +- oe->opaque = true; ++ this = ovl_lookup_real(upperdir, &dentry->d_name); ++ err = PTR_ERR(this); ++ if (IS_ERR(this)) ++ goto out; ++ ++ /* ++ * If this is not the lowermost layer, check whiteout and opaque ++ * directory. ++ */ ++ if (poe->numlower && this) { ++ if (ovl_is_whiteout(this)) { ++ dput(this); ++ this = NULL; ++ upperopaque = true; ++ } else if (ovl_is_opaquedir(this)) { ++ upperopaque = true; + } + } ++ upperdentry = prev = this; + } +- if (lowerdir.dentry && !oe->opaque) { +- lowerdentry = ovl_lookup_real(lowerdir.dentry, &dentry->d_name); +- err = PTR_ERR(lowerdentry); +- if (IS_ERR(lowerdentry)) +- goto out_dput_upper; ++ ++ if (!upperopaque && poe->numlower) { ++ err = -ENOMEM; ++ stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL); ++ if (!stack) ++ goto out_put_upper; + } + +- if (lowerdentry && upperdentry && +- (!S_ISDIR(upperdentry->d_inode->i_mode) || +- !S_ISDIR(lowerdentry->d_inode->i_mode))) { +- dput(lowerdentry); +- lowerdentry = NULL; +- oe->opaque = true; ++ for (i = 0; !upperopaque && i < poe->numlower; i++) { ++ bool opaque = false; ++ struct path lowerpath = poe->lowerstack[i]; ++ ++ opaque = false; ++ this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name); ++ err = PTR_ERR(this); ++ if (IS_ERR(this)) ++ goto out_put; ++ if (!this) ++ continue; ++ ++ /* ++ * If this is not the lowermost layer, check whiteout and opaque ++ * directory. 
++ */ ++ if (i < poe->numlower - 1) { ++ if (ovl_is_whiteout(this)) { ++ dput(this); ++ break; ++ } else if (ovl_is_opaquedir(this)) { ++ opaque = true; ++ } ++ } ++ /* ++ * If this is a non-directory then stop here. ++ * ++ * FIXME: check for opaqueness maybe better done in remove code. ++ */ ++ if (!S_ISDIR(this->d_inode->i_mode)) { ++ opaque = true; ++ } else if (prev && (!S_ISDIR(prev->d_inode->i_mode) || ++ !S_ISDIR(this->d_inode->i_mode))) { ++ if (prev == upperdentry) ++ upperopaque = true; ++ dput(this); ++ break; ++ } ++ stack[ctr].dentry = this; ++ stack[ctr].mnt = lowerpath.mnt; ++ ctr++; ++ prev = this; ++ if (opaque) ++ break; + } + +- if (lowerdentry || upperdentry) { ++ oe = ovl_alloc_entry(ctr); ++ err = -ENOMEM; ++ if (!oe) ++ goto out_put; ++ ++ if (upperdentry || ctr) { + struct dentry *realdentry; + +- realdentry = upperdentry ? upperdentry : lowerdentry; ++ realdentry = upperdentry ? upperdentry : stack[0].dentry; ++ + err = -ENOMEM; + inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode, + oe); + if (!inode) +- goto out_dput; ++ goto out_free_oe; + ovl_copyattr(realdentry->d_inode, inode); + } + ++ oe->opaque = upperopaque; + oe->__upperdentry = upperdentry; +- if (lowerdentry) { +- oe->lowerstack[0].dentry = lowerdentry; +- oe->lowerstack[0].mnt = lowerdir.mnt; +- } else { +- oe->numlower = 0; +- } ++ memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); ++ kfree(stack); + dentry->d_fsdata = oe; + d_add(dentry, inode); + + return NULL; + +-out_dput: +- dput(lowerdentry); +-out_dput_upper: +- dput(upperdentry); +-out_put_dir: ++out_free_oe: + kfree(oe); ++out_put: ++ for (i = 0; i < ctr; i++) ++ dput(stack[i].dentry); ++ kfree(stack); ++out_put_upper: ++ dput(upperdentry); + out: + return ERR_PTR(err); + } +-- +2.7.4 + + +From 34d7a5b7b3a920178f662fa768bb724eb41be925 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:45 +0100 +Subject: [PATCH 48/73] ovl: check whiteout on lowest layer as well + +Not 
checking whiteouts on lowest layer was an optimization (there's nothing +to white out there), but it could result in inconsistent behavior when a +layer previously used as upper/middle is later used as lowest. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit 3e01cee3b980f96463cb6f378ab05303a99903d9) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 79 +++++++++++++++++++++++++------------------------- + fs/overlayfs/super.c | 27 +++++++---------- + 2 files changed, 50 insertions(+), 56 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 5ef05149..2b8315b 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -80,23 +80,50 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, + return NULL; + } + +-static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, ++static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, ++ const char *name, int len, + u64 ino, unsigned int d_type) + { + struct ovl_cache_entry *p; + size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); + + p = kmalloc(size, GFP_KERNEL); +- if (p) { +- memcpy(p->name, name, len); +- p->name[len] = '\0'; +- p->len = len; +- p->type = d_type; +- p->ino = ino; +- p->is_whiteout = false; +- p->is_cursor = false; +- } ++ if (!p) ++ return NULL; ++ ++ memcpy(p->name, name, len); ++ p->name[len] = '\0'; ++ p->len = len; ++ p->type = d_type; ++ p->ino = ino; ++ p->is_whiteout = false; ++ p->is_cursor = false; ++ ++ if (d_type == DT_CHR) { ++ struct dentry *dentry; ++ const struct cred *old_cred; ++ struct cred *override_cred; ++ ++ override_cred = prepare_creds(); ++ if (!override_cred) { ++ kfree(p); ++ return NULL; ++ } + ++ /* ++ * CAP_DAC_OVERRIDE for lookup ++ */ ++ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); ++ old_cred = override_creds(override_cred); ++ ++ dentry = lookup_one_len(name, dir, len); ++ if (!IS_ERR(dentry)) { ++ p->is_whiteout =
ovl_is_whiteout(dentry); ++ dput(dentry); ++ } ++ revert_creds(old_cred); ++ put_cred(override_cred); ++ } + return p; + } + +@@ -123,36 +150,10 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, + return 0; + } + +- p = ovl_cache_entry_new(name, len, ino, d_type); ++ p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type); + if (p == NULL) + return -ENOMEM; + +- if (d_type == DT_CHR) { +- struct dentry *dentry; +- const struct cred *old_cred; +- struct cred *override_cred; +- +- override_cred = prepare_creds(); +- if (!override_cred) { +- kfree(p); +- return -ENOMEM; +- } +- +- /* +- * CAP_DAC_OVERRIDE for lookup +- */ +- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); +- old_cred = override_creds(override_cred); +- +- dentry = lookup_one_len(name, rdd->dir, len); +- if (!IS_ERR(dentry)) { +- p->is_whiteout = ovl_is_whiteout(dentry); +- dput(dentry); +- } +- revert_creds(old_cred); +- put_cred(override_cred); +- } +- + list_add_tail(&p->l_node, rdd->list); + rb_link_node(&p->node, parent, newp); + rb_insert_color(&p->node, &rdd->root); +@@ -170,7 +171,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, + if (p) { + list_move_tail(&p->l_node, &rdd->middle); + } else { +- p = ovl_cache_entry_new(name, namelen, ino, d_type); ++ p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type); + if (p == NULL) + rdd->err = -ENOMEM; + else +@@ -231,6 +232,7 @@ static inline int ovl_dir_read(struct path *realpath, + if (IS_ERR(realfile)) + return PTR_ERR(realfile); + ++ rdd->dir = realpath->dentry; + rdd->ctx.pos = 0; + do { + rdd->count = 0; +@@ -276,7 +278,6 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) + next = ovl_path_next(idx, dentry, &realpath); + + if (next != -1) { +- rdd.dir = realpath.dentry; + err = ovl_dir_read(&realpath, &rdd); + if (err) + break; +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index f72b82f..5dbc678 100644 +--- a/fs/overlayfs/super.c ++++ 
b/fs/overlayfs/super.c +@@ -350,16 +350,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + if (IS_ERR(this)) + goto out; + +- /* +- * If this is not the lowermost layer, check whiteout and opaque +- * directory. +- */ +- if (poe->numlower && this) { ++ if (this) { + if (ovl_is_whiteout(this)) { + dput(this); + this = NULL; + upperopaque = true; +- } else if (ovl_is_opaquedir(this)) { ++ } else if (poe->numlower && ovl_is_opaquedir(this)) { + upperopaque = true; + } + } +@@ -384,19 +380,16 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + goto out_put; + if (!this) + continue; +- ++ if (ovl_is_whiteout(this)) { ++ dput(this); ++ break; ++ } + /* +- * If this is not the lowermost layer, check whiteout and opaque +- * directory. ++ * Only makes sense to check opaque dir if this is not the ++ * lowermost layer. + */ +- if (i < poe->numlower - 1) { +- if (ovl_is_whiteout(this)) { +- dput(this); +- break; +- } else if (ovl_is_opaquedir(this)) { +- opaque = true; +- } +- } ++ if (i < poe->numlower - 1 && ovl_is_opaquedir(this)) ++ opaque = true; + /* + * If this is a non-directory then stop here. + * +-- +2.7.4 + + +From 5dfc4d41f976aa0916abe3298c71e585e78e5491 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:45 +0100 +Subject: [PATCH 49/73] ovl: lookup ENAMETOOLONG on lower means ENOENT + +"Suppose you have in one of the lower layers a filesystem with +->lookup()-enforced upper limit on name length. Pretty much every local fs +has one, but... they are not all equal. 255 characters is the common upper +limit, but e.g. jffs2 stops at 254, minixfs upper limit is somewhere from +14 to 60, depending upon version, etc. You are doing a lookup for +something that is present in upper layer, but happens to be too long for +one of the lower layers. Too bad - ENAMETOOLONG for you..." 
+ +Reported-by: Al Viro +Signed-off-by: Miklos Szeredi +(cherry picked from commit 09e10322b71716adf567d453889ef0871cf226b9) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 5dbc678..110c968 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -376,8 +376,14 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + opaque = false; + this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name); + err = PTR_ERR(this); +- if (IS_ERR(this)) ++ if (IS_ERR(this)) { ++ /* ++ * If it's positive, then treat ENAMETOOLONG as ENOENT. ++ */ ++ if (err == -ENAMETOOLONG && (upperdentry || ctr)) ++ continue; + goto out_put; ++ } + if (!this) + continue; + if (ovl_is_whiteout(this)) { +-- +2.7.4 + + +From f3f4d3482d4f4e8edd9ef8a862676004a07c2e43 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:46 +0100 +Subject: [PATCH 50/73] ovl: allow statfs if no upper layer + +Handle "no upper layer" case in statfs. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit 4ebc581828d5d0fe189ca06cef8b7a63cb4583d5) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 110c968..cc7a0f3 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -484,7 +484,7 @@ static void ovl_put_super(struct super_block *sb) + * @buf: The struct kstatfs to fill in with stats + * + * Get the filesystem statistics. As writes always target the upper layer +- * filesystem pass the statfs to the same filesystem. 
++ * filesystem pass the statfs to the upper filesystem (if it exists) + */ + static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) + { +@@ -493,7 +493,7 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) + struct path path; + int err; + +- ovl_path_upper(root_dentry, &path); ++ ovl_path_real(root_dentry, &path); + + err = vfs_statfs(&path, buf); + if (!err) { +-- +2.7.4 + + +From ed3dcf4bb43b6d737b85ba134d25d1483fa12c8e Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:48 +0100 +Subject: [PATCH 51/73] ovl: mount: change order of initialization + +Move allocation of root entry above to where it's needed. + +Move initializations related to upperdir and workdir near each other. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit 3b7a9a249a93e68b7bb318de40e64d3b68ba1a6d) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 70 ++++++++++++++++++++++++---------------------------- + 1 file changed, 32 insertions(+), 38 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index cc7a0f3..a177028 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -723,7 +723,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + struct path lowerpath; + struct path upperpath; + struct path workpath; +- struct inode *root_inode; + struct dentry *root_dentry; + struct ovl_entry *oe; + struct ovl_fs *ufs; +@@ -749,54 +748,49 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + goto out_free_config; + } + +- err = -ENOMEM; +- oe = ovl_alloc_entry(1); +- if (oe == NULL) +- goto out_free_config; +- + err = ovl_mount_dir(ufs->config.upperdir, &upperpath); + if (err) +- goto out_free_oe; ++ goto out_free_config; + +- err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); ++ err = ovl_mount_dir(ufs->config.workdir, &workpath); + if (err) + goto out_put_upperpath; + +- err = ovl_mount_dir(ufs->config.workdir, &workpath); ++ err = 
ovl_mount_dir(ufs->config.lowerdir, &lowerpath); + if (err) +- goto out_put_lowerpath; ++ goto out_put_workpath; + + err = -EINVAL; + if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || + !S_ISDIR(lowerpath.dentry->d_inode->i_mode) || + !S_ISDIR(workpath.dentry->d_inode->i_mode)) { + pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n"); +- goto out_put_workpath; ++ goto out_put_lowerpath; + } + + if (upperpath.mnt != workpath.mnt) { + pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); +- goto out_put_workpath; ++ goto out_put_lowerpath; + } + if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { + pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); +- goto out_put_workpath; ++ goto out_put_lowerpath; + } + + if (!ovl_is_allowed_fs_type(upperpath.dentry)) { + pr_err("overlayfs: filesystem of upperdir is not supported\n"); +- goto out_put_workpath; ++ goto out_put_lowerpath; + } + + if (!ovl_is_allowed_fs_type(lowerpath.dentry)) { + pr_err("overlayfs: filesystem of lowerdir is not supported\n"); +- goto out_put_workpath; ++ goto out_put_lowerpath; + } + + err = vfs_statfs(&lowerpath, &statfs); + if (err) { + pr_err("overlayfs: statfs failed on lowerpath\n"); +- goto out_put_workpath; ++ goto out_put_lowerpath; + } + ufs->lower_namelen = statfs.f_namelen; + +@@ -806,19 +800,27 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + err = -EINVAL; + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + pr_err("overlayfs: maximum fs stacking depth exceeded\n"); +- goto out_put_workpath; ++ goto out_put_lowerpath; + } + + ufs->upper_mnt = clone_private_mount(&upperpath); + err = PTR_ERR(ufs->upper_mnt); + if (IS_ERR(ufs->upper_mnt)) { + pr_err("overlayfs: failed to clone upperpath\n"); +- goto out_put_workpath; ++ goto out_put_lowerpath; ++ } ++ ++ ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); ++ err = PTR_ERR(ufs->workdir); ++ if (IS_ERR(ufs->workdir)) { ++ 
pr_err("overlayfs: failed to create directory %s/%s\n", ++ ufs->config.workdir, OVL_WORKDIR_NAME); ++ goto out_put_upper_mnt; + } + + ufs->lower_mnt = kcalloc(1, sizeof(struct vfsmount *), GFP_KERNEL); + if (ufs->lower_mnt == NULL) +- goto out_put_upper_mnt; ++ goto out_put_workdir; + + mnt = clone_private_mount(&lowerpath); + err = PTR_ERR(mnt); +@@ -835,14 +837,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + ufs->lower_mnt[0] = mnt; + ufs->numlower = 1; + +- ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); +- err = PTR_ERR(ufs->workdir); +- if (IS_ERR(ufs->workdir)) { +- pr_err("overlayfs: failed to create directory %s/%s\n", +- ufs->config.workdir, OVL_WORKDIR_NAME); +- goto out_put_lower_mnt; +- } +- + /* If the upper fs is r/o, we mark overlayfs r/o too */ + if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) + sb->s_flags |= MS_RDONLY; +@@ -850,13 +844,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + sb->s_d_op = &ovl_dentry_operations; + + err = -ENOMEM; +- root_inode = ovl_new_inode(sb, S_IFDIR, oe); +- if (!root_inode) +- goto out_put_workdir; ++ oe = ovl_alloc_entry(1); ++ if (!oe) ++ goto out_put_lower_mnt; + +- root_dentry = d_make_root(root_inode); ++ root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, oe)); + if (!root_dentry) +- goto out_put_workdir; ++ goto out_free_oe; + + mntput(upperpath.mnt); + mntput(lowerpath.mnt); +@@ -875,22 +869,22 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + + return 0; + +-out_put_workdir: +- dput(ufs->workdir); ++out_free_oe: ++ kfree(oe); + out_put_lower_mnt: + for (i = 0; i < ufs->numlower; i++) + mntput(ufs->lower_mnt[i]); + kfree(ufs->lower_mnt); ++out_put_workdir: ++ dput(ufs->workdir); + out_put_upper_mnt: + mntput(ufs->upper_mnt); +-out_put_workpath: +- path_put(&workpath); + out_put_lowerpath: + path_put(&lowerpath); ++out_put_workpath: ++ path_put(&workpath); + out_put_upperpath: + 
path_put(&upperpath); +-out_free_oe: +- kfree(oe); + out_free_config: + kfree(ufs->config.lowerdir); + kfree(ufs->config.upperdir); +-- +2.7.4 + + +From 4db5f1d8cd9d89c8810c8ed10a4e50d1f0debbd1 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:49 +0100 +Subject: [PATCH 52/73] ovl: improve mount helpers + +Move common checks into ovl_mount_dir() helper. + +Create helper for looking up lower directories. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit ab508822cab4c84f07373cd6ad107a1fd1362831) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 125 ++++++++++++++++++++++++++++++--------------------- + 1 file changed, 73 insertions(+), 52 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index a177028..592370f 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -669,24 +669,6 @@ static void ovl_unescape(char *s) + } + } + +-static int ovl_mount_dir(const char *name, struct path *path) +-{ +- int err; +- char *tmp = kstrdup(name, GFP_KERNEL); +- +- if (!tmp) +- return -ENOMEM; +- +- ovl_unescape(tmp); +- err = kern_path(tmp, LOOKUP_FOLLOW, path); +- if (err) { +- pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); +- err = -EINVAL; +- } +- kfree(tmp); +- return err; +-} +- + static bool ovl_is_allowed_fs_type(struct dentry *root) + { + const struct dentry_operations *dop = root->d_op; +@@ -706,6 +688,71 @@ static bool ovl_is_allowed_fs_type(struct dentry *root) + return true; + } + ++static int ovl_mount_dir_noesc(const char *name, struct path *path) ++{ ++ int err; ++ ++ err = kern_path(name, LOOKUP_FOLLOW, path); ++ if (err) { ++ pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); ++ goto out; ++ } ++ err = -EINVAL; ++ if (!ovl_is_allowed_fs_type(path->dentry)) { ++ pr_err("overlayfs: filesystem on '%s' not supported\n", name); ++ goto out_put; ++ } ++ if (!S_ISDIR(path->dentry->d_inode->i_mode)) { ++ pr_err("overlayfs: '%s' not a directory\n", name); ++ goto out_put; 
++ } ++ return 0; ++ ++out_put: ++ path_put(path); ++out: ++ return err; ++} ++ ++static int ovl_mount_dir(const char *name, struct path *path) ++{ ++ int err = -ENOMEM; ++ char *tmp = kstrdup(name, GFP_KERNEL); ++ ++ if (tmp) { ++ ovl_unescape(tmp); ++ err = ovl_mount_dir_noesc(tmp, path); ++ kfree(tmp); ++ } ++ return err; ++} ++ ++static int ovl_lower_dir(const char *name, struct path *path, long *namelen, ++ int *stack_depth) ++{ ++ int err; ++ struct kstatfs statfs; ++ ++ err = ovl_mount_dir(name, path); ++ if (err) ++ goto out; ++ ++ err = vfs_statfs(path, &statfs); ++ if (err) { ++ pr_err("overlayfs: statfs failed on '%s'\n", name); ++ goto out_put; ++ } ++ *namelen = max(*namelen, statfs.f_namelen); ++ *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); ++ ++ return 0; ++ ++out_put: ++ path_put(path); ++out: ++ return err; ++} ++ + /* Workdir should not be subdir of upperdir and vice versa */ + static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) + { +@@ -726,7 +773,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + struct dentry *root_dentry; + struct ovl_entry *oe; + struct ovl_fs *ufs; +- struct kstatfs statfs; + struct vfsmount *mnt; + unsigned int i; + int err; +@@ -756,48 +802,23 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + if (err) + goto out_put_upperpath; + +- err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); +- if (err) +- goto out_put_workpath; +- +- err = -EINVAL; +- if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || +- !S_ISDIR(lowerpath.dentry->d_inode->i_mode) || +- !S_ISDIR(workpath.dentry->d_inode->i_mode)) { +- pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n"); +- goto out_put_lowerpath; +- } +- + if (upperpath.mnt != workpath.mnt) { + pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); +- goto out_put_lowerpath; ++ goto out_put_workpath; + } + if (!ovl_workdir_ok(workpath.dentry, 
upperpath.dentry)) { + pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); +- goto out_put_lowerpath; +- } +- +- if (!ovl_is_allowed_fs_type(upperpath.dentry)) { +- pr_err("overlayfs: filesystem of upperdir is not supported\n"); +- goto out_put_lowerpath; +- } +- +- if (!ovl_is_allowed_fs_type(lowerpath.dentry)) { +- pr_err("overlayfs: filesystem of lowerdir is not supported\n"); +- goto out_put_lowerpath; +- } +- +- err = vfs_statfs(&lowerpath, &statfs); +- if (err) { +- pr_err("overlayfs: statfs failed on lowerpath\n"); +- goto out_put_lowerpath; ++ goto out_put_workpath; + } +- ufs->lower_namelen = statfs.f_namelen; ++ sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; + +- sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth, +- lowerpath.mnt->mnt_sb->s_stack_depth) + 1; ++ err = ovl_lower_dir(ufs->config.lowerdir, &lowerpath, ++ &ufs->lower_namelen, &sb->s_stack_depth); ++ if (err) ++ goto out_put_workpath; + + err = -EINVAL; ++ sb->s_stack_depth++; + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + pr_err("overlayfs: maximum fs stacking depth exceeded\n"); + goto out_put_lowerpath; +-- +2.7.4 + + +From b153140225155c1de019eefe5aadde93365f7a66 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:51 +0100 +Subject: [PATCH 53/73] ovl: make upperdir optional + +Make "upperdir=" mount option optional. If "upperdir=" is not given, then +the "workdir=" option is also optional (and ignored if given). 
+ +Signed-off-by: Miklos Szeredi +(cherry picked from commit 53a08cb9b8bccfe58f1228c7c27baf34a83da78b) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 83 +++++++++++++++++++++++++++++----------------------- + 1 file changed, 47 insertions(+), 36 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 592370f..35bb0ad 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -516,8 +516,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) + struct ovl_fs *ufs = sb->s_fs_info; + + seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); +- seq_printf(m, ",upperdir=%s", ufs->config.upperdir); +- seq_printf(m, ",workdir=%s", ufs->config.workdir); ++ if (ufs->config.upperdir) { ++ seq_printf(m, ",upperdir=%s", ufs->config.upperdir); ++ seq_printf(m, ",workdir=%s", ufs->config.workdir); ++ } + return 0; + } + +@@ -768,8 +770,8 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) + static int ovl_fill_super(struct super_block *sb, void *data, int silent) + { + struct path lowerpath; +- struct path upperpath; +- struct path workpath; ++ struct path upperpath = { NULL, NULL }; ++ struct path workpath = { NULL, NULL }; + struct dentry *root_dentry; + struct ovl_entry *oe; + struct ovl_fs *ufs; +@@ -786,31 +788,38 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + if (err) + goto out_free_config; + +- /* FIXME: workdir is not needed for a R/O mount */ + err = -EINVAL; +- if (!ufs->config.upperdir || !ufs->config.lowerdir || +- !ufs->config.workdir) { +- pr_err("overlayfs: missing upperdir or lowerdir or workdir\n"); ++ if (!ufs->config.lowerdir) { ++ pr_err("overlayfs: missing 'lowerdir'\n"); + goto out_free_config; + } + +- err = ovl_mount_dir(ufs->config.upperdir, &upperpath); +- if (err) +- goto out_free_config; ++ sb->s_stack_depth = 0; ++ if (ufs->config.upperdir) { ++ /* FIXME: workdir is not needed for a R/O mount */ ++ if (!ufs->config.workdir) { ++ 
pr_err("overlayfs: missing 'workdir'\n"); ++ goto out_free_config; ++ } + +- err = ovl_mount_dir(ufs->config.workdir, &workpath); +- if (err) +- goto out_put_upperpath; ++ err = ovl_mount_dir(ufs->config.upperdir, &upperpath); ++ if (err) ++ goto out_free_config; + +- if (upperpath.mnt != workpath.mnt) { +- pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); +- goto out_put_workpath; +- } +- if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { +- pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); +- goto out_put_workpath; ++ err = ovl_mount_dir(ufs->config.workdir, &workpath); ++ if (err) ++ goto out_put_upperpath; ++ ++ if (upperpath.mnt != workpath.mnt) { ++ pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); ++ goto out_put_workpath; ++ } ++ if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { ++ pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); ++ goto out_put_workpath; ++ } ++ sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; + } +- sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; + + err = ovl_lower_dir(ufs->config.lowerdir, &lowerpath, + &ufs->lower_namelen, &sb->s_stack_depth); +@@ -824,19 +833,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + goto out_put_lowerpath; + } + +- ufs->upper_mnt = clone_private_mount(&upperpath); +- err = PTR_ERR(ufs->upper_mnt); +- if (IS_ERR(ufs->upper_mnt)) { +- pr_err("overlayfs: failed to clone upperpath\n"); +- goto out_put_lowerpath; +- } ++ if (ufs->config.upperdir) { ++ ufs->upper_mnt = clone_private_mount(&upperpath); ++ err = PTR_ERR(ufs->upper_mnt); ++ if (IS_ERR(ufs->upper_mnt)) { ++ pr_err("overlayfs: failed to clone upperpath\n"); ++ goto out_put_lowerpath; ++ } + +- ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); +- err = PTR_ERR(ufs->workdir); +- if (IS_ERR(ufs->workdir)) { +- pr_err("overlayfs: failed to create directory %s/%s\n", +- 
ufs->config.workdir, OVL_WORKDIR_NAME); +- goto out_put_upper_mnt; ++ ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); ++ err = PTR_ERR(ufs->workdir); ++ if (IS_ERR(ufs->workdir)) { ++ pr_err("overlayfs: failed to create directory %s/%s\n", ++ ufs->config.workdir, OVL_WORKDIR_NAME); ++ goto out_put_upper_mnt; ++ } + } + + ufs->lower_mnt = kcalloc(1, sizeof(struct vfsmount *), GFP_KERNEL); +@@ -858,8 +869,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + ufs->lower_mnt[0] = mnt; + ufs->numlower = 1; + +- /* If the upper fs is r/o, we mark overlayfs r/o too */ +- if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) ++ /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ ++ if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) + sb->s_flags |= MS_RDONLY; + + sb->s_d_op = &ovl_dentry_operations; +-- +2.7.4 + + +From cb19636311dcb8206f668af9cbb68e9bafaeea14 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:52 +0100 +Subject: [PATCH 54/73] ovl: support multiple lower layers + +Allow "lowerdir=" option to contain multiple lower directories separated by +a colon (e.g. "lowerdir=/bin:/usr/bin"). Colon characters in filenames can +be escaped with a backslash. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit a78d9f0d5d5ca9054703376c7c23c901807ddd87) +Signed-off-by: Alex Shi +--- + Documentation/filesystems/overlayfs.txt | 12 ++++ + fs/overlayfs/super.c | 110 ++++++++++++++++++++++++-------- + 2 files changed, 95 insertions(+), 27 deletions(-) + +diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt +index a27c950..b370928 100644 +--- a/Documentation/filesystems/overlayfs.txt ++++ b/Documentation/filesystems/overlayfs.txt +@@ -159,6 +159,18 @@ overlay filesystem (though an operation on the name of the file such as + rename or unlink will of course be noticed and handled). 
+ + ++Multiple lower layers ++--------------------- ++ ++Multiple lower layers can now be given using the the colon (":") as a ++separator character between the directory names. For example: ++ ++ mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged ++ ++As the example shows, "upperdir=" and "workdir=" may be omitted. In that case ++the overlay will be read-only. ++ ++ + Non-standard behavior + --------------------- + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 35bb0ad..5c495a1 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -60,6 +60,8 @@ struct ovl_entry { + struct path lowerstack[]; + }; + ++#define OVL_MAX_STACK 500 ++ + const char *ovl_opaque_xattr = "trusted.overlay.opaque"; + + static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) +@@ -692,8 +694,12 @@ static bool ovl_is_allowed_fs_type(struct dentry *root) + + static int ovl_mount_dir_noesc(const char *name, struct path *path) + { +- int err; ++ int err = -EINVAL; + ++ if (!*name) { ++ pr_err("overlayfs: empty lowerdir\n"); ++ goto out; ++ } + err = kern_path(name, LOOKUP_FOLLOW, path); + if (err) { + pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); +@@ -735,7 +741,7 @@ static int ovl_lower_dir(const char *name, struct path *path, long *namelen, + int err; + struct kstatfs statfs; + +- err = ovl_mount_dir(name, path); ++ err = ovl_mount_dir_noesc(name, path); + if (err) + goto out; + +@@ -767,15 +773,38 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) + return ok; + } + ++static unsigned int ovl_split_lowerdirs(char *str) ++{ ++ unsigned int ctr = 1; ++ char *s, *d; ++ ++ for (s = d = str;; s++, d++) { ++ if (*s == '\\') { ++ s++; ++ } else if (*s == ':') { ++ *d = '\0'; ++ ctr++; ++ continue; ++ } ++ *d = *s; ++ if (!*s) ++ break; ++ } ++ return ctr; ++} ++ + static int ovl_fill_super(struct super_block *sb, void *data, int silent) + { +- struct path lowerpath; + struct path upperpath = { NULL, NULL 
}; + struct path workpath = { NULL, NULL }; + struct dentry *root_dentry; + struct ovl_entry *oe; + struct ovl_fs *ufs; +- struct vfsmount *mnt; ++ struct path *stack = NULL; ++ char *lowertmp; ++ char *lower; ++ unsigned int numlower; ++ unsigned int stacklen = 0; + unsigned int i; + int err; + +@@ -820,13 +849,31 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + } + sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; + } +- +- err = ovl_lower_dir(ufs->config.lowerdir, &lowerpath, +- &ufs->lower_namelen, &sb->s_stack_depth); +- if (err) ++ err = -ENOMEM; ++ lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); ++ if (!lowertmp) + goto out_put_workpath; + + err = -EINVAL; ++ stacklen = ovl_split_lowerdirs(lowertmp); ++ if (stacklen > OVL_MAX_STACK) ++ goto out_free_lowertmp; ++ ++ stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); ++ if (!stack) ++ goto out_free_lowertmp; ++ ++ lower = lowertmp; ++ for (numlower = 0; numlower < stacklen; numlower++) { ++ err = ovl_lower_dir(lower, &stack[numlower], ++ &ufs->lower_namelen, &sb->s_stack_depth); ++ if (err) ++ goto out_put_lowerpath; ++ ++ lower = strchr(lower, '\0') + 1; ++ } ++ ++ err = -EINVAL; + sb->s_stack_depth++; + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + pr_err("overlayfs: maximum fs stacking depth exceeded\n"); +@@ -850,24 +897,25 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + } + } + +- ufs->lower_mnt = kcalloc(1, sizeof(struct vfsmount *), GFP_KERNEL); ++ ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); + if (ufs->lower_mnt == NULL) + goto out_put_workdir; ++ for (i = 0; i < numlower; i++) { ++ struct vfsmount *mnt = clone_private_mount(&stack[i]); + +- mnt = clone_private_mount(&lowerpath); +- err = PTR_ERR(mnt); +- if (IS_ERR(mnt)) { +- pr_err("overlayfs: failed to clone lowerpath\n"); +- goto out_put_lower_mnt; +- } +- /* +- * Make lower_mnt R/O. 
That way fchmod/fchown on lower file +- * will fail instead of modifying lower fs. +- */ +- mnt->mnt_flags |= MNT_READONLY; ++ if (IS_ERR(mnt)) { ++ pr_err("overlayfs: failed to clone lowerpath\n"); ++ goto out_put_lower_mnt; ++ } ++ /* ++ * Make lower_mnt R/O. That way fchmod/fchown on lower file ++ * will fail instead of modifying lower fs. ++ */ ++ mnt->mnt_flags |= MNT_READONLY; + +- ufs->lower_mnt[0] = mnt; +- ufs->numlower = 1; ++ ufs->lower_mnt[ufs->numlower] = mnt; ++ ufs->numlower++; ++ } + + /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ + if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) +@@ -876,7 +924,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + sb->s_d_op = &ovl_dentry_operations; + + err = -ENOMEM; +- oe = ovl_alloc_entry(1); ++ oe = ovl_alloc_entry(numlower); + if (!oe) + goto out_put_lower_mnt; + +@@ -885,12 +933,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + goto out_free_oe; + + mntput(upperpath.mnt); +- mntput(lowerpath.mnt); ++ for (i = 0; i < numlower; i++) ++ mntput(stack[i].mnt); + path_put(&workpath); ++ kfree(lowertmp); + + oe->__upperdentry = upperpath.dentry; +- oe->lowerstack[0].dentry = lowerpath.dentry; +- oe->lowerstack[0].mnt = ufs->lower_mnt[0]; ++ for (i = 0; i < numlower; i++) { ++ oe->lowerstack[i].dentry = stack[i].dentry; ++ oe->lowerstack[i].mnt = ufs->lower_mnt[i]; ++ } + + root_dentry->d_fsdata = oe; + +@@ -912,7 +964,11 @@ out_put_workdir: + out_put_upper_mnt: + mntput(ufs->upper_mnt); + out_put_lowerpath: +- path_put(&lowerpath); ++ for (i = 0; i < numlower; i++) ++ path_put(&stack[i]); ++ kfree(stack); ++out_free_lowertmp: ++ kfree(lowertmp); + out_put_workpath: + path_put(&workpath); + out_put_upperpath: +-- +2.7.4 + + +From 96149453dee0b67c9ecb1032c0ed0e4aa31fb914 Mon Sep 17 00:00:00 2001 +From: hujianyang +Date: Wed, 26 Nov 2014 16:16:59 +0800 +Subject: [PATCH 55/73] ovl: Cleanup redundant blank lines + 
+This patch removes redundant blanks lines in overlayfs. + +Signed-off-by: hujianyang +Signed-off-by: Miklos Szeredi +(cherry picked from commit 1ba38725a351f91769918b132c17fb7fcaf6c2f5) +Signed-off-by: Alex Shi +--- + fs/overlayfs/copy_up.c | 1 - + fs/overlayfs/inode.c | 1 - + fs/overlayfs/super.c | 1 - + 3 files changed, 3 deletions(-) + +diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c +index a5bfd60..24f6404 100644 +--- a/fs/overlayfs/copy_up.c ++++ b/fs/overlayfs/copy_up.c +@@ -191,7 +191,6 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) + ovl_set_timestamps(upperdentry, stat); + + return err; +- + } + + static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, +diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c +index 48492f1..5ac1236 100644 +--- a/fs/overlayfs/inode.c ++++ b/fs/overlayfs/inode.c +@@ -433,5 +433,4 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, + } + + return inode; +- + } +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 5c495a1..e9ce4a9 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -106,7 +106,6 @@ void ovl_path_upper(struct dentry *dentry, struct path *path) + + enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) + { +- + enum ovl_path_type type = ovl_path_type(dentry); + + if (!OVL_TYPE_UPPER(type)) +-- +2.7.4 + + +From 4fb1e9e207167f999c57954716a51942d0d61674 Mon Sep 17 00:00:00 2001 +From: hujianyang +Date: Mon, 24 Nov 2014 18:25:21 +0800 +Subject: [PATCH 56/73] ovl: Use macros to present ovl_xattr + +This patch adds two macros: + +OVL_XATTR_PRE_NAME and OVL_XATTR_PRE_LEN + +to present ovl_xattr name prefix and its length. Also, a +new macro OVL_XATTR_OPAQUE is introduced to replace old +*ovl_opaque_xattr*. + +Fix the length of "trusted.overlay." to *16*. 
+ +Signed-off-by: hujianyang +Signed-off-by: Miklos Szeredi +(cherry picked from commit cead89bb08c0f64e23886f1c18df9bb98e97c55c) +Signed-off-by: Alex Shi +--- + fs/overlayfs/dir.c | 4 ++-- + fs/overlayfs/inode.c | 2 +- + fs/overlayfs/overlayfs.h | 4 +++- + fs/overlayfs/super.c | 4 +--- + 4 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c +index dcae3ac..0dc4c33 100644 +--- a/fs/overlayfs/dir.c ++++ b/fs/overlayfs/dir.c +@@ -118,14 +118,14 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, + + static int ovl_set_opaque(struct dentry *upperdentry) + { +- return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0); ++ return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); + } + + static void ovl_remove_opaque(struct dentry *upperdentry) + { + int err; + +- err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr); ++ err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE); + if (err) { + pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", + upperdentry->d_name.name, err); +diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c +index 5ac1236..04f1248 100644 +--- a/fs/overlayfs/inode.c ++++ b/fs/overlayfs/inode.c +@@ -205,7 +205,7 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) + + static bool ovl_is_private_xattr(const char *name) + { +- return strncmp(name, "trusted.overlay.", 14) == 0; ++ return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0; + } + + int ovl_setxattr(struct dentry *dentry, const char *name, +diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h +index d176b67..17ac5af 100644 +--- a/fs/overlayfs/overlayfs.h ++++ b/fs/overlayfs/overlayfs.h +@@ -23,7 +23,9 @@ enum ovl_path_type { + #define OVL_TYPE_MERGE_OR_LOWER(type) \ + (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) + +-extern const char *ovl_opaque_xattr; ++#define OVL_XATTR_PRE_NAME "trusted.overlay." 
++#define OVL_XATTR_PRE_LEN 16 ++#define OVL_XATTR_OPAQUE OVL_XATTR_PRE_NAME"opaque" + + static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) + { +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index e9ce4a9..84f3144 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -62,8 +62,6 @@ struct ovl_entry { + + #define OVL_MAX_STACK 500 + +-const char *ovl_opaque_xattr = "trusted.overlay.opaque"; +- + static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) + { + return oe->numlower ? oe->lowerstack[0].dentry : NULL; +@@ -254,7 +252,7 @@ static bool ovl_is_opaquedir(struct dentry *dentry) + if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) + return false; + +- res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1); ++ res = inode->i_op->getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); + if (res == 1 && val == 'y') + return true; + +-- +2.7.4 + + +From 248653d3bb85e5b87899b47e56dc0cf86ee24f89 Mon Sep 17 00:00:00 2001 +From: hujianyang +Date: Tue, 6 Jan 2015 12:52:13 +0800 +Subject: [PATCH 57/73] ovl: Fix kernel panic while mounting overlayfs + +The function ovl_fill_super() in recently multi-layer support +version will incorrectly return 0 at error handling path and +then cause kernel panic. + +This failure can be reproduced by mounting a overlayfs with +upperdir and workdir in different mounts. + +And also, If the memory allocation of *lower_mnt* fail, this +function may return an zero either. + +This patch fix this problem by setting *err* to proper error +number before jumping to error handling path. 
+ +Signed-off-by: hujianyang +Signed-off-by: Miklos Szeredi +(cherry picked from commit 2f83fd8c2849a388082f30d755a75c1e67c4643b) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 84f3144..6ca8ea8 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -836,6 +836,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + if (err) + goto out_put_upperpath; + ++ err = -EINVAL; + if (upperpath.mnt != workpath.mnt) { + pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); + goto out_put_workpath; +@@ -894,12 +895,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + } + } + ++ err = -ENOMEM; + ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); + if (ufs->lower_mnt == NULL) + goto out_put_workdir; + for (i = 0; i < numlower; i++) { + struct vfsmount *mnt = clone_private_mount(&stack[i]); + ++ err = PTR_ERR(mnt); + if (IS_ERR(mnt)) { + pr_err("overlayfs: failed to clone lowerpath\n"); + goto out_put_lower_mnt; +-- +2.7.4 + + +From ef2578e619a9107cb007644f38701d6774a46c5f Mon Sep 17 00:00:00 2001 +From: hujianyang +Date: Tue, 6 Jan 2015 16:10:01 +0800 +Subject: [PATCH 58/73] ovl: Fix opaque regression in ovl_lookup + +Current multi-layer support overlayfs has a regression in +.lookup(). If there is a directory in upperdir and a regular +file has same name in lowerdir in a merged directory, lower +file is hidden and upper directory is set to opaque in former +case. But it is changed in present code. + +In lowerdir lookup path, if a found inode is not directory, +the type checking of previous inode is missing. This inode +will be copied to the lowerstack of ovl_entry directly. 
+ +That will lead to several wrong conditions, for example, +the reading of the directory in upperdir may return an error +like: + + ls: reading directory .: Not a directory + +This patch makes the lowerdir lookup path check the opaque +for non-directory file too. + +Signed-off-by: hujianyang +Signed-off-by: Miklos Szeredi +(cherry picked from commit a425c037f3dd8a56469158ab5f37beb46402d958) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 6ca8ea8..9e94f4a 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -372,7 +372,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + bool opaque = false; + struct path lowerpath = poe->lowerstack[i]; + +- opaque = false; + this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name); + err = PTR_ERR(this); + if (IS_ERR(this)) { +@@ -395,20 +394,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + */ + if (i < poe->numlower - 1 && ovl_is_opaquedir(this)) + opaque = true; +- /* +- * If this is a non-directory then stop here. +- * +- * FIXME: check for opaqueness maybe better done in remove code. +- */ +- if (!S_ISDIR(this->d_inode->i_mode)) { +- opaque = true; +- } else if (prev && (!S_ISDIR(prev->d_inode->i_mode) || +- !S_ISDIR(this->d_inode->i_mode))) { ++ ++ if (prev && (!S_ISDIR(prev->d_inode->i_mode) || ++ !S_ISDIR(this->d_inode->i_mode))) { ++ /* ++ * FIXME: check for upper-opaqueness maybe better done ++ * in remove code. ++ */ + if (prev == upperdentry) + upperopaque = true; + dput(this); + break; + } ++ /* ++ * If this is a non-directory then stop here. 
++ */ ++ if (!S_ISDIR(this->d_inode->i_mode)) ++ opaque = true; ++ + stack[ctr].dentry = this; + stack[ctr].mnt = lowerpath.mnt; + ctr++; +-- +2.7.4 + + +From 1676d3ec23db0db95de768672df3355c92589a0e Mon Sep 17 00:00:00 2001 +From: Seunghun Lee +Date: Sat, 3 Jan 2015 02:26:49 +0900 +Subject: [PATCH 59/73] ovl: Prevent rw remount when it should be ro mount + +Overlayfs should be mounted read-only when upper-fs is read-only or nonexistent. +But now it can be remounted read-write and this can cause kernel panic. +So we should prevent read-write remount when the above situation happens. + +Signed-off-by: Seunghun Lee +Signed-off-by: Miklos Szeredi +(cherry picked from commit 3cdf6fe91041b3afd6761f76254f7b6cbe8020fc) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 9e94f4a..b90952f 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -525,10 +525,22 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) + return 0; + } + ++static int ovl_remount(struct super_block *sb, int *flags, char *data) ++{ ++ struct ovl_fs *ufs = sb->s_fs_info; ++ ++ if (!(*flags & MS_RDONLY) && ++ (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))) ++ return -EROFS; ++ ++ return 0; ++} ++ + static const struct super_operations ovl_super_operations = { + .put_super = ovl_put_super, + .statfs = ovl_statfs, + .show_options = ovl_show_options, ++ .remount_fs = ovl_remount, + }; + + enum { +-- +2.7.4 + + +From add5fbd4b119cd897c130cc2282c4040dc004b88 Mon Sep 17 00:00:00 2001 +From: hujianyang +Date: Thu, 11 Dec 2014 10:30:18 +0800 +Subject: [PATCH 60/73] ovl: discard independent cursor in readdir() + +Since the ovl_dir_cache is stable during a directory reading, the cursor +of struct ovl_dir_file don't need to be an independent entry in the list +of a merged directory. 
+ +This patch changes *cursor* to a pointer which points to the entry in the +ovl_dir_cache. After this, we don't need to check *is_cursor* either. + +Signed-off-by: hujianyang +Signed-off-by: Miklos Szeredi +(cherry picked from commit 4330397e4e8a662f36d101659e2a59ce32e76ff4) +Signed-off-by: Alex Shi +--- + fs/overlayfs/readdir.c | 38 ++++++++++++++------------------------ + 1 file changed, 14 insertions(+), 24 deletions(-) + +diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c +index 2b8315b..907870e 100644 +--- a/fs/overlayfs/readdir.c ++++ b/fs/overlayfs/readdir.c +@@ -24,7 +24,6 @@ struct ovl_cache_entry { + struct list_head l_node; + struct rb_node node; + bool is_whiteout; +- bool is_cursor; + char name[]; + }; + +@@ -49,7 +48,7 @@ struct ovl_dir_file { + bool is_real; + bool is_upper; + struct ovl_dir_cache *cache; +- struct ovl_cache_entry cursor; ++ struct list_head *cursor; + struct file *realfile; + struct file *upperfile; + }; +@@ -97,7 +96,6 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, + p->type = d_type; + p->ino = ino; + p->is_whiteout = false; +- p->is_cursor = false; + + if (d_type == DT_CHR) { + struct dentry *dentry; +@@ -196,7 +194,6 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) + { + struct ovl_dir_cache *cache = od->cache; + +- list_del_init(&od->cursor.l_node); + WARN_ON(cache->refcount <= 0); + cache->refcount--; + if (!cache->refcount) { +@@ -256,6 +253,7 @@ static void ovl_dir_reset(struct file *file) + if (cache && ovl_dentry_version_get(dentry) != cache->version) { + ovl_cache_put(od, dentry); + od->cache = NULL; ++ od->cursor = NULL; + } + WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type)); + if (od->is_real && OVL_TYPE_MERGE(type)) +@@ -297,17 +295,16 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) + + static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) + { +- struct ovl_cache_entry *p; ++ struct list_head *p; + loff_t off = 
0; + +- list_for_each_entry(p, &od->cache->entries, l_node) { +- if (p->is_cursor) +- continue; ++ list_for_each(p, &od->cache->entries) { + if (off >= pos) + break; + off++; + } +- list_move_tail(&od->cursor.l_node, &p->l_node); ++ /* Cursor is safe since the cache is stable */ ++ od->cursor = p; + } + + static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) +@@ -346,6 +343,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) + { + struct ovl_dir_file *od = file->private_data; + struct dentry *dentry = file->f_path.dentry; ++ struct ovl_cache_entry *p; + + if (!ctx->pos) + ovl_dir_reset(file); +@@ -364,19 +362,13 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) + ovl_seek_cursor(od, ctx->pos); + } + +- while (od->cursor.l_node.next != &od->cache->entries) { +- struct ovl_cache_entry *p; +- +- p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); +- /* Skip cursors */ +- if (!p->is_cursor) { +- if (!p->is_whiteout) { +- if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) +- break; +- } +- ctx->pos++; +- } +- list_move(&od->cursor.l_node, &p->l_node); ++ while (od->cursor != &od->cache->entries) { ++ p = list_entry(od->cursor, struct ovl_cache_entry, l_node); ++ if (!p->is_whiteout) ++ if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) ++ break; ++ od->cursor = p->l_node.next; ++ ctx->pos++; + } + return 0; + } +@@ -495,11 +487,9 @@ static int ovl_dir_open(struct inode *inode, struct file *file) + kfree(od); + return PTR_ERR(realfile); + } +- INIT_LIST_HEAD(&od->cursor.l_node); + od->realfile = realfile; + od->is_real = !OVL_TYPE_MERGE(type); + od->is_upper = OVL_TYPE_UPPER(type); +- od->cursor.is_cursor = true; + file->private_data = od; + + return 0; +-- +2.7.4 + + +From 088805e56746eafd55a13e2dc7fb37d046cfdfdb Mon Sep 17 00:00:00 2001 +From: hujianyang +Date: Thu, 15 Jan 2015 13:17:36 +0800 +Subject: [PATCH 61/73] ovl: print error message for invalid mount options + +Overlayfs should print 
an error message if an incorrect mount option +is caught like other filesystems. + +After this patch, improper option input could be clearly known. + +Reported-by: Fabian Sturm +Signed-off-by: hujianyang +Signed-off-by: Miklos Szeredi +(cherry picked from commit bead55ef775f6e25a8d286c0d47030580f577bec) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index b90952f..ab3c8cb 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -615,6 +615,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) + break; + + default: ++ pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); + return -EINVAL; + } + } +-- +2.7.4 + + +From 0d8e803097108c61b26175178e62465a7d36c889 Mon Sep 17 00:00:00 2001 +From: hujianyang +Date: Thu, 15 Jan 2015 13:19:21 +0800 +Subject: [PATCH 62/73] ovl: check lowerdir amount for non-upper mount + +Recently multi-lower layer mount support allow upperdir and workdir +to be omitted, then cause overlayfs can be mount with only one +lowerdir directory. This action make no sense and have potential risk. + +This patch check the total number of lower directories to prevent +mounting overlayfs with only one directory. + +Also, an error message is added to indicate lower directories exceed +OVL_MAX_STACK limit. 
+ +Signed-off-by: hujianyang +Signed-off-by: Miklos Szeredi +(cherry picked from commit 6be4506e34cf6075a1307b646e0a6c46c1c9010d) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index ab3c8cb..edbb3eb 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -870,8 +870,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + + err = -EINVAL; + stacklen = ovl_split_lowerdirs(lowertmp); +- if (stacklen > OVL_MAX_STACK) ++ if (stacklen > OVL_MAX_STACK) { ++ pr_err("overlayfs: too many lower directries, limit is %d\n", ++ OVL_MAX_STACK); + goto out_free_lowertmp; ++ } else if (!ufs->config.upperdir && stacklen == 1) { ++ pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); ++ goto out_free_lowertmp; ++ } + + stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); + if (!stack) +-- +2.7.4 + + +From d7170c004481f9fbe303a391774e0fcb238f0e78 Mon Sep 17 00:00:00 2001 +From: hujianyang +Date: Thu, 15 Jan 2015 13:20:57 +0800 +Subject: [PATCH 63/73] ovl: upper fs should not be R/O + +After importing multi-lower layer support, users could mount a r/o +partition as the left most lowerdir instead of using it as upperdir. +And a r/o upperdir may cause an error like + + overlayfs: failed to create directory ./workdir/work + +during mount. + +This patch check the *s_flags* of upper fs and return an error if +it is a r/o partition. The checking of *upper_mnt->mnt_sb->s_flags* +can be removed now. + +This patch also remove + + /* FIXME: workdir is not needed for a R/O mount */ + +from ovl_fill_super() because: + +1) for upper fs r/o case +Setting a r/o partition as upper is prevented, no need to care about +workdir in this case. + +2) for "mount overlay -o ro" with a r/w upper fs case +Users could remount overlayfs to r/w in this case, so workdir should +not be omitted. 
+ +Signed-off-by: hujianyang +Signed-off-by: Miklos Szeredi +(cherry picked from commit 71cbad7e694ee81233b3be3a38b81c3d5872cc6f) +Signed-off-by: Alex Shi +--- + fs/overlayfs/super.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index edbb3eb..5f0d199 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) + { + struct ovl_fs *ufs = sb->s_fs_info; + +- if (!(*flags & MS_RDONLY) && +- (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))) ++ if (!(*flags & MS_RDONLY) && !ufs->upper_mnt) + return -EROFS; + + return 0; +@@ -619,6 +618,15 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) + return -EINVAL; + } + } ++ ++ /* Workdir is useless in non-upper mount */ ++ if (!config->upperdir && config->workdir) { ++ pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", ++ config->workdir); ++ kfree(config->workdir); ++ config->workdir = NULL; ++ } ++ + return 0; + } + +@@ -838,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + + sb->s_stack_depth = 0; + if (ufs->config.upperdir) { +- /* FIXME: workdir is not needed for a R/O mount */ + if (!ufs->config.workdir) { + pr_err("overlayfs: missing 'workdir'\n"); + goto out_free_config; +@@ -848,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + if (err) + goto out_free_config; + ++ /* Upper fs should not be r/o */ ++ if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { ++ pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); ++ err = -EINVAL; ++ goto out_put_upperpath; ++ } ++ + err = ovl_mount_dir(ufs->config.workdir, &workpath); + if (err) + goto out_put_upperpath; +@@ -939,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) + ufs->numlower++; + } + +- /* 
If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ +- if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) ++ /* If the upper fs is nonexistent, we mark overlayfs r/o too */ ++ if (!ufs->upper_mnt) + sb->s_flags |= MS_RDONLY; + + sb->s_d_op = &ovl_dentry_operations; +-- +2.7.4 + + +From 59b077027a431f849a07bd330f0de14c31376743 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Sat, 13 Dec 2014 00:59:53 +0100 +Subject: [PATCH 64/73] ovl: add testsuite to docs + +Reported-by: Sedat Dilek +Signed-off-by: Miklos Szeredi +(cherry picked from commit 2b7a8f36f092a7855f6438cd42d6990394f450fa) +Signed-off-by: Alex Shi +--- + Documentation/filesystems/overlayfs.txt | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt +index b370928..006ea48 100644 +--- a/Documentation/filesystems/overlayfs.txt ++++ b/Documentation/filesystems/overlayfs.txt +@@ -208,3 +208,15 @@ Changes to the underlying filesystems while part of a mounted overlay + filesystem are not allowed. If the underlying filesystem is changed, + the behavior of the overlay is undefined, though it will not result in + a crash or deadlock. 
++ ++Testsuite ++--------- ++ ++There's testsuite developed by David Howells at: ++ ++ git://git.infradead.org/users/dhowells/unionmount-testsuite.git ++ ++Run as root: ++ ++ # cd unionmount-testsuite ++ # ./run --ov +-- +2.7.4 + + +From 6fb17a412207343151aac1fde91935a542339deb Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 8 Jan 2015 15:09:15 +0100 +Subject: [PATCH 65/73] ovl: document lower layer ordering + +Reported-by: Fabian Sturm +Signed-off-by: Miklos Szeredi +(cherry picked from commit 6d900f5a33393067e370736d39798f814f5e25cc) +Signed-off-by: Alex Shi +--- + Documentation/filesystems/overlayfs.txt | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt +index 006ea48..6db0e5d 100644 +--- a/Documentation/filesystems/overlayfs.txt ++++ b/Documentation/filesystems/overlayfs.txt +@@ -167,8 +167,12 @@ separator character between the directory names. For example: + + mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged + +-As the example shows, "upperdir=" and "workdir=" may be omitted. In that case +-the overlay will be read-only. ++As the example shows, "upperdir=" and "workdir=" may be omitted. In ++that case the overlay will be read-only. ++ ++The specified lower directories will be stacked beginning from the ++rightmost one and going left. In the above example lower1 will be the ++top, lower2 the middle and lower3 the bottom layer. + + + Non-standard behavior +-- +2.7.4 + + +From 77dfdd746390b9230ac5420f13dd363bcdac6e98 Mon Sep 17 00:00:00 2001 +From: Pavel Emelyanov +Date: Thu, 10 Oct 2013 17:10:30 +0400 +Subject: [PATCH 66/73] fuse: Connection bit for enabling writeback + +Off (0) by default. Will be used in the next patches and will be turned +on at the very end. 
+ +Signed-off-by: Maxim Patlasov +Signed-off-by: Pavel Emelyanov +Signed-off-by: Miklos Szeredi +(cherry picked from commit d5cd66c58edf10a7ee786659994595fd43995aab) +Signed-off-by: Alex Shi +--- + fs/fuse/fuse_i.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h +index 2da5db2..374a8be 100644 +--- a/fs/fuse/fuse_i.h ++++ b/fs/fuse/fuse_i.h +@@ -480,6 +480,9 @@ struct fuse_conn { + /** Set if bdi is valid */ + unsigned bdi_initialized:1; + ++ /** write-back cache policy (default is write-through) */ ++ unsigned writeback_cache:1; ++ + /* + * The following bitfields are only for optimization purposes + * and hence races in setting them will not cause malfunction +-- +2.7.4 + + +From 4a40d2a3f50fd5721e3c8896e077d3f62f5b73be Mon Sep 17 00:00:00 2001 +From: Pavel Emelyanov +Date: Thu, 10 Oct 2013 17:10:04 +0400 +Subject: [PATCH 67/73] fuse: Linking file to inode helper + +When writeback is ON every writeable file should be in per-inode write list, +not only mmap-ed ones. Thus introduce a helper for this linkage. 
+ +Signed-off-by: Maxim Patlasov +Signed-off-by: Pavel Emelyanov +Signed-off-by: Miklos Szeredi +(cherry picked from commit 650b22b941fa03590c4a3671e79ec2c96ea59e9a) +Signed-off-by: Alex Shi +--- + fs/fuse/file.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/fs/fuse/file.c b/fs/fuse/file.c +index d8a6027..f018386 100644 +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -188,6 +188,22 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + } + EXPORT_SYMBOL_GPL(fuse_do_open); + ++static void fuse_link_write_file(struct file *file) ++{ ++ struct inode *inode = file_inode(file); ++ struct fuse_conn *fc = get_fuse_conn(inode); ++ struct fuse_inode *fi = get_fuse_inode(inode); ++ struct fuse_file *ff = file->private_data; ++ /* ++ * file may be written through mmap, so chain it onto the ++ * inodes's write_file list ++ */ ++ spin_lock(&fc->lock); ++ if (list_empty(&ff->write_entry)) ++ list_add(&ff->write_entry, &fi->write_files); ++ spin_unlock(&fc->lock); ++} ++ + void fuse_finish_open(struct inode *inode, struct file *file) + { + struct fuse_file *ff = file->private_data; +@@ -1942,20 +1958,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = { + + static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) + { +- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { +- struct inode *inode = file_inode(file); +- struct fuse_conn *fc = get_fuse_conn(inode); +- struct fuse_inode *fi = get_fuse_inode(inode); +- struct fuse_file *ff = file->private_data; +- /* +- * file may be written through mmap, so chain it onto the +- * inodes's write_file list +- */ +- spin_lock(&fc->lock); +- if (list_empty(&ff->write_entry)) +- list_add(&ff->write_entry, &fi->write_files); +- spin_unlock(&fc->lock); +- } ++ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) ++ fuse_link_write_file(file); ++ + file_accessed(file); + vma->vm_ops = &fuse_file_vm_ops; + 
return 0; +-- +2.7.4 + + +From fb2667e92663bb09a3d7e79de4103b7ce296b589 Mon Sep 17 00:00:00 2001 +From: Pavel Emelyanov +Date: Thu, 10 Oct 2013 17:10:16 +0400 +Subject: [PATCH 68/73] fuse: Prepare to handle short reads + +A helper which gets called when read reports less bytes than was requested. +See patch "trust kernel i_size only" for details. + +Signed-off-by: Maxim Patlasov +Signed-off-by: Pavel Emelyanov +Signed-off-by: Miklos Szeredi +(cherry picked from commit a92adc824ed5feaa2d4f7029f21170f574987aee) +Signed-off-by: Alex Shi +--- + fs/fuse/file.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +diff --git a/fs/fuse/file.c b/fs/fuse/file.c +index f018386..4646157 100644 +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -671,6 +671,15 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, + spin_unlock(&fc->lock); + } + ++static void fuse_short_read(struct fuse_req *req, struct inode *inode, ++ u64 attr_ver) ++{ ++ size_t num_read = req->out.args[0].size; ++ ++ loff_t pos = page_offset(req->pages[0]) + num_read; ++ fuse_read_update_size(inode, pos, attr_ver); ++} ++ + static int fuse_readpage(struct file *file, struct page *page) + { + struct fuse_io_priv io = { .async = 0, .file = file }; +@@ -708,18 +717,18 @@ static int fuse_readpage(struct file *file, struct page *page) + req->page_descs[0].length = count; + num_read = fuse_send_read(req, &io, pos, count, NULL); + err = req->out.h.error; +- fuse_put_request(fc, req); + + if (!err) { + /* + * Short read means EOF. If file size is larger, truncate it + */ + if (num_read < count) +- fuse_read_update_size(inode, pos + num_read, attr_ver); ++ fuse_short_read(req, inode, attr_ver); + + SetPageUptodate(page); + } + ++ fuse_put_request(fc, req); + fuse_invalidate_atime(inode); + out: + unlock_page(page); +@@ -742,13 +751,9 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) + /* + * Short read means EOF. 
If file size is larger, truncate it + */ +- if (!req->out.h.error && num_read < count) { +- loff_t pos; ++ if (!req->out.h.error && num_read < count) ++ fuse_short_read(req, inode, req->misc.read.attr_ver); + +- pos = page_offset(req->pages[0]) + num_read; +- fuse_read_update_size(inode, pos, +- req->misc.read.attr_ver); +- } + fuse_invalidate_atime(inode); + } + +-- +2.7.4 + + +From 628bfe8bf63df0e83632598115cb91c7c52a115a Mon Sep 17 00:00:00 2001 +From: Pavel Emelyanov +Date: Thu, 10 Oct 2013 17:10:46 +0400 +Subject: [PATCH 69/73] fuse: Trust kernel i_size only + +Make fuse think that when writeback is on the inode's i_size is always +up-to-date and not update it with the value received from the userspace. +This is done because the page cache code may update i_size without letting +the FS know. + +This assumption implies fixing the previously introduced short-read helper -- +when a short read occurs the 'hole' is filled with zeroes. + +fuse_file_fallocate() is also fixed because now we should keep i_size up to +date, so it must be updated if FUSE_FALLOCATE request succeeded. + +Signed-off-by: Maxim V. 
Patlasov +Signed-off-by: Miklos Szeredi +(cherry picked from commit 8373200b124d03de7fa2e99be56de8642e604e9e) +Signed-off-by: Alex Shi +--- + fs/fuse/dir.c | 13 +++++++++++-- + fs/fuse/file.c | 21 +++++++++++++++++++-- + fs/fuse/inode.c | 11 +++++++++-- + 3 files changed, 39 insertions(+), 6 deletions(-) + +diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c +index d53a7d9..6fc8339 100644 +--- a/fs/fuse/dir.c ++++ b/fs/fuse/dir.c +@@ -870,6 +870,11 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, + struct kstat *stat) + { + unsigned int blkbits; ++ struct fuse_conn *fc = get_fuse_conn(inode); ++ ++ /* see the comment in fuse_change_attributes() */ ++ if (fc->writeback_cache && S_ISREG(inode->i_mode)) ++ attr->size = i_size_read(inode); + + stat->dev = inode->i_sb->s_dev; + stat->ino = attr->ino; +@@ -1611,6 +1616,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, + struct fuse_setattr_in inarg; + struct fuse_attr_out outarg; + bool is_truncate = false; ++ bool is_wb = fc->writeback_cache; + loff_t oldsize; + int err; + +@@ -1682,7 +1688,9 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, + fuse_change_attributes_common(inode, &outarg.attr, + attr_timeout(&outarg)); + oldsize = inode->i_size; +- i_size_write(inode, outarg.attr.size); ++ /* see the comment in fuse_change_attributes() */ ++ if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) ++ i_size_write(inode, outarg.attr.size); + + if (is_truncate) { + /* NOTE: this may release/reacquire fc->lock */ +@@ -1694,7 +1702,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, + * Only call invalidate_inode_pages2() after removing + * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. 
+ */ +- if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { ++ if ((is_truncate || !is_wb) && ++ S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { + truncate_pagecache(inode, outarg.attr.size); + invalidate_inode_pages2(inode->i_mapping); + } +diff --git a/fs/fuse/file.c b/fs/fuse/file.c +index 4646157..5b863c0 100644 +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -675,9 +675,26 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode, + u64 attr_ver) + { + size_t num_read = req->out.args[0].size; ++ struct fuse_conn *fc = get_fuse_conn(inode); ++ ++ if (fc->writeback_cache) { ++ /* ++ * A hole in a file. Some data after the hole are in page cache, ++ * but have not reached the client fs yet. So, the hole is not ++ * present there. ++ */ ++ int i; ++ int start_idx = num_read >> PAGE_CACHE_SHIFT; ++ size_t off = num_read & (PAGE_CACHE_SIZE - 1); + +- loff_t pos = page_offset(req->pages[0]) + num_read; +- fuse_read_update_size(inode, pos, attr_ver); ++ for (i = start_idx; i < req->num_pages; i++) { ++ zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE); ++ off = 0; ++ } ++ } else { ++ loff_t pos = page_offset(req->pages[0]) + num_read; ++ fuse_read_update_size(inode, pos, attr_ver); ++ } + } + + static int fuse_readpage(struct file *file, struct page *page) +diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c +index 73f6bcb..22a78a6 100644 +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -197,6 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + { + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); ++ bool is_wb = fc->writeback_cache; + loff_t oldsize; + struct timespec old_mtime; + +@@ -211,10 +212,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + fuse_change_attributes_common(inode, attr, attr_valid); + + oldsize = inode->i_size; +- i_size_write(inode, attr->size); ++ /* ++ * In case of writeback_cache enabled, the cached 
writes beyond EOF ++ * extend local i_size without keeping userspace server in sync. So, ++ * attr->size coming from server can be stale. We cannot trust it. ++ */ ++ if (!is_wb || !S_ISREG(inode->i_mode)) ++ i_size_write(inode, attr->size); + spin_unlock(&fc->lock); + +- if (S_ISREG(inode->i_mode)) { ++ if (!is_wb && S_ISREG(inode->i_mode)) { + bool inval = false; + + if (oldsize != attr->size) { +-- +2.7.4 + + +From 9d48c62fbcbc8ce7c9cad37119d12fe0be61300c Mon Sep 17 00:00:00 2001 +From: Maxim Patlasov +Date: Thu, 26 Dec 2013 19:51:11 +0400 +Subject: [PATCH 70/73] fuse: Trust kernel i_mtime only + +Let the kernel maintain i_mtime locally: + - clear S_NOCMTIME + - implement i_op->update_time() + - flush mtime on fsync and last close + - update i_mtime explicitly on truncate and fallocate + +Fuse inode flag FUSE_I_MTIME_DIRTY serves as indication that local i_mtime +should be flushed to the server eventually. + +Signed-off-by: Maxim Patlasov +Signed-off-by: Miklos Szeredi +(cherry picked from commit b0aa760652179072119582375f8dc896ed5b5dfd) +Signed-off-by: Alex Shi +--- + fs/fuse/dir.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++--------- + fs/fuse/file.c | 30 +++++++++++++--- + fs/fuse/fuse_i.h | 6 +++- + fs/fuse/inode.c | 13 +++++-- + 4 files changed, 132 insertions(+), 25 deletions(-) + +diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c +index 6fc8339..b4adb3d9 100644 +--- a/fs/fuse/dir.c ++++ b/fs/fuse/dir.c +@@ -873,8 +873,11 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, + struct fuse_conn *fc = get_fuse_conn(inode); + + /* see the comment in fuse_change_attributes() */ +- if (fc->writeback_cache && S_ISREG(inode->i_mode)) ++ if (fc->writeback_cache && S_ISREG(inode->i_mode)) { + attr->size = i_size_read(inode); ++ attr->mtime = inode->i_mtime.tv_sec; ++ attr->mtimensec = inode->i_mtime.tv_nsec; ++ } + + stat->dev = inode->i_sb->s_dev; + stat->ino = attr->ino; +@@ -1513,12 +1516,16 @@ static long fuse_dir_compat_ioctl(struct file 
*file, unsigned int cmd, + FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); + } + +-static bool update_mtime(unsigned ivalid) ++static bool update_mtime(unsigned ivalid, bool trust_local_mtime) + { + /* Always update if mtime is explicitly set */ + if (ivalid & ATTR_MTIME_SET) + return true; + ++ /* Or if kernel i_mtime is the official one */ ++ if (trust_local_mtime) ++ return true; ++ + /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ + if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) + return false; +@@ -1527,7 +1534,8 @@ static bool update_mtime(unsigned ivalid) + return true; + } + +-static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) ++static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, ++ bool trust_local_mtime) + { + unsigned ivalid = iattr->ia_valid; + +@@ -1546,11 +1554,11 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) + if (!(ivalid & ATTR_ATIME_SET)) + arg->valid |= FATTR_ATIME_NOW; + } +- if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) { ++ if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) { + arg->valid |= FATTR_MTIME; + arg->mtime = iattr->ia_mtime.tv_sec; + arg->mtimensec = iattr->ia_mtime.tv_nsec; +- if (!(ivalid & ATTR_MTIME_SET)) ++ if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime) + arg->valid |= FATTR_MTIME_NOW; + } + } +@@ -1599,6 +1607,63 @@ void fuse_release_nowrite(struct inode *inode) + spin_unlock(&fc->lock); + } + ++static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, ++ struct inode *inode, ++ struct fuse_setattr_in *inarg_p, ++ struct fuse_attr_out *outarg_p) ++{ ++ req->in.h.opcode = FUSE_SETATTR; ++ req->in.h.nodeid = get_node_id(inode); ++ req->in.numargs = 1; ++ req->in.args[0].size = sizeof(*inarg_p); ++ req->in.args[0].value = inarg_p; ++ req->out.numargs = 1; ++ if (fc->minor < 9) ++ req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; ++ else ++ req->out.args[0].size = 
sizeof(*outarg_p); ++ req->out.args[0].value = outarg_p; ++} ++ ++/* ++ * Flush inode->i_mtime to the server ++ */ ++int fuse_flush_mtime(struct file *file, bool nofail) ++{ ++ struct inode *inode = file->f_mapping->host; ++ struct fuse_inode *fi = get_fuse_inode(inode); ++ struct fuse_conn *fc = get_fuse_conn(inode); ++ struct fuse_req *req = NULL; ++ struct fuse_setattr_in inarg; ++ struct fuse_attr_out outarg; ++ int err; ++ ++ if (nofail) { ++ req = fuse_get_req_nofail_nopages(fc, file); ++ } else { ++ req = fuse_get_req_nopages(fc); ++ if (IS_ERR(req)) ++ return PTR_ERR(req); ++ } ++ ++ memset(&inarg, 0, sizeof(inarg)); ++ memset(&outarg, 0, sizeof(outarg)); ++ ++ inarg.valid |= FATTR_MTIME; ++ inarg.mtime = inode->i_mtime.tv_sec; ++ inarg.mtimensec = inode->i_mtime.tv_nsec; ++ ++ fuse_setattr_fill(fc, req, inode, &inarg, &outarg); ++ fuse_request_send(fc, req); ++ err = req->out.h.error; ++ fuse_put_request(fc, req); ++ ++ if (!err) ++ clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); ++ ++ return err; ++} ++ + /* + * Set attributes, and at the same time refresh them. 
+ * +@@ -1619,6 +1684,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, + bool is_wb = fc->writeback_cache; + loff_t oldsize; + int err; ++ bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode); + + if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) + attr->ia_valid |= ATTR_FORCE; +@@ -1647,7 +1713,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, + + memset(&inarg, 0, sizeof(inarg)); + memset(&outarg, 0, sizeof(outarg)); +- iattr_to_fattr(attr, &inarg); ++ iattr_to_fattr(attr, &inarg, trust_local_mtime); + if (file) { + struct fuse_file *ff = file->private_data; + inarg.valid |= FATTR_FH; +@@ -1658,17 +1724,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, + inarg.valid |= FATTR_LOCKOWNER; + inarg.lock_owner = fuse_lock_owner_id(fc, current->files); + } +- req->in.h.opcode = FUSE_SETATTR; +- req->in.h.nodeid = get_node_id(inode); +- req->in.numargs = 1; +- req->in.args[0].size = sizeof(inarg); +- req->in.args[0].value = &inarg; +- req->out.numargs = 1; +- if (fc->minor < 9) +- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; +- else +- req->out.args[0].size = sizeof(outarg); +- req->out.args[0].value = &outarg; ++ fuse_setattr_fill(fc, req, inode, &inarg, &outarg); + fuse_request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); +@@ -1685,6 +1741,12 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, + } + + spin_lock(&fc->lock); ++ /* the kernel maintains i_mtime locally */ ++ if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) { ++ inode->i_mtime = attr->ia_mtime; ++ clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); ++ } ++ + fuse_change_attributes_common(inode, &outarg.attr, + attr_timeout(&outarg)); + oldsize = inode->i_size; +@@ -1915,6 +1977,17 @@ static int fuse_removexattr(struct dentry *entry, const char *name) + return err; + } + ++static int fuse_update_time(struct inode *inode, struct timespec *now, ++ int flags) ++{ ++ if (flags & S_MTIME) { ++ inode->i_mtime = *now; ++ 
set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state); ++ BUG_ON(!S_ISREG(inode->i_mode)); ++ } ++ return 0; ++} ++ + static const struct inode_operations fuse_dir_inode_operations = { + .lookup = fuse_lookup, + .mkdir = fuse_mkdir, +@@ -1954,6 +2027,7 @@ static const struct inode_operations fuse_common_inode_operations = { + .getxattr = fuse_getxattr, + .listxattr = fuse_listxattr, + .removexattr = fuse_removexattr, ++ .update_time = fuse_update_time, + }; + + static const struct inode_operations fuse_symlink_inode_operations = { +diff --git a/fs/fuse/file.c b/fs/fuse/file.c +index 5b863c0..5916dc5 100644 +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -308,6 +308,9 @@ static int fuse_open(struct inode *inode, struct file *file) + + static int fuse_release(struct inode *inode, struct file *file) + { ++ if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) ++ fuse_flush_mtime(file, true); ++ + fuse_release_common(file, FUSE_RELEASE); + + /* return value is ignored by VFS */ +@@ -475,6 +478,12 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, + + fuse_sync_writes(inode); + ++ if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) { ++ err = fuse_flush_mtime(file, false); ++ if (err) ++ goto out; ++ } ++ + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) { + err = PTR_ERR(req); +@@ -960,16 +969,21 @@ static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, + return req->misc.write.out.size; + } + +-void fuse_write_update_size(struct inode *inode, loff_t pos) ++bool fuse_write_update_size(struct inode *inode, loff_t pos) + { + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); ++ bool ret = false; + + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; +- if (pos > inode->i_size) ++ if (pos > inode->i_size) { + i_size_write(inode, pos); ++ ret = true; ++ } + spin_unlock(&fc->lock); ++ ++ return ret; + } + + static size_t
fuse_send_write_pages(struct fuse_req *req, struct file *file, +@@ -2873,8 +2887,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, + goto out; + + /* we could have extended the file */ +- if (!(mode & FALLOC_FL_KEEP_SIZE)) +- fuse_write_update_size(inode, offset + length); ++ if (!(mode & FALLOC_FL_KEEP_SIZE)) { ++ bool changed = fuse_write_update_size(inode, offset + length); ++ ++ if (changed && fc->writeback_cache) { ++ struct fuse_inode *fi = get_fuse_inode(inode); ++ ++ inode->i_mtime = current_fs_time(inode->i_sb); ++ set_bit(FUSE_I_MTIME_DIRTY, &fi->state); ++ } ++ } + + if (mode & FALLOC_FL_PUNCH_HOLE) + truncate_pagecache_range(inode, offset, offset + length - 1); +diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h +index 374a8be..1e6ad6d 100644 +--- a/fs/fuse/fuse_i.h ++++ b/fs/fuse/fuse_i.h +@@ -119,6 +119,8 @@ enum { + FUSE_I_INIT_RDPLUS, + /** An operation changing file size is in progress */ + FUSE_I_SIZE_UNSTABLE, ++ /** i_mtime has been updated locally; a flush to userspace needed */ ++ FUSE_I_MTIME_DIRTY, + }; + + struct fuse_conn; +@@ -876,7 +878,9 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, + unsigned fuse_file_poll(struct file *file, poll_table *wait); + int fuse_dev_release(struct inode *inode, struct file *file); + +-void fuse_write_update_size(struct inode *inode, loff_t pos); ++bool fuse_write_update_size(struct inode *inode, loff_t pos); ++ ++int fuse_flush_mtime(struct file *file, bool nofail); + + int fuse_do_setattr(struct inode *inode, struct iattr *attr, + struct file *file); +diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c +index 22a78a6..338da52 100644 +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -170,8 +170,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, + inode->i_blocks = attr->blocks; + inode->i_atime.tv_sec = attr->atime; + inode->i_atime.tv_nsec = attr->atimensec; +- inode->i_mtime.tv_sec = attr->mtime; +- inode->i_mtime.tv_nsec = 
attr->mtimensec; ++ /* mtime from server may be stale due to local buffered write */ ++ if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { ++ inode->i_mtime.tv_sec = attr->mtime; ++ inode->i_mtime.tv_nsec = attr->mtimensec; ++ } + inode->i_ctime.tv_sec = attr->ctime; + inode->i_ctime.tv_nsec = attr->ctimensec; + +@@ -250,6 +253,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) + { + inode->i_mode = attr->mode & S_IFMT; + inode->i_size = attr->size; ++ inode->i_mtime.tv_sec = attr->mtime; ++ inode->i_mtime.tv_nsec = attr->mtimensec; + if (S_ISREG(inode->i_mode)) { + fuse_init_common(inode); + fuse_init_file_inode(inode); +@@ -296,7 +301,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, + return NULL; + + if ((inode->i_state & I_NEW)) { +- inode->i_flags |= S_NOATIME|S_NOCMTIME; ++ inode->i_flags |= S_NOATIME; ++ if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) ++ inode->i_flags |= S_NOCMTIME; + inode->i_generation = generation; + inode->i_data.backing_dev_info = &fc->bdi; + fuse_init_inode(inode, attr); +-- +2.7.4 + + +From 6b1c8b7974158eabc822d1a01f528f95793219ae Mon Sep 17 00:00:00 2001 +From: Maxim Patlasov +Date: Mon, 28 Apr 2014 14:19:24 +0200 +Subject: [PATCH 71/73] fuse: trust kernel i_ctime only + +Let the kernel maintain i_ctime locally: update i_ctime explicitly on +truncate, fallocate, open(O_TRUNC), setxattr, removexattr, link, rename, +unlink. + +The inode flag I_DIRTY_SYNC serves as indication that local i_ctime should +be flushed to the server eventually. The patch sets the flag and updates +i_ctime in course of operations listed above. 
+ +Signed-off-by: Maxim Patlasov +Signed-off-by: Miklos Szeredi +(cherry picked from commit 31f3267b4ba16b12fb9dd3b1953ea0f221cc2ab4) +Signed-off-by: Alex Shi +--- + fs/fuse/dir.c | 22 ++++++++++++++++++++-- + fs/fuse/inode.c | 6 ++++-- + 2 files changed, 24 insertions(+), 4 deletions(-) + +diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c +index b4adb3d9..6d5e45c 100644 +--- a/fs/fuse/dir.c ++++ b/fs/fuse/dir.c +@@ -680,6 +680,14 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, + return create_new_entry(fc, req, dir, entry, S_IFLNK); + } + ++static inline void fuse_update_ctime(struct inode *inode) ++{ ++ if (!IS_NOCMTIME(inode)) { ++ inode->i_ctime = current_fs_time(inode->i_sb); ++ mark_inode_dirty_sync(inode); ++ } ++} ++ + static int fuse_unlink(struct inode *dir, struct dentry *entry) + { + int err; +@@ -714,6 +722,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) + fuse_invalidate_attr(inode); + fuse_invalidate_attr(dir); + fuse_invalidate_entry_cache(entry); ++ fuse_update_ctime(inode); + } else if (err == -EINTR) + fuse_invalidate_entry(entry); + return err; +@@ -772,6 +781,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, + if (!err) { + /* ctime changes */ + fuse_invalidate_attr(oldent->d_inode); ++ fuse_update_ctime(oldent->d_inode); + + fuse_invalidate_attr(olddir); + if (olddir != newdir) +@@ -781,6 +791,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, + if (newent->d_inode) { + fuse_invalidate_attr(newent->d_inode); + fuse_invalidate_entry_cache(newent); ++ fuse_update_ctime(newent->d_inode); + } + } else if (err == -EINTR) { + /* If request was interrupted, DEITY only knows if the +@@ -860,6 +871,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, + inc_nlink(inode); + spin_unlock(&fc->lock); + fuse_invalidate_attr(inode); ++ fuse_update_ctime(inode); + } else if (err == -EINTR) { + fuse_invalidate_attr(inode); + } +@@ -877,6 +889,8 @@ static void 
fuse_fillattr(struct inode *inode, struct fuse_attr *attr, + attr->size = i_size_read(inode); + attr->mtime = inode->i_mtime.tv_sec; + attr->mtimensec = inode->i_mtime.tv_nsec; ++ attr->ctime = inode->i_ctime.tv_sec; ++ attr->ctimensec = inode->i_ctime.tv_nsec; + } + + stat->dev = inode->i_sb->s_dev; +@@ -1841,8 +1855,10 @@ static int fuse_setxattr(struct dentry *entry, const char *name, + fc->no_setxattr = 1; + err = -EOPNOTSUPP; + } +- if (!err) ++ if (!err) { + fuse_invalidate_attr(inode); ++ fuse_update_ctime(inode); ++ } + return err; + } + +@@ -1972,8 +1988,10 @@ static int fuse_removexattr(struct dentry *entry, const char *name) + fc->no_removexattr = 1; + err = -EOPNOTSUPP; + } +- if (!err) ++ if (!err) { + fuse_invalidate_attr(inode); ++ fuse_update_ctime(inode); ++ } + return err; + } + +diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c +index 338da52..8fe0b48 100644 +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -174,9 +174,9 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, + if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { + inode->i_mtime.tv_sec = attr->mtime; + inode->i_mtime.tv_nsec = attr->mtimensec; ++ inode->i_ctime.tv_sec = attr->ctime; ++ inode->i_ctime.tv_nsec = attr->ctimensec; + } +- inode->i_ctime.tv_sec = attr->ctime; +- inode->i_ctime.tv_nsec = attr->ctimensec; + + if (attr->blksize != 0) + inode->i_blkbits = ilog2(attr->blksize); +@@ -255,6 +255,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) + inode->i_size = attr->size; + inode->i_mtime.tv_sec = attr->mtime; + inode->i_mtime.tv_nsec = attr->mtimensec; ++ inode->i_ctime.tv_sec = attr->ctime; ++ inode->i_ctime.tv_nsec = attr->ctimensec; + if (S_ISREG(inode->i_mode)) { + fuse_init_common(inode); + fuse_init_file_inode(inode); +-- +2.7.4 + + +From e03ae8b69bb899c5cdd0c594fa58a85cd180df1b Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Mon, 28 Apr 2014 16:43:44 +0200 +Subject: [PATCH 72/73] fuse: add renameat2 
support + +Support RENAME_EXCHANGE and RENAME_NOREPLACE flags on the userspace ABI. + +Signed-off-by: Miklos Szeredi +(cherry picked from commit 1560c974dcd40a8d3f193283acd7cc6aee13dc13) +Signed-off-by: Alex Shi + + Conflicts: + fs/fuse/dir.c + include/uapi/linux/fuse.h +--- + fs/fuse/dir.c | 24 ++++++++++++++++-------- + fs/fuse/fuse_i.h | 3 +++ + include/uapi/linux/fuse.h | 10 ++++++++++ + 3 files changed, 29 insertions(+), 8 deletions(-) + +diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c +index 6d5e45c..33dec83 100644 +--- a/fs/fuse/dir.c ++++ b/fs/fuse/dir.c +@@ -753,23 +753,26 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) + return err; + } + +-static int fuse_rename(struct inode *olddir, struct dentry *oldent, +- struct inode *newdir, struct dentry *newent) ++static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, ++ struct inode *newdir, struct dentry *newent, ++ unsigned int flags, int opcode, size_t argsize) + { + int err; +- struct fuse_rename_in inarg; ++ struct fuse_rename2_in inarg; + struct fuse_conn *fc = get_fuse_conn(olddir); +- struct fuse_req *req = fuse_get_req_nopages(fc); ++ struct fuse_req *req; + ++ req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + +- memset(&inarg, 0, sizeof(inarg)); ++ memset(&inarg, 0, argsize); + inarg.newdir = get_node_id(newdir); +- req->in.h.opcode = FUSE_RENAME; ++ inarg.flags = flags; ++ req->in.h.opcode = opcode; + req->in.h.nodeid = get_node_id(olddir); + req->in.numargs = 3; +- req->in.args[0].size = sizeof(inarg); ++ req->in.args[0].size = argsize; + req->in.args[0].value = &inarg; + req->in.args[1].size = oldent->d_name.len + 1; + req->in.args[1].value = oldent->d_name.name; +@@ -783,12 +786,17 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, + fuse_invalidate_attr(oldent->d_inode); + fuse_update_ctime(oldent->d_inode); + ++ if (flags & RENAME_EXCHANGE) { ++ fuse_invalidate_attr(newent->d_inode); ++ 
fuse_update_ctime(newent->d_inode); ++ } ++ + fuse_invalidate_attr(olddir); + if (olddir != newdir) + fuse_invalidate_attr(newdir); + + /* newent will end up negative */ +- if (newent->d_inode) { ++ if (!(flags & RENAME_EXCHANGE) && newent->d_inode) { + fuse_invalidate_attr(newent->d_inode); + fuse_invalidate_entry_cache(newent); + fuse_update_ctime(newent->d_inode); +diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h +index 1e6ad6d..7cc58c9 100644 +--- a/fs/fuse/fuse_i.h ++++ b/fs/fuse/fuse_i.h +@@ -544,6 +544,9 @@ struct fuse_conn { + /** Is fallocate not implemented by fs? */ + unsigned no_fallocate:1; + ++ /** Is rename with flags not implemented by fs? */ ++ unsigned no_rename2:1; ++ + /** Use enhanced/automatic page cache invalidation. */ + unsigned auto_inval_data:1; + +diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h +index 60bb2f9..b77cf38 100644 +--- a/include/uapi/linux/fuse.h ++++ b/include/uapi/linux/fuse.h +@@ -93,6 +93,9 @@ + * + * 7.22 + * - add FUSE_ASYNC_DIO ++ * ++ * 7.23 ++ * - add FUSE_RENAME2 request + */ + + #ifndef _LINUX_FUSE_H +@@ -343,6 +346,7 @@ enum fuse_opcode { + FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, ++ FUSE_RENAME2 = 45, + + /* CUSE specific operations */ + CUSE_INIT = 4096, +@@ -421,6 +425,12 @@ struct fuse_rename_in { + uint64_t newdir; + }; + ++struct fuse_rename2_in { ++ uint64_t newdir; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ + struct fuse_link_in { + uint64_t oldnodeid; + }; +-- +2.7.4 + + +From ab0f88ef018ee96ff56dc39a032c9882a07037d0 Mon Sep 17 00:00:00 2001 +From: Alex Shi +Date: Thu, 20 Aug 2015 14:56:22 +0800 +Subject: [PATCH 73/73] fs/xfs: add missed symbol readlink_copy + +When compiling davinci_all_defconfig in Kevin's kernelci.org, +the missing symbol export causes the following error: + + ERROR: "readlink_copy" [fs/xfs/xfs.ko] undefined! + +This patch fixes the problem.
+ +Signed-off-by: Alex Shi +--- + fs/namei.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/namei.c b/fs/namei.c +index 0fbf150..e3b23a0 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -4340,6 +4340,7 @@ int readlink_copy(char __user *buffer, int buflen, const char *link) + out: + return len; + } ++EXPORT_SYMBOL(readlink_copy); + + /* + * A helper for ->readlink(). This should be used *ONLY* for symlinks that +-- +2.7.4 + From 8c0169a1b8508ebe2199fa8a839a80460af7d38b Mon Sep 17 00:00:00 2001 From: Radostan Riedel Date: Fri, 2 Dec 2016 12:44:31 -0800 Subject: [PATCH 2/5] linux.aarch64.conf: enable CONFIG_OVERLAY_FS=y --- projects/Odroid_C2/linux/linux.aarch64.conf | 1 + projects/WeTek_Hub/linux/linux.aarch64.conf | 1 + projects/WeTek_Play_2/linux/linux.aarch64.conf | 1 + 3 files changed, 3 insertions(+) diff --git a/projects/Odroid_C2/linux/linux.aarch64.conf b/projects/Odroid_C2/linux/linux.aarch64.conf index 9200b30a70..f500884fca 100644 --- a/projects/Odroid_C2/linux/linux.aarch64.conf +++ b/projects/Odroid_C2/linux/linux.aarch64.conf @@ -3360,6 +3360,7 @@ CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m # CONFIG_CUSE is not set +CONFIG_OVERLAY_FS=y # # Caches diff --git a/projects/WeTek_Hub/linux/linux.aarch64.conf b/projects/WeTek_Hub/linux/linux.aarch64.conf index 2fcdbb437f..2796493a27 100644 --- a/projects/WeTek_Hub/linux/linux.aarch64.conf +++ b/projects/WeTek_Hub/linux/linux.aarch64.conf @@ -3369,6 +3369,7 @@ CONFIG_FANOTIFY=y CONFIG_AUTOFS4_FS=y CONFIG_FUSE_FS=m # CONFIG_CUSE is not set +CONFIG_OVERLAY_FS=y # # Caches diff --git a/projects/WeTek_Play_2/linux/linux.aarch64.conf b/projects/WeTek_Play_2/linux/linux.aarch64.conf index 0a9f4bb3e2..defce918b7 100644 --- a/projects/WeTek_Play_2/linux/linux.aarch64.conf +++ b/projects/WeTek_Play_2/linux/linux.aarch64.conf @@ -3361,6 +3361,7 @@ CONFIG_FANOTIFY=y CONFIG_AUTOFS4_FS=y CONFIG_FUSE_FS=m # CONFIG_CUSE is not set +CONFIG_OVERLAY_FS=y # # Caches From
5982d8fc6624f67a0c6d7213f2daa274a2541b11 Mon Sep 17 00:00:00 2001 From: Lukas Rusak Date: Fri, 2 Dec 2016 12:44:31 -0800 Subject: [PATCH 3/5] containerd: add aarch64 support --- packages/addons/addon-depends/containerd/package.mk | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/addons/addon-depends/containerd/package.mk b/packages/addons/addon-depends/containerd/package.mk index c333922a85..32fb881ed1 100644 --- a/packages/addons/addon-depends/containerd/package.mk +++ b/packages/addons/addon-depends/containerd/package.mk @@ -48,6 +48,9 @@ pre_make_target() { ;; esac ;; + aarch64) + export GOARCH=arm64 + ;; esac export GOOS=linux From 13b8d5923d6a4d0a28a109fca52842b9e0de4d49 Mon Sep 17 00:00:00 2001 From: Lukas Rusak Date: Fri, 2 Dec 2016 12:44:31 -0800 Subject: [PATCH 4/5] runc: add aarch64 support --- packages/addons/addon-depends/runc/package.mk | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/addons/addon-depends/runc/package.mk b/packages/addons/addon-depends/runc/package.mk index fdc2644d13..a9d5791dc4 100644 --- a/packages/addons/addon-depends/runc/package.mk +++ b/packages/addons/addon-depends/runc/package.mk @@ -48,6 +48,9 @@ pre_make_target() { ;; esac ;; + aarch64) + export GOARCH=arm64 + ;; esac export GOOS=linux From 0c14cf1e26af83627d3d85b76c75351f1a80027b Mon Sep 17 00:00:00 2001 From: Radostan Riedel Date: Fri, 2 Dec 2016 12:44:32 -0800 Subject: [PATCH 5/5] docker: add aarch64 support --- packages/addons/service/docker/package.mk | 5 ++++- packages/addons/service/docker/source/config/docker.conf | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/addons/service/docker/package.mk b/packages/addons/service/docker/package.mk index b25eac2343..c90bb9de3c 100644 --- a/packages/addons/service/docker/package.mk +++ b/packages/addons/service/docker/package.mk @@ -20,7 +20,7 @@ PKG_NAME="docker" PKG_VERSION="1.12.3" PKG_REV="109" PKG_ARCH="any" -PKG_ADDON_PROJECTS="Generic RPi RPi2 imx6" 
+PKG_ADDON_PROJECTS="Generic RPi RPi2 imx6 WeTek_Hub WeTek_Play_2 Odroid_C2" PKG_LICENSE="ASL" PKG_SITE="http://www.docker.com/" PKG_URL="https://github.com/docker/docker/archive/v${PKG_VERSION}.tar.gz" @@ -57,6 +57,9 @@ configure_target() { ;; esac ;; + aarch64) + export GOARCH=arm64 + ;; esac export GOOS=linux diff --git a/packages/addons/service/docker/source/config/docker.conf b/packages/addons/service/docker/source/config/docker.conf index 8c717fee6c..dd42259fbf 100644 --- a/packages/addons/service/docker/source/config/docker.conf +++ b/packages/addons/service/docker/source/config/docker.conf @@ -1,2 +1,2 @@ DOCKER_DAEMON_OPTS="--graph=/storage/.kodi/userdata/addon_data/service.system.docker/docker" -DOCKER_STORAGE_OPTS="--storage-driver=overlay2" +DOCKER_STORAGE_OPTS="--storage-driver=overlay2 --storage-opt overlay2.override_kernel_check=1"