From 60a1facd87c64d8350a6ee1d8dbc83f731a62149 Mon Sep 17 00:00:00 2001 From: kszaq Date: Sat, 13 Jan 2018 23:10:54 +0100 Subject: [PATCH] linux: bump amlogic-3.14 kernel to c855778 and remove merged patches --- packages/linux/package.mk | 4 +- ...rtl8712_avoid_lots_of_build_warnings.patch | 113 - .../linux-002-backport_overlayfs.patch | 11997 ---------------- ...ly-working-frequecies-and-bit-depths.patch | 58 - .../linux/linux-008-max_freq_dvfs_table.patch | 103 - 5 files changed, 2 insertions(+), 12273 deletions(-) delete mode 100644 packages/linux/patches/amlogic-3.14/linux-001-staging_rtl8712_rtl8712_avoid_lots_of_build_warnings.patch delete mode 100644 packages/linux/patches/amlogic-3.14/linux-002-backport_overlayfs.patch delete mode 100644 packages/linux/patches/amlogic-3.14/linux-007-sound_soc_aml_m8-Report-only-working-frequecies-and-bit-depths.patch delete mode 100644 projects/Odroid_C2/patches/linux/linux-008-max_freq_dvfs_table.patch diff --git a/packages/linux/package.mk b/packages/linux/package.mk index a5b429571c..ee840c868d 100644 --- a/packages/linux/package.mk +++ b/packages/linux/package.mk @@ -39,8 +39,8 @@ case "$LINUX" in PKG_DEPENDS_TARGET="$PKG_DEPENDS_TARGET aml-dtbtools:host" ;; amlogic-3.14) - PKG_VERSION="9ccf3f0" - PKG_SHA256="0b5f0ecffe6ceb0e31dfc6c27ba328d0682b05723fe88a2f5c4cf6ceb7b7565f" + PKG_VERSION="c855778" + PKG_SHA256="c6a2066e4a1052503a798ae7bf10aaf551466989fe909b130da8bdf99eb59b1a" PKG_URL="https://github.com/LibreELEC/linux-amlogic/archive/$PKG_VERSION.tar.gz" PKG_SOURCE_DIR="$PKG_NAME-amlogic-$PKG_VERSION*" PKG_PATCH_DIRS="amlogic-3.14" diff --git a/packages/linux/patches/amlogic-3.14/linux-001-staging_rtl8712_rtl8712_avoid_lots_of_build_warnings.patch b/packages/linux/patches/amlogic-3.14/linux-001-staging_rtl8712_rtl8712_avoid_lots_of_build_warnings.patch deleted file mode 100644 index 0ad0dc7884..0000000000 --- a/packages/linux/patches/amlogic-3.14/linux-001-staging_rtl8712_rtl8712_avoid_lots_of_build_warnings.patch +++ /dev/null @@ -1,113 +0,0 @@ -From fa89009a1869844f9a9360eb07c45d457446ac0e Mon Sep 17 00:00:00 2001 -From: Arnd Bergmann -Date: Thu, 5 Jun 2014 22:48:15 +0200 -Subject: [PATCH] staging: rtl8712, rtl8712: avoid lots of build warnings - -commit 0c9f3a65c5eb7fe1fc611a22eb8a8b71ea865998 upstream. - -The rtl8712 driver has an 'extern inline' function that contains an -'if', which causes lots of warnings with CONFIG_PROFILE_ALL_BRANCHES -overriding the definition of 'if': - -drivers/staging/rtl8712/ieee80211.h:759:229: warning: '______f' is static but declared in inline function 'ieee80211_get_hdrlen' which is not static [enabled by default] - -This changes the driver to use 'static inline' instead, which happens -to be the correct annotation anyway. - -Signed-off-by: Arnd Bergmann -Cc: Larry Finger -Cc: Florian Schilhabel -Signed-off-by: Greg Kroah-Hartman ---- - drivers/staging/rtl8187se/ieee80211/ieee80211.h | 4 ++-- - drivers/staging/rtl8192u/ieee80211/ieee80211.h | 10 +++++----- - drivers/staging/rtl8712/ieee80211.h | 4 ++-- - 3 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/drivers/staging/rtl8187se/ieee80211/ieee80211.h b/drivers/staging/rtl8187se/ieee80211/ieee80211.h -index 09ffd9b..6ebdd3f 100644 ---- a/drivers/staging/rtl8187se/ieee80211/ieee80211.h -+++ b/drivers/staging/rtl8187se/ieee80211/ieee80211.h -@@ -1460,12 +1460,12 @@ extern void ieee80211_sta_ps_send_null_frame(struct ieee80211_device *ieee, - - extern const long ieee80211_wlan_frequencies[]; - --extern inline void ieee80211_increment_scans(struct ieee80211_device *ieee) -+static inline void ieee80211_increment_scans(struct ieee80211_device *ieee) - { - ieee->scans++; - } - --extern inline int ieee80211_get_scans(struct ieee80211_device *ieee) -+static inline int ieee80211_get_scans(struct ieee80211_device *ieee) - { - return ieee->scans; - } -diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211.h b/drivers/staging/rtl8192u/ieee80211/ieee80211.h -index bc64f05..b1a0380 100644 ---- a/drivers/staging/rtl8192u/ieee80211/ieee80211.h -+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211.h -@@ -2250,7 +2250,7 @@ static inline void *ieee80211_priv(struct net_device *dev) - return ((struct ieee80211_device *)netdev_priv(dev))->priv; - } - --extern inline int ieee80211_is_empty_essid(const char *essid, int essid_len) -+static inline int ieee80211_is_empty_essid(const char *essid, int essid_len) - { - /* Single white space is for Linksys APs */ - if (essid_len == 1 && essid[0] == ' ') -@@ -2266,7 +2266,7 @@ extern inline int ieee80211_is_empty_essid(const char *essid, int essid_len) - return 1; - } - --extern inline int ieee80211_is_valid_mode(struct ieee80211_device *ieee, int mode) -+static inline int ieee80211_is_valid_mode(struct ieee80211_device *ieee, int mode) - { - /* - * It is possible for both access points and our device to support -@@ -2292,7 +2292,7 @@ extern inline int ieee80211_is_valid_mode(struct ieee80211_device *ieee, int mod - return 0; - } - --extern inline int ieee80211_get_hdrlen(u16 fc) -+static inline int ieee80211_get_hdrlen(u16 fc) - { - int hdrlen = IEEE80211_3ADDR_LEN; - -@@ -2578,12 +2578,12 @@ void ieee80211_softmac_scan_syncro(struct ieee80211_device *ieee); - - extern const long ieee80211_wlan_frequencies[]; - --extern inline void ieee80211_increment_scans(struct ieee80211_device *ieee) -+static inline void ieee80211_increment_scans(struct ieee80211_device *ieee) - { - ieee->scans++; - } - --extern inline int ieee80211_get_scans(struct ieee80211_device *ieee) -+static inline int ieee80211_get_scans(struct ieee80211_device *ieee) - { - return ieee->scans; - } -diff --git a/drivers/staging/rtl8712/ieee80211.h b/drivers/staging/rtl8712/ieee80211.h -index da4000e..8269be8 100644 ---- a/drivers/staging/rtl8712/ieee80211.h -+++ b/drivers/staging/rtl8712/ieee80211.h -@@ -734,7 +734,7 @@ enum ieee80211_state { - #define IEEE_G (1<<2) - #define IEEE_MODE_MASK (IEEE_A|IEEE_B|IEEE_G) - --extern inline int ieee80211_is_empty_essid(const char *essid, int essid_len) -+static inline int ieee80211_is_empty_essid(const char *essid, int essid_len) - { - /* Single white space is for Linksys APs */ - if (essid_len == 1 && essid[0] == ' ') -@@ -748,7 +748,7 @@ extern inline int ieee80211_is_empty_essid(const char *essid, int essid_len) - return 1; - } - --extern inline int ieee80211_get_hdrlen(u16 fc) -+static inline int ieee80211_get_hdrlen(u16 fc) - { - int hdrlen = 24; - diff --git a/packages/linux/patches/amlogic-3.14/linux-002-backport_overlayfs.patch b/packages/linux/patches/amlogic-3.14/linux-002-backport_overlayfs.patch deleted file mode 100644 index 64fa02d97b..0000000000 --- a/packages/linux/patches/amlogic-3.14/linux-002-backport_overlayfs.patch +++ /dev/null @@ -1,11997 +0,0 @@ -From c82d759a596eb688e804ec1d1727f78d042b26fb Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Tue, 1 Apr 2014 17:08:42 +0200 -Subject: [PATCH 01/73] vfs: rename: move d_move() up - -Move the d_move() in vfs_rename_dir() up, similarly to how it's done in -vfs_rename_other(). The next patch will consolidate these two functions -and this is the only structural difference between them. - -I'm not sure if doing the d_move() after the dput is even valid. But there -may be a logical explanation for that. But moving the d_move() before the -dput() (and the mutex_unlock()) should definitely not hurt. - -Signed-off-by: Miklos Szeredi -Reviewed-by: J. Bruce Fields -(cherry picked from commit de22a4c3720a96f1c2ebf12b0857b6db6a991f2c) -Signed-off-by: Alex Shi ---- - fs/namei.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/fs/namei.c b/fs/namei.c -index 0dd72c8..f8a6d19 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -4049,13 +4049,12 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, - target->i_flags |= S_DEAD; - dont_mount(new_dentry); - } -+ if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) -+ d_move(old_dentry, new_dentry); - out: - if (target) - mutex_unlock(&target->i_mutex); - dput(new_dentry); -- if (!error) -- if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) -- d_move(old_dentry,new_dentry); - return error; - } - --- -2.7.4 - - -From 75a4b5ddf548bcfb932cc0ba8cb8eb7806ae9dbc Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Tue, 1 Apr 2014 17:08:42 +0200 -Subject: [PATCH 02/73] vfs: rename: use common code for dir and non-dir - -There's actually very little difference between vfs_rename_dir() and -vfs_rename_other() so move both inline into vfs_rename() which still stays -reasonably readable. - -Signed-off-by: Miklos Szeredi -Reviewed-by: J. Bruce Fields -(cherry picked from commit bc27027a73e8b80376b51a1583ad1c7445605e8a) -Signed-off-by: Alex Shi ---- - fs/namei.c | 187 +++++++++++++++++++++++++------------------------------------ - 1 file changed, 75 insertions(+), 112 deletions(-) - -diff --git a/fs/namei.c b/fs/namei.c -index f8a6d19..4fa9e66 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -3977,7 +3977,27 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname - return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); - } - --/* -+/** -+ * vfs_rename - rename a filesystem object -+ * @old_dir: parent of source -+ * @old_dentry: source -+ * @new_dir: parent of destination -+ * @new_dentry: destination -+ * @delegated_inode: returns an inode needing a delegation break -+ * -+ * The caller must hold multiple mutexes--see lock_rename()). -+ * -+ * If vfs_rename discovers a delegation in need of breaking at either -+ * the source or destination, it will return -EWOULDBLOCK and return a -+ * reference to the inode in delegated_inode. The caller should then -+ * break the delegation and retry. Because breaking a delegation may -+ * take a long time, the caller should drop all locks before doing -+ * so. -+ * -+ * Alternatively, a caller may pass NULL for delegated_inode. This may -+ * be appropriate for callers that expect the underlying filesystem not -+ * to be NFS exported. -+ * - * The worst of all namespace operations - renaming directory. "Perverted" - * doesn't even start to describe it. Somebody in UCB had a heck of a trip... - * Problems: -@@ -4005,19 +4025,39 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname - * ->i_mutex on parents, which works but leads to some truly excessive - * locking]. - */ --static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct inode **delegated_inode) - { -- int error = 0; -+ int error; -+ bool is_dir = d_is_dir(old_dentry); -+ const unsigned char *old_name; -+ struct inode *source = old_dentry->d_inode; - struct inode *target = new_dentry->d_inode; -- unsigned max_links = new_dir->i_sb->s_max_links; -+ -+ if (source == target) -+ return 0; -+ -+ error = may_delete(old_dir, old_dentry, is_dir); -+ if (error) -+ return error; -+ -+ if (!target) -+ error = may_create(new_dir, new_dentry); -+ else -+ error = may_delete(new_dir, new_dentry, is_dir); -+ if (error) -+ return error; -+ -+ if (!old_dir->i_op->rename) -+ return -EPERM; - - /* - * If we are going to change the parent - check write permissions, - * we'll need to flip '..'. - */ -- if (new_dir != old_dir) { -- error = inode_permission(old_dentry->d_inode, MAY_WRITE); -+ if (is_dir && new_dir != old_dir) { -+ error = inode_permission(source, MAY_WRITE); - if (error) - return error; - } -@@ -4026,134 +4066,57 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, - if (error) - return error; - -+ old_name = fsnotify_oldname_init(old_dentry->d_name.name); - dget(new_dentry); -- if (target) -+ if (!is_dir) -+ lock_two_nondirectories(source, target); -+ else if (target) - mutex_lock(&target->i_mutex); - - error = -EBUSY; - if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) - goto out; - -- error = -EMLINK; -- if (max_links && !target && new_dir != old_dir && -- new_dir->i_nlink >= max_links) -- goto out; -- -- if (target) -- shrink_dcache_parent(new_dentry); -- error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -- if (error) -- goto out; -- -- if (target) { -- target->i_flags |= S_DEAD; -- dont_mount(new_dentry); -- } -- if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) -- d_move(old_dentry, new_dentry); --out: -- if (target) -- mutex_unlock(&target->i_mutex); -- dput(new_dentry); -- return error; --} -- --static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry, -- struct inode **delegated_inode) --{ -- struct inode *target = new_dentry->d_inode; -- struct inode *source = old_dentry->d_inode; -- int error; -- -- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); -- if (error) -- return error; -- -- dget(new_dentry); -- lock_two_nondirectories(source, target); -+ if (is_dir) { -+ unsigned max_links = new_dir->i_sb->s_max_links; - -- error = -EBUSY; -- if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) -- goto out; -+ error = -EMLINK; -+ if (max_links && !target && new_dir != old_dir && -+ new_dir->i_nlink >= max_links) -+ goto out; - -- error = try_break_deleg(source, delegated_inode); -- if (error) -- goto out; -- if (target) { -- error = try_break_deleg(target, delegated_inode); -+ if (target) -+ shrink_dcache_parent(new_dentry); -+ } else { -+ error = try_break_deleg(source, delegated_inode); - if (error) - goto out; -+ if (target) { -+ error = try_break_deleg(target, delegated_inode); -+ if (error) -+ goto out; -+ } - } - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); - if (error) - goto out; - -- if (target) -+ if (target) { -+ if (is_dir) -+ target->i_flags |= S_DEAD; - dont_mount(new_dentry); -+ } - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) - d_move(old_dentry, new_dentry); - out: -- unlock_two_nondirectories(source, target); -+ if (!is_dir) -+ unlock_two_nondirectories(source, target); -+ else if (target) -+ mutex_unlock(&target->i_mutex); - dput(new_dentry); -- return error; --} -- --/** -- * vfs_rename - rename a filesystem object -- * @old_dir: parent of source -- * @old_dentry: source -- * @new_dir: parent of destination -- * @new_dentry: destination -- * @delegated_inode: returns an inode needing a delegation break -- * -- * The caller must hold multiple mutexes--see lock_rename()). -- * -- * If vfs_rename discovers a delegation in need of breaking at either -- * the source or destination, it will return -EWOULDBLOCK and return a -- * reference to the inode in delegated_inode. The caller should then -- * break the delegation and retry. Because breaking a delegation may -- * take a long time, the caller should drop all locks before doing -- * so. -- * -- * Alternatively, a caller may pass NULL for delegated_inode. This may -- * be appropriate for callers that expect the underlying filesystem not -- * to be NFS exported. -- */ --int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry, -- struct inode **delegated_inode) --{ -- int error; -- int is_dir = d_is_dir(old_dentry); -- const unsigned char *old_name; -- -- if (old_dentry->d_inode == new_dentry->d_inode) -- return 0; -- -- error = may_delete(old_dir, old_dentry, is_dir); -- if (error) -- return error; -- -- if (!new_dentry->d_inode) -- error = may_create(new_dir, new_dentry); -- else -- error = may_delete(new_dir, new_dentry, is_dir); -- if (error) -- return error; -- -- if (!old_dir->i_op->rename) -- return -EPERM; -- -- old_name = fsnotify_oldname_init(old_dentry->d_name.name); -- -- if (is_dir) -- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); -- else -- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode); - if (!error) - fsnotify_move(old_dir, new_dir, old_name, is_dir, -- new_dentry->d_inode, old_dentry); -+ target, old_dentry); - fsnotify_oldname_free(old_name); - - return error; --- -2.7.4 - - -From 148622f8708456dda3262bbbe08742a962a0d667 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Tue, 1 Apr 2014 17:08:42 +0200 -Subject: [PATCH 03/73] vfs: add renameat2 syscall - -Add new renameat2 syscall, which is the same as renameat with an added -flags argument. - -Pass flags to vfs_rename() and to i_op->rename() as well. - -Signed-off-by: Miklos Szeredi -Reviewed-by: J. Bruce Fields -(cherry picked from commit 520c8b16505236fc82daa352e6c5e73cd9870cff) -Signed-off-by: Alex Shi ---- - Documentation/filesystems/Locking | 6 +++- - Documentation/filesystems/vfs.txt | 16 ++++++++++ - arch/x86/syscalls/syscall_64.tbl | 1 + - .../lustre/lustre/include/linux/lustre_compat25.h | 4 +-- - drivers/staging/lustre/lustre/lvfs/lvfs_linux.c | 2 +- - fs/cachefiles/namei.c | 2 +- - fs/ecryptfs/inode.c | 2 +- - fs/namei.c | 34 +++++++++++++++++----- - fs/nfsd/vfs.c | 2 +- - include/linux/fs.h | 4 ++- - 10 files changed, 58 insertions(+), 15 deletions(-) - -diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking -index 5b0c083..f424e0e 100644 ---- a/Documentation/filesystems/Locking -+++ b/Documentation/filesystems/Locking -@@ -47,6 +47,8 @@ prototypes: - int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename2) (struct inode *, struct dentry *, -+ struct inode *, struct dentry *, unsigned int); - int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); -@@ -78,6 +80,7 @@ mkdir: yes - unlink: yes (both) - rmdir: yes (both) (see below) - rename: yes (all) (see below) -+rename2: yes (all) (see below) - readlink: no - follow_link: no - put_link: no -@@ -96,7 +99,8 @@ tmpfile: no - - Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on - victim. -- cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. -+ cross-directory ->rename() and rename2() has (per-superblock) -+->s_vfs_rename_sem. - - See Documentation/filesystems/directory-locking for more detailed discussion - of the locking scheme for directory operations. -diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt -index c53784c..94eb862 100644 ---- a/Documentation/filesystems/vfs.txt -+++ b/Documentation/filesystems/vfs.txt -@@ -347,6 +347,8 @@ struct inode_operations { - int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename2) (struct inode *, struct dentry *, -+ struct inode *, struct dentry *, unsigned int); - int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); -@@ -414,6 +416,20 @@ otherwise noted. - rename: called by the rename(2) system call to rename the object to - have the parent and name given by the second inode and dentry. - -+ rename2: this has an additional flags argument compared to rename. -+ If no flags are supported by the filesystem then this method -+ need not be implemented. If some flags are supported then the -+ filesystem must return -EINVAL for any unsupported or unknown -+ flags. Currently the following flags are implemented: -+ (1) RENAME_NOREPLACE: this flag indicates that if the target -+ of the rename exists the rename should fail with -EEXIST -+ instead of replacing the target. The VFS already checks for -+ existence, so for local filesystems the RENAME_NOREPLACE -+ implementation is equivalent to plain rename. -+ (2) RENAME_EXCHANGE: exchange source and target. Both must -+ exist; this is checked by the VFS. Unlike plain rename, -+ source and target may be of different type. -+ - readlink: called by the readlink(2) system call. Only required if - you want to support reading symbolic links - -diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h -index eefdb8d..81cc7a0 100644 ---- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h -+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h -@@ -105,8 +105,8 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, - #define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry) - #define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev) - #define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry) --#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1,delegated_inode) \ -- vfs_rename(old,old_dir,new,new_dir,delegated_inode) -+#define ll_vfs_rename(old, old_dir, mnt, new, new_dir, mnt1) \ -+ vfs_rename(old, old_dir, new, new_dir, NULL, 0) - - #define cfs_bio_io_error(a,b) bio_io_error((a)) - #define cfs_bio_endio(a,b,c) bio_endio((a),(c)) -diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c -index 428ffd8..d50822b 100644 ---- a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c -+++ b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c -@@ -223,7 +223,7 @@ int lustre_rename(struct dentry *dir, struct vfsmount *mnt, - GOTO(put_old, err = PTR_ERR(dchild_new)); - - err = ll_vfs_rename(dir->d_inode, dchild_old, mnt, -- dir->d_inode, dchild_new, mnt, NULL); -+ dir->d_inode, dchild_new, mnt); - - dput(dchild_new); - put_old: -diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c -index ca65f39..31088a9 100644 ---- a/fs/cachefiles/namei.c -+++ b/fs/cachefiles/namei.c -@@ -396,7 +396,7 @@ try_again: - cachefiles_io_error(cache, "Rename security error %d", ret); - } else { - ret = vfs_rename(dir->d_inode, rep, -- cache->graveyard->d_inode, grave, NULL); -+ cache->graveyard->d_inode, grave, NULL, 0); - if (ret != 0 && ret != -ENOMEM) - cachefiles_io_error(cache, - "Rename failed with error %d", ret); -diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c -index a85ceb7..57ee4c5 100644 ---- a/fs/ecryptfs/inode.c -+++ b/fs/ecryptfs/inode.c -@@ -641,7 +641,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, - } - rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, - lower_new_dir_dentry->d_inode, lower_new_dentry, -- NULL); -+ NULL, 0); - if (rc) - goto out_lock; - if (target_inode) -diff --git a/fs/namei.c b/fs/namei.c -index 4fa9e66..5134f8c 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -3984,6 +3984,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname - * @new_dir: parent of destination - * @new_dentry: destination - * @delegated_inode: returns an inode needing a delegation break -+ * @flags: rename flags - * - * The caller must hold multiple mutexes--see lock_rename()). - * -@@ -4027,7 +4028,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname - */ - int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, -- struct inode **delegated_inode) -+ struct inode **delegated_inode, unsigned int flags) - { - int error; - bool is_dir = d_is_dir(old_dentry); -@@ -4052,6 +4053,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - if (!old_dir->i_op->rename) - return -EPERM; - -+ if (flags && !old_dir->i_op->rename2) -+ return -EINVAL; -+ - /* - * If we are going to change the parent - check write permissions, - * we'll need to flip '..'. -@@ -4097,7 +4101,13 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - goto out; - } - } -- error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ if (!flags) { -+ error = old_dir->i_op->rename(old_dir, old_dentry, -+ new_dir, new_dentry); -+ } else { -+ error = old_dir->i_op->rename2(old_dir, old_dentry, -+ new_dir, new_dentry, flags); -+ } - if (error) - goto out; - -@@ -4122,8 +4132,8 @@ out: - return error; - } - --SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, -- int, newdfd, const char __user *, newname) -+SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, -+ int, newdfd, const char __user *, newname, unsigned int, flags) - { - struct dentry *old_dir, *new_dir; - struct dentry *old_dentry, *new_dentry; -@@ -4135,6 +4145,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, - unsigned int lookup_flags = 0; - bool should_retry = false; - int error; -+ -+ if (flags) -+ return -EINVAL; -+ - retry: - from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); - if (IS_ERR(from)) { -@@ -4206,8 +4220,8 @@ retry_deleg: - if (error) - goto exit5; - error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry, -- &delegated_inode); -+ new_dir->d_inode, new_dentry, -+ &delegated_inode, flags); - exit5: - dput(new_dentry); - exit4: -@@ -4237,9 +4251,15 @@ exit: - return error; - } - -+SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, -+ int, newdfd, const char __user *, newname) -+{ -+ return sys_renameat2(olddfd, oldname, newdfd, newname, 0); -+} -+ - SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) - { -- return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); -+ return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); - } - - int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) -diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c -index eea5ad1..464f813 100644 ---- a/fs/nfsd/vfs.c -+++ b/fs/nfsd/vfs.c -@@ -1703,7 +1703,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, - if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) - goto out_dput_new; - -- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); -+ host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); - if (!host_err) { - host_err = commit_metadata(tfhp); - if (!host_err) -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 23b2a35..3b3670e 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -1460,7 +1460,7 @@ extern int vfs_symlink(struct inode *, struct dentry *, const char *); - extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **); -+extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); - - /* - * VFS dentry helper functions. -@@ -1571,6 +1571,8 @@ struct inode_operations { - int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename2) (struct inode *, struct dentry *, -+ struct inode *, struct dentry *, unsigned int); - int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); --- -2.7.4 - - -From f1bdf915fa37836cc39f76e86c65521504deb7e1 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Tue, 1 Apr 2014 17:08:43 +0200 -Subject: [PATCH 04/73] vfs: add RENAME_NOREPLACE flag - -If this flag is specified and the target of the rename exists then the -rename syscall fails with EEXIST. - -The VFS does the existence checking, so it is trivial to enable for most -local filesystems. This patch only enables it in ext4. - -For network filesystems the VFS check is not enough as there may be a race -between a remote create and the rename, so these filesystems need to handle -this flag in their ->rename() implementations to ensure atomicity. - -Andy writes about why this is useful: - -"The trivial answer: to eliminate the race condition from 'mv -i'. - -Another answer: there's a common pattern to atomically create a file -with contents: open a temporary file, write to it, optionally fsync -it, close it, then link(2) it to the final name, then unlink the -temporary file. - -The reason to use link(2) is because it won't silently clobber the destination. - -This is annoying: - - It requires an extra system call that shouldn't be necessary. - - It doesn't work on (IMO sensible) filesystems that don't support -hard links (e.g. vfat). - - It's not atomic -- there's an intermediate state where both files exist. - - It's ugly. - -The new rename flag will make this totally sensible. - -To be fair, on new enough kernels, you can also use O_TMPFILE and -linkat to achieve the same thing even more cleanly." - -Suggested-by: Andy Lutomirski -Signed-off-by: Miklos Szeredi -Reviewed-by: J. Bruce Fields -(cherry picked from commit 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c) -Signed-off-by: Alex Shi ---- - fs/ext4/namei.c | 11 +++++++++++ - fs/namei.c | 21 +++++++++++++-------- - include/uapi/linux/fs.h | 2 ++ - 3 files changed, 26 insertions(+), 8 deletions(-) - -diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c -index 2dcbfb6..f0598d6 100644 ---- a/fs/ext4/namei.c -+++ b/fs/ext4/namei.c -@@ -3191,6 +3191,16 @@ end_rename: - return retval; - } - -+static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry, -+ unsigned int flags) -+{ -+ if (flags & ~RENAME_NOREPLACE) -+ return -EINVAL; -+ -+ return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); -+} -+ - /* - * directories can handle most operations... - */ -@@ -3205,6 +3215,7 @@ const struct inode_operations ext4_dir_inode_operations = { - .mknod = ext4_mknod, - .tmpfile = ext4_tmpfile, - .rename = ext4_rename, -+ .rename2 = ext4_rename2, - .setattr = ext4_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, -diff --git a/fs/namei.c b/fs/namei.c -index 5134f8c..3f2cc3c 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -4146,7 +4146,7 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, - bool should_retry = false; - int error; - -- if (flags) -+ if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - - retry: -@@ -4172,6 +4172,8 @@ retry: - goto exit2; - - new_dir = newnd.path.dentry; -+ if (flags & RENAME_NOREPLACE) -+ error = -EEXIST; - if (newnd.last_type != LAST_NORM) - goto exit2; - -@@ -4194,22 +4196,25 @@ retry_deleg: - error = -ENOENT; - if (d_is_negative(old_dentry)) - goto exit4; -+ new_dentry = lookup_hash(&newnd); -+ error = PTR_ERR(new_dentry); -+ if (IS_ERR(new_dentry)) -+ goto exit4; -+ error = -EEXIST; -+ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) -+ goto exit5; - /* unless the source is a directory trailing slashes give -ENOTDIR */ - if (!d_is_dir(old_dentry)) { - error = -ENOTDIR; - if (oldnd.last.name[oldnd.last.len]) -- goto exit4; -+ goto exit5; - if (newnd.last.name[newnd.last.len]) -- goto exit4; -+ goto exit5; - } - /* source should not be ancestor of target */ - error = -EINVAL; - if (old_dentry == trap) -- goto exit4; -- new_dentry = lookup_hash(&newnd); -- error = PTR_ERR(new_dentry); -- if (IS_ERR(new_dentry)) -- goto exit4; -+ goto exit5; - /* target should not be an ancestor of source */ - error = -ENOTEMPTY; - if (new_dentry == trap) -diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h -index 6c28b61..9250f4d 100644 ---- a/include/uapi/linux/fs.h -+++ b/include/uapi/linux/fs.h -@@ -35,6 +35,8 @@ - #define SEEK_HOLE 4 /* seek to the next hole */ - #define SEEK_MAX SEEK_HOLE - -+#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ -+ - struct fstrim_range { - __u64 start; - __u64 len; --- -2.7.4 - - -From 5371fcd46e9276f4216e329ba2714d595a384a1f Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Tue, 1 Apr 2014 17:08:43 +0200 -Subject: [PATCH 05/73] security: add flags to rename hooks - -Add flags to security_path_rename() and security_inode_rename() hooks. - -Signed-off-by: Miklos Szeredi -Reviewed-by: J. Bruce Fields -(cherry picked from commit 0b3974eb04c4874e85fa1d4fc70450d12f28611d) -Signed-off-by: Alex Shi ---- - fs/cachefiles/namei.c | 2 +- - fs/namei.c | 5 +++-- - include/linux/security.h | 12 ++++++++---- - security/security.c | 6 ++++-- - 4 files changed, 16 insertions(+), 9 deletions(-) - -diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c -index 31088a9..6494d9f 100644 ---- a/fs/cachefiles/namei.c -+++ b/fs/cachefiles/namei.c -@@ -391,7 +391,7 @@ try_again: - path.dentry = dir; - path_to_graveyard.mnt = cache->mnt; - path_to_graveyard.dentry = cache->graveyard; -- ret = security_path_rename(&path, rep, &path_to_graveyard, grave); -+ ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0); - if (ret < 0) { - cachefiles_io_error(cache, "Rename security error %d", ret); - } else { -diff --git a/fs/namei.c b/fs/namei.c -index 3f2cc3c..137a7b8 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -4066,7 +4066,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - return error; - } - -- error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); -+ error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, -+ flags); - if (error) - return error; - -@@ -4221,7 +4222,7 @@ retry_deleg: - goto exit5; - - error = security_path_rename(&oldnd.path, old_dentry, -- &newnd.path, new_dentry); -+ &newnd.path, new_dentry, flags); - if (error) - goto exit5; - error = vfs_rename(old_dir->d_inode, old_dentry, -diff --git a/include/linux/security.h b/include/linux/security.h -index 2fc42d1..6478ce3 100644 ---- a/include/linux/security.h -+++ b/include/linux/security.h -@@ -1793,7 +1793,8 @@ int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) - int security_inode_rmdir(struct inode *dir, struct dentry *dentry); - int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev); - int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry); -+ struct inode *new_dir, struct dentry *new_dentry, -+ unsigned int flags); - int security_inode_readlink(struct dentry *dentry); - int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); - int security_inode_permission(struct inode *inode, int mask); -@@ -2161,7 +2162,8 @@ static inline int security_inode_mknod(struct inode *dir, - static inline int security_inode_rename(struct inode *old_dir, - struct dentry *old_dentry, - struct inode *new_dir, -- struct dentry *new_dentry) -+ struct dentry *new_dentry, -+ unsigned int flags) - { - return 0; - } -@@ -2955,7 +2957,8 @@ int security_path_symlink(struct path *dir, struct dentry *dentry, - int security_path_link(struct dentry *old_dentry, struct path *new_dir, - struct dentry *new_dentry); - int security_path_rename(struct path *old_dir, struct dentry *old_dentry, -- struct path *new_dir, struct dentry *new_dentry); -+ struct path *new_dir, struct dentry *new_dentry, -+ unsigned int flags); - int security_path_chmod(struct path *path, umode_t mode); - int security_path_chown(struct path *path, kuid_t uid, kgid_t gid); - int security_path_chroot(struct path *path); -@@ -3003,7 +3006,8 @@ static inline int security_path_link(struct dentry *old_dentry, - static inline int security_path_rename(struct path *old_dir, - struct dentry *old_dentry, - struct path *new_dir, -- struct dentry *new_dentry) -+ struct dentry *new_dentry, -+ unsigned int flags) - { - return 0; - } -diff --git a/security/security.c b/security/security.c -index 919cad9..284fbc9 100644 ---- a/security/security.c -+++ b/security/security.c -@@ -433,7 +433,8 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, - } - - int security_path_rename(struct path *old_dir, struct dentry *old_dentry, -- struct path *new_dir, struct dentry *new_dentry) -+ struct path *new_dir, struct dentry *new_dentry, -+ unsigned int flags) - { - if (unlikely(IS_PRIVATE(old_dentry->d_inode) || - (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) -@@ -524,7 +525,8 @@ int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, - } - - int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ unsigned int flags) - { - if (unlikely(IS_PRIVATE(old_dentry->d_inode) || - (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) --- -2.7.4 - - -From c3ae3e8840e6b555949cda895d885ad91c5e3f24 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Tue, 1 Apr 2014 17:08:43 +0200 -Subject: [PATCH 06/73] vfs: add cross-rename - -If flags contain RENAME_EXCHANGE then exchange source and destination files. -There's no restriction on the type of the files; e.g. a directory can be -exchanged with a symlink. - -Signed-off-by: Miklos Szeredi -Reviewed-by: Jan Kara -Reviewed-by: J. Bruce Fields -(cherry picked from commit da1ce0670c14d8380e423a3239e562a1dc15fa9e) -Signed-off-by: Alex Shi ---- - fs/dcache.c | 50 ++++++++++++++++++----- - fs/namei.c | 104 +++++++++++++++++++++++++++++++++--------------- - include/linux/dcache.h | 1 + - include/uapi/linux/fs.h | 1 + - security/security.c | 16 ++++++++ - 5 files changed, 131 insertions(+), 41 deletions(-) - -diff --git a/fs/dcache.c b/fs/dcache.c -index 4366127..e3c5563 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -2481,12 +2481,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target) - dentry->d_name.name = dentry->d_iname; - } else { - /* -- * Both are internal. Just copy target to dentry -+ * Both are internal. - */ -- memcpy(dentry->d_iname, target->d_name.name, -- target->d_name.len + 1); -- dentry->d_name.len = target->d_name.len; -- return; -+ unsigned int i; -+ BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); -+ for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { -+ swap(((long *) &dentry->d_iname)[i], -+ ((long *) &target->d_iname)[i]); -+ } - } - } - swap(dentry->d_name.len, target->d_name.len); -@@ -2543,13 +2545,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, - * __d_move - move a dentry - * @dentry: entry to move - * @target: new dentry -+ * @exchange: exchange the two dentries - * - * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. Caller must hold - * rename_lock, the i_mutex of the source and target directories, - * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). - */ --static void __d_move(struct dentry * dentry, struct dentry * target) -+static void __d_move(struct dentry *dentry, struct dentry *target, -+ bool exchange) - { - if (!dentry->d_inode) - printk(KERN_WARNING "VFS: moving negative dcache entry\n"); -@@ -2571,8 +2575,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) - __d_drop(dentry); - __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); - -- /* Unhash the target: dput() will then get rid of it */ -+ /* -+ * Unhash the target (d_delete() is not usable here). If exchanging -+ * the two dentries, then rehash onto the other's hash queue. -+ */ - __d_drop(target); -+ if (exchange) { -+ __d_rehash(target, -+ d_hash(dentry->d_parent, dentry->d_name.hash)); -+ } - - list_del(&dentry->d_u.d_child); - list_del(&target->d_u.d_child); -@@ -2599,6 +2610,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) - write_seqcount_end(&dentry->d_seq); - - dentry_unlock_parents_for_move(dentry, target); -+ if (exchange) -+ fsnotify_d_move(target); - spin_unlock(&target->d_lock); - fsnotify_d_move(dentry); - spin_unlock(&dentry->d_lock); -@@ -2616,11 +2629,30 @@ static void __d_move(struct dentry * dentry, struct dentry * target) - void d_move(struct dentry *dentry, struct dentry *target) - { - write_seqlock(&rename_lock); -- __d_move(dentry, target); -+ __d_move(dentry, target, false); - write_sequnlock(&rename_lock); - } - EXPORT_SYMBOL(d_move); - -+/* -+ * d_exchange - exchange two dentries -+ * @dentry1: first dentry -+ * @dentry2: second dentry -+ */ -+void d_exchange(struct dentry *dentry1, struct dentry *dentry2) -+{ -+ write_seqlock(&rename_lock); -+ -+ WARN_ON(!dentry1->d_inode); -+ WARN_ON(!dentry2->d_inode); -+ WARN_ON(IS_ROOT(dentry1)); -+ WARN_ON(IS_ROOT(dentry2)); -+ -+ __d_move(dentry1, dentry2, true); -+ -+ write_sequnlock(&rename_lock); -+} -+ - /** - * d_ancestor - search for an ancestor - * @p1: ancestor dentry -@@ -2668,7 +2700,7 @@ static struct dentry *__d_unalias(struct inode *inode, - m2 = &alias->d_parent->d_inode->i_mutex; - out_unalias: - if (likely(!d_mountpoint(alias))) { -- __d_move(alias, dentry); -+ __d_move(alias, dentry, false); - ret = alias; - } - out_err: -diff --git a/fs/namei.c b/fs/namei.c -index 137a7b8..5e41971 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -4035,6 +4035,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - const unsigned char *old_name; - struct inode *source = old_dentry->d_inode; - struct inode *target = new_dentry->d_inode; -+ bool new_is_dir = false; -+ unsigned max_links = new_dir->i_sb->s_max_links; - - if (source == target) - return 0; -@@ -4043,10 +4045,16 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - if (error) - return error; - -- if (!target) -+ if (!target) { - error = may_create(new_dir, new_dentry); -- else -- error = may_delete(new_dir, new_dentry, is_dir); -+ } else { -+ new_is_dir = d_is_dir(new_dentry); -+ -+ if (!(flags & RENAME_EXCHANGE)) -+ error = may_delete(new_dir, new_dentry, is_dir); -+ else -+ error = may_delete(new_dir, new_dentry, new_is_dir); -+ } - if (error) - return error; - -@@ -4060,10 +4068,17 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - * If we are going to change the parent - check write permissions, - * we'll need to flip '..'. - */ -- if (is_dir && new_dir != old_dir) { -- error = inode_permission(source, MAY_WRITE); -- if (error) -- return error; -+ if (new_dir != old_dir) { -+ if (is_dir) { -+ error = inode_permission(source, MAY_WRITE); -+ if (error) -+ return error; -+ } -+ if ((flags & RENAME_EXCHANGE) && new_is_dir) { -+ error = inode_permission(target, MAY_WRITE); -+ if (error) -+ return error; -+ } - } - - error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, -@@ -4073,7 +4088,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - - old_name = fsnotify_oldname_init(old_dentry->d_name.name); - dget(new_dentry); -- if (!is_dir) -+ if (!is_dir || (flags & RENAME_EXCHANGE)) - lock_two_nondirectories(source, target); - else if (target) - mutex_lock(&target->i_mutex); -@@ -4082,25 +4097,25 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) - goto out; - -- if (is_dir) { -- unsigned max_links = new_dir->i_sb->s_max_links; -- -+ if (max_links && new_dir != old_dir) { - error = -EMLINK; -- if (max_links && !target && new_dir != old_dir && -- new_dir->i_nlink >= max_links) -+ if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) - goto out; -- -- if (target) -- shrink_dcache_parent(new_dentry); -- } else { -+ if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && -+ old_dir->i_nlink >= max_links) -+ goto out; -+ } -+ if (is_dir && !(flags & RENAME_EXCHANGE) && target) -+ shrink_dcache_parent(new_dentry); -+ if (!is_dir) { - error = try_break_deleg(source, delegated_inode); - if (error) - goto out; -- if (target) { -- error = try_break_deleg(target, delegated_inode); -- if (error) -- goto out; -- } -+ } -+ if (target && !new_is_dir) { -+ error = try_break_deleg(target, delegated_inode); -+ if (error) -+ goto out; - } - if (!flags) { - error = old_dir->i_op->rename(old_dir, old_dentry, -@@ -4112,22 +4127,31 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - if (error) - goto out; - -- if (target) { -+ if (!(flags & RENAME_EXCHANGE) && target) { - if (is_dir) - target->i_flags |= S_DEAD; - dont_mount(new_dentry); - } -- if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) -- d_move(old_dentry, new_dentry); -+ if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { -+ if (!(flags & RENAME_EXCHANGE)) -+ d_move(old_dentry, new_dentry); -+ else -+ d_exchange(old_dentry, new_dentry); -+ } - out: -- if (!is_dir) -+ if (!is_dir || (flags & RENAME_EXCHANGE)) - unlock_two_nondirectories(source, target); - else if (target) - mutex_unlock(&target->i_mutex); - dput(new_dentry); -- if (!error) -+ if (!error) { - fsnotify_move(old_dir, new_dir, old_name, is_dir, -- target, old_dentry); -+ !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); -+ if (flags & RENAME_EXCHANGE) { -+ fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, -+ new_is_dir, NULL, new_dentry); -+ } -+ } - fsnotify_oldname_free(old_name); - - return error; -@@ -4147,7 +4171,10 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, - bool should_retry = false; - int error; - -- if (flags & ~RENAME_NOREPLACE) -+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) -+ return -EINVAL; -+ -+ if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) - return -EINVAL; - - retry: -@@ -4184,7 +4211,8 @@ retry: - - oldnd.flags &= ~LOOKUP_PARENT; - newnd.flags &= ~LOOKUP_PARENT; -- newnd.flags |= LOOKUP_RENAME_TARGET; -+ if (!(flags & RENAME_EXCHANGE)) -+ newnd.flags |= LOOKUP_RENAME_TARGET; - - retry_deleg: - trap = lock_rename(new_dir, old_dir); -@@ -4204,12 +4232,23 @@ retry_deleg: - error = -EEXIST; - if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) - goto exit5; -+ if (flags & RENAME_EXCHANGE) { -+ error = -ENOENT; -+ if (d_is_negative(new_dentry)) -+ goto exit5; -+ -+ if (!d_is_dir(new_dentry)) { -+ error = -ENOTDIR; -+ if (newnd.last.name[newnd.last.len]) -+ goto exit5; -+ } -+ } - /* unless the source is a directory trailing slashes give -ENOTDIR */ - if (!d_is_dir(old_dentry)) { - error = -ENOTDIR; - if (oldnd.last.name[oldnd.last.len]) - goto exit5; -- if (newnd.last.name[newnd.last.len]) -+ if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len]) - goto exit5; - } - /* source should not be ancestor of target */ -@@ -4217,7 +4256,8 @@ retry_deleg: - if (old_dentry == trap) - goto exit5; - /* target should not be an ancestor of source */ -- error = -ENOTEMPTY; -+ if (!(flags & RENAME_EXCHANGE)) -+ error = -ENOTEMPTY; - if (new_dentry == trap) - goto exit5; - -diff --git a/include/linux/dcache.h b/include/linux/dcache.h -index 3b50cac..3b9bfdb 100644 ---- a/include/linux/dcache.h -+++ b/include/linux/dcache.h -@@ -308,6 +308,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *); - - /* used for rename() and baskets */ - extern void d_move(struct dentry *, struct dentry *); -+extern void d_exchange(struct dentry *, struct dentry *); - extern struct dentry *d_ancestor(struct dentry *, struct dentry *); - - /* appendix may either be NULL or be used for transname suffixes */ -diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h -index 9250f4d..ca1a11b 100644 ---- a/include/uapi/linux/fs.h -+++ b/include/uapi/linux/fs.h -@@ -36,6 +36,7 @@ - #define SEEK_MAX SEEK_HOLE - - #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ -+#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ - - struct fstrim_range { - __u64 start; -diff --git a/security/security.c b/security/security.c -index 284fbc9..8b774f3 100644 ---- a/security/security.c -+++ b/security/security.c -@@ -439,6 +439,14 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, - if (unlikely(IS_PRIVATE(old_dentry->d_inode) || - (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) - return 0; -+ -+ if (flags & RENAME_EXCHANGE) { -+ int err = security_ops->path_rename(new_dir, new_dentry, -+ old_dir, old_dentry); -+ if (err) -+ return err; -+ } -+ - return security_ops->path_rename(old_dir, old_dentry, new_dir, - new_dentry); - } -@@ -531,6 +539,14 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, - if (unlikely(IS_PRIVATE(old_dentry->d_inode) || - (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) - return 0; -+ -+ if (flags & RENAME_EXCHANGE) { -+ int err = security_ops->inode_rename(new_dir, new_dentry, -+ old_dir, old_dentry); -+ if (err) -+ return err; -+ } -+ - return security_ops->inode_rename(old_dir, old_dentry, - new_dir, new_dentry); - } --- -2.7.4 - - -From a756aea51bb937b6a150c4d1cfff66824285491d Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Wed, 23 Jul 2014 15:15:30 +0200 -Subject: [PATCH 07/73] fs: call rename2 if exists - -Christoph Hellwig suggests: - -1) make vfs_rename call ->rename2 if it exists instead of ->rename -2) switch all filesystems that you're adding NOREPLACE support for to - use ->rename2 -3) see how many ->rename instances we'll have left after a few - iterations of 2. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Christoph Hellwig -Signed-off-by: Al Viro -(cherry picked from commit 7177a9c4b509eb357cc450256bc3cf39f1a1e639) -Signed-off-by: Alex Shi - - Conflicts: - fs/fuse/dir.c ---- - fs/ext4/namei.c | 1 - - fs/fuse/dir.c | 32 +++++++++++++++++++++++++++++++- - fs/namei.c | 5 +++-- - 3 files changed, 34 insertions(+), 4 deletions(-) - -diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c -index f0598d6..98de4b3 100644 ---- a/fs/ext4/namei.c -+++ b/fs/ext4/namei.c -@@ -3214,7 +3214,6 @@ const struct inode_operations ext4_dir_inode_operations = { - .rmdir = ext4_rmdir, - .mknod = ext4_mknod, - .tmpfile = ext4_tmpfile, -- .rename = ext4_rename, - .rename2 = ext4_rename2, - .setattr = ext4_setattr, - .setxattr = generic_setxattr, -diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c -index 342f0239..d53a7d9 100644 ---- a/fs/fuse/dir.c -+++ b/fs/fuse/dir.c -@@ -796,6 +796,36 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, - return err; - } - -+static int fuse_rename2(struct inode *olddir, struct dentry *oldent, -+ struct inode *newdir, struct dentry *newent, -+ unsigned int flags) -+{ -+ struct fuse_conn *fc = get_fuse_conn(olddir); -+ int err; -+ -+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) -+ return -EINVAL; -+ -+ if (flags) { -+ if (fc->no_rename2 || fc->minor < 23) -+ return -EINVAL; -+ -+ err = fuse_rename_common(olddir, oldent, newdir, newent, flags, -+ FUSE_RENAME2, -+ sizeof(struct fuse_rename2_in)); -+ if (err == -ENOSYS) { -+ fc->no_rename2 = 1; -+ err = -EINVAL; -+ } -+ } else { -+ err = fuse_rename_common(olddir, oldent, newdir, newent, 0, -+ FUSE_RENAME, -+ sizeof(struct fuse_rename_in)); -+ } -+ -+ return err; -+} -+ - static int fuse_link(struct dentry *entry, struct inode *newdir, - struct dentry *newent) - { -@@ -1882,7 +1912,7 @@ static const struct inode_operations fuse_dir_inode_operations = { - .symlink = fuse_symlink, - .unlink = fuse_unlink, - .rmdir = fuse_rmdir, -- .rename = fuse_rename, -+ .rename2 = fuse_rename2, - .link = fuse_link, - .setattr = fuse_setattr, - .create = fuse_create, -diff --git a/fs/namei.c b/fs/namei.c -index 5e41971..fa7dd2d 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -4058,7 +4058,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - if (error) - return error; - -- if (!old_dir->i_op->rename) -+ if (!old_dir->i_op->rename && !old_dir->i_op->rename2) - return -EPERM; - - if (flags && !old_dir->i_op->rename2) -@@ -4117,10 +4117,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - if (error) - goto out; - } -- if (!flags) { -+ if (!old_dir->i_op->rename2) { - error = old_dir->i_op->rename(old_dir, old_dentry, - new_dir, new_dentry); - } else { -+ WARN_ON(old_dir->i_op->rename != NULL); - error = old_dir->i_op->rename2(old_dir, old_dentry, - new_dir, new_dentry, flags); - } --- -2.7.4 - - -From c5948f26c2de38f3b475d10cfb6695245f52b6ff Mon Sep 17 00:00:00 2001 -From: Al Viro -Date: Sat, 1 Feb 2014 04:43:32 -0500 -Subject: [PATCH 08/73] get rid of pointless checks for NULL ->i_op - -Signed-off-by: Al Viro -(cherry picked from commit 627bf81ac625f05060db033a0f3791521ad7bd79) -Signed-off-by: Alex Shi ---- - fs/cachefiles/bind.c | 1 - - fs/cachefiles/namei.c | 3 +-- - security/integrity/evm/evm_crypto.c | 2 +- - security/integrity/evm/evm_main.c | 2 +- - security/tomoyo/realpath.c | 4 ++-- - 5 files changed, 5 insertions(+), 7 deletions(-) - -diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c -index 622f469..5b99baf 100644 ---- a/fs/cachefiles/bind.c -+++ b/fs/cachefiles/bind.c -@@ -124,7 +124,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) - /* check parameters */ - ret = -EOPNOTSUPP; - if (!root->d_inode || -- !root->d_inode->i_op || - !root->d_inode->i_op->lookup || - !root->d_inode->i_op->mkdir || - !root->d_inode->i_op->setxattr || -diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c -index 6494d9f..c0a6817 100644 ---- a/fs/cachefiles/namei.c -+++ b/fs/cachefiles/namei.c -@@ -779,8 +779,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, - } - - ret = -EPERM; -- if (!subdir->d_inode->i_op || -- !subdir->d_inode->i_op->setxattr || -+ if (!subdir->d_inode->i_op->setxattr || - !subdir->d_inode->i_op->getxattr || - !subdir->d_inode->i_op->lookup || - !subdir->d_inode->i_op->mkdir || -diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c -index 3bab89e..e90ab0e 100644 ---- a/security/integrity/evm/evm_crypto.c -+++ b/security/integrity/evm/evm_crypto.c -@@ -137,7 +137,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, - int error; - int size; - -- if (!inode->i_op || !inode->i_op->getxattr) -+ if (!inode->i_op->getxattr) - return -EOPNOTSUPP; - desc = init_desc(type); - if (IS_ERR(desc)) -diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c -index 7e71e06..690cd63 100644 ---- a/security/integrity/evm/evm_main.c -+++ b/security/integrity/evm/evm_main.c -@@ -62,7 +62,7 @@ static int evm_find_protected_xattrs(struct dentry *dentry) - int error; - int count = 0; - -- if (!inode->i_op || !inode->i_op->getxattr) -+ if (!inode->i_op->getxattr) - return -EOPNOTSUPP; - - for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) { -diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c -index 80a09c3..a3386d1 100644 ---- a/security/tomoyo/realpath.c -+++ b/security/tomoyo/realpath.c -@@ -173,7 +173,7 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer, - * Use filesystem name if filesystem does not support rename() - * operation. - */ -- if (inode->i_op && !inode->i_op->rename) -+ if (!inode->i_op->rename) - goto prepend_filesystem_name; - } - /* Prepend device name. */ -@@ -282,7 +282,7 @@ char *tomoyo_realpath_from_path(struct path *path) - * Get local name for filesystems without rename() operation - * or dentry without vfsmount. - */ -- if (!path->mnt || (inode->i_op && !inode->i_op->rename)) -+ if (!path->mnt || !inode->i_op->rename) - pos = tomoyo_get_local_path(path->dentry, buf, - buf_len - 1); - /* Get absolute name for the rest. */ --- -2.7.4 - - -From bb9934859afca84e19c3213060f2f2eeaff5d6a1 Mon Sep 17 00:00:00 2001 -From: Tetsuo Handa -Date: Wed, 20 Aug 2014 14:14:04 +0900 -Subject: [PATCH 09/73] tomoyo: Fix pathname calculation breakage. - -Commit 7177a9c4b509 ("fs: call rename2 if exists") changed -"struct inode_operations"->rename == NULL if -"struct inode_operations"->rename2 != NULL . - -TOMOYO needs to check for both ->rename and ->rename2 , or -a system on (e.g.) ext4 filesystem won't boot. - -Signed-off-by: Tetsuo Handa -Signed-off-by: Serge E. Hallyn -(cherry picked from commit 8fe7a268b18ebc89203c766b020b9e32f1cfeebf) -Signed-off-by: Alex Shi ---- - security/tomoyo/realpath.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c -index a3386d1..bed745c 100644 ---- a/security/tomoyo/realpath.c -+++ b/security/tomoyo/realpath.c -@@ -173,7 +173,7 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer, - * Use filesystem name if filesystem does not support rename() - * operation. - */ -- if (!inode->i_op->rename) -+ if (!inode->i_op->rename && !inode->i_op->rename2) - goto prepend_filesystem_name; - } - /* Prepend device name. */ -@@ -282,7 +282,8 @@ char *tomoyo_realpath_from_path(struct path *path) - * Get local name for filesystems without rename() operation - * or dentry without vfsmount. - */ -- if (!path->mnt || !inode->i_op->rename) -+ if (!path->mnt || -+ (!inode->i_op->rename && !inode->i_op->rename2)) - pos = tomoyo_get_local_path(path->dentry, buf, - buf_len - 1); - /* Get absolute name for the rest. */ --- -2.7.4 - - -From 0750d56323044df86224bb9aa3d0830bada1cb81 Mon Sep 17 00:00:00 2001 -From: Al Viro -Date: Fri, 14 Mar 2014 13:42:45 -0400 -Subject: [PATCH 10/73] new helper: readlink_copy() - -Signed-off-by: Al Viro -(cherry picked from commit 5d826c847b34de6415b4f1becd88a57ff619af50) -Signed-off-by: Alex Shi - - Conflicts: - fs/namei.c ---- - fs/namei.c | 12 ++++-------- - fs/proc/namespaces.c | 14 ++++---------- - fs/proc/self.c | 2 +- - fs/xfs/xfs_ioctl.c | 28 +--------------------------- - include/linux/fs.h | 2 +- - 5 files changed, 11 insertions(+), 47 deletions(-) - -diff --git a/fs/namei.c b/fs/namei.c -index fa7dd2d..ef28dcd 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -4309,11 +4309,9 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna - return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); - } - --int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) -+int readlink_copy(char __user *buffer, int buflen, const char *link) - { -- int len; -- -- len = PTR_ERR(link); -+ int len = PTR_ERR(link); - if (IS_ERR(link)) - goto out; - -@@ -4342,7 +4340,7 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) - if (IS_ERR(cookie)) - return PTR_ERR(cookie); - -- res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); -+ res = readlink_copy(buffer, buflen, nd_get_link(&nd)); - if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, &nd, cookie); - return res; -@@ -4366,8 +4364,7 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage) - int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) - { - struct page *page = NULL; -- char *s = page_getlink(dentry, &page); -- int res = vfs_readlink(dentry,buffer,buflen,s); -+ int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page)); - if (page) { - kunmap(page); - page_cache_release(page); -@@ -4463,7 +4460,6 @@ EXPORT_SYMBOL(vfs_link); - EXPORT_SYMBOL(vfs_mkdir); - EXPORT_SYMBOL(vfs_mknod); - EXPORT_SYMBOL(generic_permission); --EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_rename); - EXPORT_SYMBOL(vfs_rmdir); - EXPORT_SYMBOL(vfs_symlink); -diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c -index 9ae46b8..8902609 100644 ---- a/fs/proc/namespaces.c -+++ b/fs/proc/namespaces.c -@@ -146,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl - struct task_struct *task; - void *ns; - char name[50]; -- int len = -EACCES; -+ int res = -EACCES; - - task = get_proc_task(inode); - if (!task) -@@ -155,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out_put_task; - -- len = -ENOENT; -+ res = -ENOENT; - ns = ns_ops->get(task); - if (!ns) - goto out_put_task; - - snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); -- len = strlen(name); -- -- if (len > buflen) -- len = buflen; -- if (copy_to_user(buffer, name, len)) -- len = -EFAULT; -- -+ res = readlink_copy(buffer, buflen, name); - ns_ops->put(ns); - out_put_task: - put_task_struct(task); - out: -- return len; -+ return res; - } - - static const struct inode_operations proc_ns_link_inode_operations = { -diff --git a/fs/proc/self.c b/fs/proc/self.c -index ffeb202..4348bb8 100644 ---- a/fs/proc/self.c -+++ b/fs/proc/self.c -@@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, - if (!tgid) - return -ENOENT; - sprintf(tmp, "%d", tgid); -- return vfs_readlink(dentry,buffer,buflen,tmp); -+ return readlink_copy(buffer, buflen, tmp); - } - - static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) -diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c -index 78e62cc..6152cbe 100644 ---- a/fs/xfs/xfs_ioctl.c -+++ b/fs/xfs/xfs_ioctl.c -@@ -271,32 +271,6 @@ xfs_open_by_handle( - return error; - } - --/* -- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's -- * unused first argument. -- */ --STATIC int --do_readlink( -- char __user *buffer, -- int buflen, -- const char *link) --{ -- int len; -- -- len = PTR_ERR(link); -- if (IS_ERR(link)) -- goto out; -- -- len = strlen(link); -- if (len > (unsigned) buflen) -- len = buflen; -- if (copy_to_user(buffer, link, len)) -- len = -EFAULT; -- out: -- return len; --} -- -- - int - xfs_readlink_by_handle( - struct file *parfilp, -@@ -334,7 +308,7 @@ xfs_readlink_by_handle( - error = -xfs_readlink(XFS_I(dentry->d_inode), link); - if (error) - goto out_kfree; -- error = do_readlink(hreq->ohandle, olen, link); -+ error = readlink_copy(hreq->ohandle, olen, link); - if (error) - goto out_kfree; - -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 3b3670e..29170bf 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -2566,7 +2566,7 @@ extern const struct file_operations generic_ro_fops; - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) - --extern int vfs_readlink(struct dentry *, char __user *, int, const char *); -+extern int readlink_copy(char __user *, int, const char *); - extern int page_readlink(struct dentry *, char __user *, int); - extern void *page_follow_link_light(struct dentry *, struct nameidata *); - extern void page_put_link(struct dentry *, struct nameidata *, void *); --- -2.7.4 - - -From 6c1a7bfb6a5ac8cf057fd191acd8f28b23c60dd4 Mon Sep 17 00:00:00 2001 -From: Al Viro -Date: Fri, 14 Mar 2014 12:54:25 -0400 -Subject: [PATCH 11/73] lustre: generic_readlink() is just fine there, TYVM... - -Signed-off-by: Al Viro -(cherry picked from commit 4efcc9ffcd4fc53f1f7de539842cdffa1f8e5ecc) -Signed-off-by: Alex Shi ---- - drivers/staging/lustre/lustre/llite/symlink.c | 23 +---------------------- - 1 file changed, 1 insertion(+), 22 deletions(-) - -diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c -index ab06891..80d48b5 100644 ---- a/drivers/staging/lustre/lustre/llite/symlink.c -+++ b/drivers/staging/lustre/lustre/llite/symlink.c -@@ -115,27 +115,6 @@ failed: - return rc; - } - --static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) --{ -- struct inode *inode = dentry->d_inode; -- struct ptlrpc_request *request; -- char *symname; -- int rc; -- -- CDEBUG(D_VFSTRACE, "VFS Op\n"); -- -- ll_inode_size_lock(inode); -- rc = ll_readlink_internal(inode, &request, &symname); -- if (rc) -- GOTO(out, rc); -- -- rc = vfs_readlink(dentry, buffer, buflen, symname); -- out: -- ptlrpc_req_finished(request); -- ll_inode_size_unlock(inode); -- return rc; --} -- - static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd) - { - struct inode *inode = dentry->d_inode; -@@ -175,7 +154,7 @@ static void ll_put_link(struct dentry *dentry, struct nameidata *nd, void *cooki - } - - struct inode_operations ll_fast_symlink_inode_operations = { -- .readlink = ll_readlink, -+ .readlink = generic_readlink, - .setattr = ll_setattr, - .follow_link = ll_follow_link, - .put_link = ll_put_link, --- -2.7.4 - - -From 71732036259eac84c4b4add5b7503a0f01905bc4 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 24 Oct 2014 00:14:35 +0200 -Subject: [PATCH 12/73] vfs: add i_op->dentry_open() - -Add a new inode operation i_op->dentry_open(). This is for stacked filesystems -that want to return a struct file from a different filesystem. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 4aa7c6346be395bdf776f82bbb2e3e2bc60bdd2b) -Signed-off-by: Alex Shi ---- - Documentation/filesystems/Locking | 2 ++ - Documentation/filesystems/vfs.txt | 7 +++++++ - fs/namei.c | 9 ++++++--- - fs/open.c | 23 +++++++++++++++++++++-- - include/linux/fs.h | 4 ++++ - 5 files changed, 40 insertions(+), 5 deletions(-) - -diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking -index f424e0e..821c8f4 100644 ---- a/Documentation/filesystems/Locking -+++ b/Documentation/filesystems/Locking -@@ -67,6 +67,7 @@ prototypes: - struct file *, unsigned open_flag, - umode_t create_mode, int *opened); - int (*tmpfile) (struct inode *, struct dentry *, umode_t); -+ int (*dentry_open)(struct dentry *, struct file *, const struct cred *); - - locking rules: - all may block -@@ -96,6 +97,7 @@ fiemap: no - update_time: no - atomic_open: yes - tmpfile: no -+dentry_open: no - - Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on - victim. -diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt -index 94eb862..5d06f2a 100644 ---- a/Documentation/filesystems/vfs.txt -+++ b/Documentation/filesystems/vfs.txt -@@ -364,6 +364,7 @@ struct inode_operations { - int (*atomic_open)(struct inode *, struct dentry *, struct file *, - unsigned open_flag, umode_t create_mode, int *opened); - int (*tmpfile) (struct inode *, struct dentry *, umode_t); -+ int (*dentry_open)(struct dentry *, struct file *, const struct cred *); - }; - - Again, all methods are called without any locks being held, unless -@@ -697,6 +698,12 @@ struct address_space_operations { - but instead uses bmap to find out where the blocks in the file - are and uses those addresses directly. - -+ dentry_open: *WARNING: probably going away soon, do not use!* This is an -+ alternative to f_op->open(), the difference is that this method may open -+ a file not necessarily originating from the same filesystem as the one -+ i_op->open() was called on. It may be useful for stacking filesystems -+ which want to allow native I/O directly on underlying files. -+ - - invalidatepage: If a page has PagePrivate set, then invalidatepage - will be called when part or all of the page is to be removed -diff --git a/fs/namei.c b/fs/namei.c -index ef28dcd..b987228 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -3037,9 +3037,12 @@ finish_open_created: - error = may_open(&nd->path, acc_mode, open_flag); - if (error) - goto out; -- file->f_path.mnt = nd->path.mnt; -- error = finish_open(file, nd->path.dentry, NULL, opened); -- if (error) { -+ -+ BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ -+ error = vfs_open(&nd->path, file, current_cred()); -+ if (!error) { -+ *opened |= FILE_OPENED; -+ } else { - if (error == -EOPENSTALE) - goto stale_open; - goto out; -diff --git a/fs/open.c b/fs/open.c -index 2ed7325..c92c6ef 100644 ---- a/fs/open.c -+++ b/fs/open.c -@@ -822,8 +822,7 @@ struct file *dentry_open(const struct path *path, int flags, - f = get_empty_filp(); - if (!IS_ERR(f)) { - f->f_flags = flags; -- f->f_path = *path; -- error = do_dentry_open(f, NULL, cred); -+ error = vfs_open(path, f, cred); - if (!error) { - /* from now on we need fput() to dispose of f */ - error = open_check_o_direct(f); -@@ -840,6 +839,26 @@ struct file *dentry_open(const struct path *path, int flags, - } - EXPORT_SYMBOL(dentry_open); - -+/** -+ * vfs_open - open the file at the given path -+ * @path: path to open -+ * @filp: newly allocated file with f_flag initialized -+ * @cred: credentials to use -+ */ -+int vfs_open(const struct path *path, struct file *filp, -+ const struct cred *cred) -+{ -+ struct inode *inode = path->dentry->d_inode; -+ -+ if (inode->i_op->dentry_open) -+ return inode->i_op->dentry_open(path->dentry, filp, cred); -+ else { -+ filp->f_path = *path; -+ return do_dentry_open(filp, NULL, cred); -+ } -+} -+EXPORT_SYMBOL(vfs_open); -+ - static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) - { - int lookup_flags = 0; -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 29170bf..ce38acf 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -1587,6 +1587,9 @@ struct inode_operations { - umode_t create_mode, int *opened); - int (*tmpfile) (struct inode *, struct dentry *, umode_t); - int (*set_acl)(struct inode *, struct posix_acl *, int); -+ -+ /* WARNING: probably going away soon, do not use! */ -+ int (*dentry_open)(struct dentry *, struct file *, const struct cred *); - } ____cacheline_aligned; - - ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, -@@ -2081,6 +2084,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t); - extern struct file *filp_open(const char *, int, umode_t); - extern struct file *file_open_root(struct dentry *, struct vfsmount *, - const char *, int); -+extern int vfs_open(const struct path *, struct file *, const struct cred *); - extern struct file * dentry_open(const struct path *, int, const struct cred *); - extern int filp_close(struct file *, fl_owner_t id); - --- -2.7.4 - - -From 5c45fc6a9c14be17ae4d203d35fe82c6b4575de4 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 24 Oct 2014 00:14:35 +0200 -Subject: [PATCH 13/73] vfs: export do_splice_direct() to modules - -Export do_splice_direct() to modules. Needed by overlay filesystem. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 1c118596a7682912106c80007102ce0184c77780) -Signed-off-by: Alex Shi ---- - fs/internal.h | 6 ------ - fs/splice.c | 1 + - include/linux/fs.h | 3 +++ - 3 files changed, 4 insertions(+), 6 deletions(-) - -diff --git a/fs/internal.h b/fs/internal.h -index 4657424..873cdf5 100644 ---- a/fs/internal.h -+++ b/fs/internal.h -@@ -135,12 +135,6 @@ extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); - extern int rw_verify_area(int, struct file *, const loff_t *, size_t); - - /* -- * splice.c -- */ --extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, -- loff_t *opos, size_t len, unsigned int flags); -- --/* - * pipe.c - */ - extern const struct file_operations pipefifo_fops; -diff --git a/fs/splice.c b/fs/splice.c -index 12028fa..ffb92b9 100644 ---- a/fs/splice.c -+++ b/fs/splice.c -@@ -1327,6 +1327,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - - return ret; - } -+EXPORT_SYMBOL(do_splice_direct); - - static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, - struct pipe_inode_info *opipe, -diff --git a/include/linux/fs.h b/include/linux/fs.h -index ce38acf..5bcbbf3 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -2506,6 +2506,9 @@ extern ssize_t generic_file_splice_write(struct pipe_inode_info *, - struct file *, loff_t *, size_t, unsigned int); - extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, - struct file *out, loff_t *, size_t len, unsigned int flags); -+extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, -+ loff_t *opos, size_t len, unsigned int flags); -+ - - extern void - file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); --- -2.7.4 - - -From 96bd0c53e4204392144978e5a40f4699570be22b Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 24 Oct 2014 00:14:35 +0200 -Subject: [PATCH 14/73] vfs: export __inode_permission() to modules - -We need to be able to check inode permissions (but not filesystem implied -permissions) for stackable filesystems. Expose this interface for overlayfs. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit bd5d08569cc379f8366663a61558a9ce17c2e460) -Signed-off-by: Alex Shi ---- - fs/internal.h | 1 - - fs/namei.c | 1 + - include/linux/fs.h | 1 + - 3 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/internal.h b/fs/internal.h -index 873cdf5..dd41b12 100644 ---- a/fs/internal.h -+++ b/fs/internal.h -@@ -42,7 +42,6 @@ extern void __init chrdev_init(void); - /* - * namei.c - */ --extern int __inode_permission(struct inode *, int); - extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); - extern int vfs_path_lookup(struct dentry *, struct vfsmount *, - const char *, unsigned int, struct path *); -diff --git a/fs/namei.c b/fs/namei.c -index b987228..56b30af 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -415,6 +415,7 @@ int __inode_permission(struct inode *inode, int mask) - - return security_inode_permission(inode, mask); - } -+EXPORT_SYMBOL(__inode_permission); - - /** - * sb_permission - Check superblock-level permissions -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 5bcbbf3..82c78d6 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -2298,6 +2298,7 @@ extern sector_t bmap(struct inode *, sector_t); - #endif - extern int notify_change(struct dentry *, struct iattr *, struct inode **); - extern int inode_permission(struct inode *, int); -+extern int __inode_permission(struct inode *, int); - extern int generic_permission(struct inode *, int); - - static inline bool execute_ok(struct inode *inode) --- -2.7.4 - - -From 574c7ef9787b3b3d0513830e40323c34c2f53a39 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 24 Oct 2014 00:14:36 +0200 -Subject: [PATCH 15/73] vfs: introduce clone_private_mount() - -Overlayfs needs a private clone of the mount, so create a function for -this and export to modules. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit c771d683a62e5d36bc46036f5c07f4f5bb7dda61) -Signed-off-by: Alex Shi ---- - fs/namespace.c | 27 +++++++++++++++++++++++++++ - include/linux/mount.h | 3 +++ - 2 files changed, 30 insertions(+) - -diff --git a/fs/namespace.c b/fs/namespace.c -index 039f380..c6533ce 100644 ---- a/fs/namespace.c -+++ b/fs/namespace.c -@@ -1586,6 +1586,33 @@ void drop_collected_mounts(struct vfsmount *mnt) - namespace_unlock(); - } - -+/** -+ * clone_private_mount - create a private clone of a path -+ * -+ * This creates a new vfsmount, which will be the clone of @path. The new will -+ * not be attached anywhere in the namespace and will be private (i.e. changes -+ * to the originating mount won't be propagated into this). -+ * -+ * Release with mntput(). -+ */ -+struct vfsmount *clone_private_mount(struct path *path) -+{ -+ struct mount *old_mnt = real_mount(path->mnt); -+ struct mount *new_mnt; -+ -+ if (IS_MNT_UNBINDABLE(old_mnt)) -+ return ERR_PTR(-EINVAL); -+ -+ down_read(&namespace_sem); -+ new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); -+ up_read(&namespace_sem); -+ if (IS_ERR(new_mnt)) -+ return ERR_CAST(new_mnt); -+ -+ return &new_mnt->mnt; -+} -+EXPORT_SYMBOL_GPL(clone_private_mount); -+ - int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, - struct vfsmount *root) - { -diff --git a/include/linux/mount.h b/include/linux/mount.h -index b0c1e65..fff78cb 100644 ---- a/include/linux/mount.h -+++ b/include/linux/mount.h -@@ -81,6 +81,9 @@ extern void mnt_pin(struct vfsmount *mnt); - extern void mnt_unpin(struct vfsmount *mnt); - extern int __mnt_is_readonly(struct vfsmount *mnt); - -+struct path; -+extern struct vfsmount *clone_private_mount(struct path *path); -+ - struct file_system_type; - extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, - int flags, const char *name, --- -2.7.4 - - -From ae311653282de97e927ff8c961a79be3ab259cb1 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 24 Oct 2014 00:14:36 +0200 -Subject: [PATCH 16/73] vfs: export check_sticky() - -It's already duplicated in btrfs and about to be used in overlayfs too. - -Move the sticky bit check to an inline helper and call the out-of-line -helper only in the unlikly case of the sticky bit being set. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit cbdf35bcb833bfd00f0925d7a9a33a21f41ea582) -Signed-off-by: Alex Shi ---- - fs/btrfs/ioctl.c | 20 +------------------- - fs/namei.c | 9 ++------- - include/linux/fs.h | 9 +++++++++ - 3 files changed, 12 insertions(+), 26 deletions(-) - -diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c -index 0b72006..96b95e6 100644 ---- a/fs/btrfs/ioctl.c -+++ b/fs/btrfs/ioctl.c -@@ -682,23 +682,6 @@ out: - return ret; - } - --/* copy of check_sticky in fs/namei.c() --* It's inline, so penalty for filesystems that don't use sticky bit is --* minimal. --*/ --static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) --{ -- kuid_t fsuid = current_fsuid(); -- -- if (!(dir->i_mode & S_ISVTX)) -- return 0; -- if (uid_eq(inode->i_uid, fsuid)) -- return 0; -- if (uid_eq(dir->i_uid, fsuid)) -- return 0; -- return !capable(CAP_FOWNER); --} -- - /* copy of may_delete in fs/namei.c() - * Check whether we can remove a link victim from directory dir, check - * whether the type of victim is right. -@@ -734,8 +717,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) - return error; - if (IS_APPEND(dir)) - return -EPERM; -- if (btrfs_check_sticky(dir, victim->d_inode)|| -- IS_APPEND(victim->d_inode)|| -+ if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) || - IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) - return -EPERM; - if (isdir) { -diff --git a/fs/namei.c b/fs/namei.c -index 56b30af..1fd5d75 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -2360,22 +2360,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path, - } - EXPORT_SYMBOL(kern_path_mountpoint); - --/* -- * It's inline, so penalty for filesystems that don't use sticky bit is -- * minimal. -- */ --static inline int check_sticky(struct inode *dir, struct inode *inode) -+int __check_sticky(struct inode *dir, struct inode *inode) - { - kuid_t fsuid = current_fsuid(); - -- if (!(dir->i_mode & S_ISVTX)) -- return 0; - if (uid_eq(inode->i_uid, fsuid)) - return 0; - if (uid_eq(dir->i_uid, fsuid)) - return 0; - return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); - } -+EXPORT_SYMBOL(__check_sticky); - - /* - * Check whether we can remove a link victim from directory dir, check -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 82c78d6..ddb9ab5 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -2300,6 +2300,7 @@ extern int notify_change(struct dentry *, struct iattr *, struct inode **); - extern int inode_permission(struct inode *, int); - extern int __inode_permission(struct inode *, int); - extern int generic_permission(struct inode *, int); -+extern int __check_sticky(struct inode *dir, struct inode *inode); - - static inline bool execute_ok(struct inode *inode) - { -@@ -2787,6 +2788,14 @@ static inline int is_sxid(umode_t mode) - return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); - } - -+static inline int check_sticky(struct inode *dir, struct inode *inode) -+{ -+ if (!(dir->i_mode & S_ISVTX)) -+ return 0; -+ -+ return __check_sticky(dir, inode); -+} -+ - static inline void inode_has_no_xattr(struct inode *inode) - { - if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) --- -2.7.4 - - -From 9354e064adb1c963fdec1d38dc43d48dee394e1e Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 24 Oct 2014 00:14:36 +0200 -Subject: [PATCH 17/73] vfs: add whiteout support - -Whiteout isn't actually a new file type, but is represented as a char -device (Linus's idea) with 0/0 device number. - -This has several advantages compared to introducing a new whiteout file -type: - - - no userspace API changes (e.g. trivial to make backups of upper layer - filesystem, without losing whiteouts) - - - no fs image format changes (you can boot an old kernel/fsck without - whiteout support and things won't break) - - - implementation is trivial - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 787fb6bc9682ec7c05fb5d9561b57100fbc1cc41) -Signed-off-by: Alex Shi ---- - fs/namei.c | 14 ++++++++++++++ - include/linux/fs.h | 11 +++++++++++ - 2 files changed, 25 insertions(+) - -diff --git a/fs/namei.c b/fs/namei.c -index 1fd5d75..9a4443b 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -4308,6 +4308,20 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna - return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); - } - -+int vfs_whiteout(struct inode *dir, struct dentry *dentry) -+{ -+ int error = may_create(dir, dentry); -+ if (error) -+ return error; -+ -+ if (!dir->i_op->mknod) -+ return -EPERM; -+ -+ return dir->i_op->mknod(dir, dentry, -+ S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); -+} -+EXPORT_SYMBOL(vfs_whiteout); -+ - int readlink_copy(char __user *buffer, int buflen, const char *link) - { - int len = PTR_ERR(link); -diff --git a/include/linux/fs.h b/include/linux/fs.h -index ddb9ab5..18492c8 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -218,6 +218,13 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, - #define ATTR_TIMES_SET (1 << 16) - - /* -+ * Whiteout is represented by a char device. The following constants define the -+ * mode and device number to use. -+ */ -+#define WHITEOUT_MODE 0 -+#define WHITEOUT_DEV 0 -+ -+/* - * This is the Inode Attributes structure, used for notify_change(). It - * uses the above definitions as flags, to know which values have changed. - * Also, in this manner, a Filesystem can look at only the values it cares -@@ -1461,6 +1468,7 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); - extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); -+extern int vfs_whiteout(struct inode *, struct dentry *); - - /* - * VFS dentry helper functions. -@@ -1687,6 +1695,9 @@ struct super_operations { - #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) - #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) - -+#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ -+ (inode)->i_rdev == WHITEOUT_DEV) -+ - /* - * Inode state bits. Protected by inode->i_lock - * --- -2.7.4 - - -From 0e98d78deaf317c6524acf9c57eecdefbfdbe444 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 24 Oct 2014 00:14:37 +0200 -Subject: [PATCH 18/73] vfs: add RENAME_WHITEOUT - -This adds a new RENAME_WHITEOUT flag. This flag makes rename() create a -whiteout of source. The whiteout creation is atomic relative to the -rename. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 0d7a855526dd672e114aff2ac22b60fc6f155b08) -Signed-off-by: Alex Shi ---- - fs/namei.c | 8 ++++++-- - include/uapi/linux/fs.h | 1 + - 2 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/fs/namei.c b/fs/namei.c -index 9a4443b..a047265e 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -4171,12 +4171,16 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, - bool should_retry = false; - int error; - -- if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) -+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) - return -EINVAL; - -- if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) -+ if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && -+ (flags & RENAME_EXCHANGE)) - return -EINVAL; - -+ if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) -+ return -EPERM; -+ - retry: - from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); - if (IS_ERR(from)) { -diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h -index ca1a11b..3735fa0 100644 ---- a/include/uapi/linux/fs.h -+++ b/include/uapi/linux/fs.h -@@ -37,6 +37,7 @@ - - #define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ - #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ -+#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ - - struct fstrim_range { - __u64 start; --- -2.7.4 - - -From c5341097e1a69ba7f5cbc938b927489ad3bef1ee Mon Sep 17 00:00:00 2001 -From: "Paul E. McKenney" -Date: Mon, 27 Oct 2014 21:11:27 -0700 -Subject: [PATCH 19/73] rcu: Provide counterpart to rcu_dereference() for - non-RCU situations - -Although rcu_dereference() and friends can be used in situations where -object lifetimes are being managed by something other than RCU, the -resulting sparse and lockdep-RCU noise can be annoying. This commit -therefore supplies a lockless_dereference(), which provides the -protection for dereferences without the RCU-related debugging noise. - -Reported-by: Al Viro -Signed-off-by: Paul E. McKenney -Signed-off-by: Al Viro -(cherry picked from commit 54ef6df3f3f1353d99c80c437259d317b2cd1cbd) -Signed-off-by: Alex Shi ---- - include/linux/rcupdate.h | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h -index 72bf3a0..a9a98ff 100644 ---- a/include/linux/rcupdate.h -+++ b/include/linux/rcupdate.h -@@ -555,6 +555,21 @@ static inline void rcu_preempt_sleep_check(void) - #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) - - /** -+ * lockless_dereference() - safely load a pointer for later dereference -+ * @p: The pointer to load -+ * -+ * Similar to rcu_dereference(), but for situations where the pointed-to -+ * object's lifetime is managed by something other than RCU. That -+ * "something other" might be reference counting or simple immortality. -+ */ -+#define lockless_dereference(p) \ -+({ \ -+ typeof(p) _________p1 = ACCESS_ONCE(p); \ -+ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ -+ (_________p1); \ -+}) -+ -+/** - * rcu_assign_pointer() - assign to RCU-protected pointer - * @p: pointer to assign to - * @v: value to assign (publish) --- -2.7.4 - - -From 5587227cc0f683fe4732c0a6813f96154254e96b Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 24 Oct 2014 00:14:38 +0200 -Subject: [PATCH 20/73] overlay filesystem - -Overlayfs allows one, usually read-write, directory tree to be -overlaid onto another, read-only directory tree. All modifications -go to the upper, writable layer. - -This type of mechanism is most often used for live CDs but there's a -wide variety of other uses. - -The implementation differs from other "union filesystem" -implementations in that after a file is opened all operations go -directly to the underlying, lower or upper, filesystems. This -simplifies the implementation and allows native performance in these -cases. - -The dentry tree is duplicated from the underlying filesystems, this -enables fast cached lookups without adding special support into the -VFS. This uses slightly more memory than union mounts, but dentries -are relatively small. - -Currently inodes are duplicated as well, but it is a possible -optimization to share inodes for non-directories. - -Opening non directories results in the open forwarded to the -underlying filesystem. This makes the behavior very similar to union -mounts (with the same limitations vs. fchmod/fchown on O_RDONLY file -descriptors). - -Usage: - - mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper/upper,workdir=/upper/work /overlay - -The following cotributions have been folded into this patch: - -Neil Brown : - - minimal remount support - - use correct seek function for directories - - initialise is_real before use - - rename ovl_fill_cache to ovl_dir_read - -Felix Fietkau : - - fix a deadlock in ovl_dir_read_merged - - fix a deadlock in ovl_remove_whiteouts - -Erez Zadok - - fix cleanup after WARN_ON - -Sedat Dilek - - fix up permission to confirm to new API - -Robin Dong - - fix possible leak in ovl_new_inode - - create new inode in ovl_link - -Andy Whitcroft - - switch to __inode_permission() - - copy up i_uid/i_gid from the underlying inode - -AV: - - ovl_copy_up_locked() - dput(ERR_PTR(...)) on two failure exits - - ovl_clear_empty() - one failure exit forgetting to do unlock_rename(), - lack of check for udir being the parent of upper, dropping and regaining - the lock on udir (which would require _another_ check for parent being - right). - - bogus d_drop() in copyup and rename [fix from your mail] - - copyup/remove and copyup/rename races [fix from your mail] - - ovl_dir_fsync() leaving ERR_PTR() in ->realfile - - ovl_entry_free() is pointless - it's just a kfree_rcu() - - fold ovl_do_lookup() into ovl_lookup() - - manually assigning ->d_op is wrong. Just use ->s_d_op. - [patches picked from Miklos]: - * copyup/remove and copyup/rename races - * bogus d_drop() in copyup and rename - -Also thanks to the following people for testing and reporting bugs: - - Jordi Pujol - Andy Whitcroft - Michal Suchanek - Felix Fietkau - Erez Zadok - Randy Dunlap - -Signed-off-by: Miklos Szeredi -(cherry picked from commit e9be9d5e76e34872f0c37d72e25bc27fe9e2c54c) -Signed-off-by: Alex Shi ---- - fs/Kconfig | 1 + - fs/Makefile | 1 + - fs/overlayfs/Kconfig | 10 + - fs/overlayfs/Makefile | 7 + - fs/overlayfs/copy_up.c | 414 +++++++++++++++++++++ - fs/overlayfs/dir.c | 921 +++++++++++++++++++++++++++++++++++++++++++++++ - fs/overlayfs/inode.c | 425 ++++++++++++++++++++++ - fs/overlayfs/overlayfs.h | 191 ++++++++++ - fs/overlayfs/readdir.c | 587 ++++++++++++++++++++++++++++++ - fs/overlayfs/super.c | 727 +++++++++++++++++++++++++++++++++++++ - 10 files changed, 3284 insertions(+) - create mode 100644 fs/overlayfs/Kconfig - create mode 100644 fs/overlayfs/Makefile - create mode 100644 fs/overlayfs/copy_up.c - create mode 100644 fs/overlayfs/dir.c - create mode 100644 fs/overlayfs/inode.c - create mode 100644 fs/overlayfs/overlayfs.h - create mode 100644 fs/overlayfs/readdir.c - create mode 100644 fs/overlayfs/super.c - -diff --git a/fs/Kconfig b/fs/Kconfig -index 7385e54..6b40fd8 100644 ---- a/fs/Kconfig -+++ b/fs/Kconfig -@@ -67,6 +67,7 @@ source "fs/quota/Kconfig" - - source "fs/autofs4/Kconfig" - source "fs/fuse/Kconfig" -+source "fs/overlayfs/Kconfig" - - menu "Caches" - -diff --git a/fs/Makefile b/fs/Makefile -index 47ac07b..5eb9caf 100644 ---- a/fs/Makefile -+++ b/fs/Makefile -@@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ - obj-$(CONFIG_AUTOFS4_FS) += autofs4/ - obj-$(CONFIG_ADFS_FS) += adfs/ - obj-$(CONFIG_FUSE_FS) += fuse/ -+obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ - obj-$(CONFIG_UDF_FS) += udf/ - obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ - obj-$(CONFIG_OMFS_FS) += omfs/ -diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig -new file mode 100644 -index 0000000..e601259 ---- /dev/null -+++ b/fs/overlayfs/Kconfig -@@ -0,0 +1,10 @@ -+config OVERLAYFS_FS -+ tristate "Overlay filesystem support" -+ help -+ An overlay filesystem combines two filesystems - an 'upper' filesystem -+ and a 'lower' filesystem. When a name exists in both filesystems, the -+ object in the 'upper' filesystem is visible while the object in the -+ 'lower' filesystem is either hidden or, in the case of directories, -+ merged with the 'upper' object. -+ -+ For more information see Documentation/filesystems/overlayfs.txt -diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile -new file mode 100644 -index 0000000..8f91889 ---- /dev/null -+++ b/fs/overlayfs/Makefile -@@ -0,0 +1,7 @@ -+# -+# Makefile for the overlay filesystem. -+# -+ -+obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o -+ -+overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o -diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c -new file mode 100644 -index 0000000..ea10a87 ---- /dev/null -+++ b/fs/overlayfs/copy_up.c -@@ -0,0 +1,414 @@ -+/* -+ * -+ * Copyright (C) 2011 Novell Inc. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "overlayfs.h" -+ -+#define OVL_COPY_UP_CHUNK_SIZE (1 << 20) -+ -+int ovl_copy_xattr(struct dentry *old, struct dentry *new) -+{ -+ ssize_t list_size, size; -+ char *buf, *name, *value; -+ int error; -+ -+ if (!old->d_inode->i_op->getxattr || -+ !new->d_inode->i_op->getxattr) -+ return 0; -+ -+ list_size = vfs_listxattr(old, NULL, 0); -+ if (list_size <= 0) { -+ if (list_size == -EOPNOTSUPP) -+ return 0; -+ return list_size; -+ } -+ -+ buf = kzalloc(list_size, GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; -+ -+ error = -ENOMEM; -+ value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); -+ if (!value) -+ goto out; -+ -+ list_size = vfs_listxattr(old, buf, list_size); -+ if (list_size <= 0) { -+ error = list_size; -+ goto out_free_value; -+ } -+ -+ for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { -+ size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); -+ if (size <= 0) { -+ error = size; -+ goto out_free_value; -+ } -+ error = vfs_setxattr(new, name, value, size, 0); -+ if (error) -+ goto out_free_value; -+ } -+ -+out_free_value: -+ kfree(value); -+out: -+ kfree(buf); -+ return error; -+} -+ -+static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) -+{ -+ struct file *old_file; -+ struct file *new_file; -+ loff_t old_pos = 0; -+ loff_t new_pos = 0; -+ int error = 0; -+ -+ if (len == 0) -+ return 0; -+ -+ old_file = ovl_path_open(old, O_RDONLY); -+ if (IS_ERR(old_file)) -+ return PTR_ERR(old_file); -+ -+ new_file = ovl_path_open(new, O_WRONLY); -+ if (IS_ERR(new_file)) { -+ error = PTR_ERR(new_file); -+ goto out_fput; -+ } -+ -+ /* FIXME: copy up sparse files efficiently */ -+ while (len) { -+ size_t this_len = OVL_COPY_UP_CHUNK_SIZE; -+ long bytes; -+ -+ if (len < this_len) -+ this_len = len; -+ -+ if (signal_pending_state(TASK_KILLABLE, current)) { -+ error = -EINTR; -+ break; -+ } -+ -+ bytes = do_splice_direct(old_file, &old_pos, -+ new_file, &new_pos, -+ this_len, SPLICE_F_MOVE); -+ if (bytes <= 0) { -+ error = bytes; -+ break; -+ } -+ WARN_ON(old_pos != new_pos); -+ -+ len -= bytes; -+ } -+ -+ fput(new_file); -+out_fput: -+ fput(old_file); -+ return error; -+} -+ -+static char *ovl_read_symlink(struct dentry *realdentry) -+{ -+ int res; -+ char *buf; -+ struct inode *inode = realdentry->d_inode; -+ mm_segment_t old_fs; -+ -+ res = -EINVAL; -+ if (!inode->i_op->readlink) -+ goto err; -+ -+ res = -ENOMEM; -+ buf = (char *) __get_free_page(GFP_KERNEL); -+ if (!buf) -+ goto err; -+ -+ old_fs = get_fs(); -+ set_fs(get_ds()); -+ /* The cast to a user pointer is valid due to the set_fs() */ -+ res = inode->i_op->readlink(realdentry, -+ (char __user *)buf, PAGE_SIZE - 1); -+ set_fs(old_fs); -+ if (res < 0) { -+ free_page((unsigned long) buf); -+ goto err; -+ } -+ buf[res] = '\0'; -+ -+ return buf; -+ -+err: -+ return ERR_PTR(res); -+} -+ -+static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) -+{ -+ struct iattr attr = { -+ .ia_valid = -+ ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, -+ .ia_atime = stat->atime, -+ .ia_mtime = stat->mtime, -+ }; -+ -+ return notify_change(upperdentry, &attr, NULL); -+} -+ -+int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) -+{ -+ int err = 0; -+ -+ if (!S_ISLNK(stat->mode)) { -+ struct iattr attr = { -+ .ia_valid = ATTR_MODE, -+ .ia_mode = stat->mode, -+ }; -+ err = notify_change(upperdentry, &attr, NULL); -+ } -+ if (!err) { -+ struct iattr attr = { -+ .ia_valid = ATTR_UID | ATTR_GID, -+ .ia_uid = stat->uid, -+ .ia_gid = stat->gid, -+ }; -+ err = notify_change(upperdentry, &attr, NULL); -+ } -+ if (!err) -+ ovl_set_timestamps(upperdentry, stat); -+ -+ return err; -+ -+} -+ -+static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, -+ struct dentry *dentry, struct path *lowerpath, -+ struct kstat *stat, struct iattr *attr, -+ const char *link) -+{ -+ struct inode *wdir = workdir->d_inode; -+ struct inode *udir = upperdir->d_inode; -+ struct dentry *newdentry = NULL; -+ struct dentry *upper = NULL; -+ umode_t mode = stat->mode; -+ int err; -+ -+ newdentry = ovl_lookup_temp(workdir, dentry); -+ err = PTR_ERR(newdentry); -+ if (IS_ERR(newdentry)) -+ goto out; -+ -+ upper = lookup_one_len(dentry->d_name.name, upperdir, -+ dentry->d_name.len); -+ err = PTR_ERR(upper); -+ if (IS_ERR(upper)) -+ goto out1; -+ -+ /* Can't properly set mode on creation because of the umask */ -+ stat->mode &= S_IFMT; -+ err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); -+ stat->mode = mode; -+ if (err) -+ goto out2; -+ -+ if (S_ISREG(stat->mode)) { -+ struct path upperpath; -+ ovl_path_upper(dentry, &upperpath); -+ BUG_ON(upperpath.dentry != NULL); -+ upperpath.dentry = newdentry; -+ -+ err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); -+ if (err) -+ goto out_cleanup; -+ } -+ -+ err = ovl_copy_xattr(lowerpath->dentry, newdentry); -+ if (err) -+ goto out_cleanup; -+ -+ mutex_lock(&newdentry->d_inode->i_mutex); -+ err = ovl_set_attr(newdentry, stat); -+ if (!err && attr) -+ err = notify_change(newdentry, attr, NULL); -+ mutex_unlock(&newdentry->d_inode->i_mutex); -+ if (err) -+ goto out_cleanup; -+ -+ err = ovl_do_rename(wdir, newdentry, udir, upper, 0); -+ if (err) -+ goto out_cleanup; -+ -+ ovl_dentry_update(dentry, newdentry); -+ newdentry = NULL; -+ -+ /* -+ * Non-directores become opaque when copied up. -+ */ -+ if (!S_ISDIR(stat->mode)) -+ ovl_dentry_set_opaque(dentry, true); -+out2: -+ dput(upper); -+out1: -+ dput(newdentry); -+out: -+ return err; -+ -+out_cleanup: -+ ovl_cleanup(wdir, newdentry); -+ goto out; -+} -+ -+/* -+ * Copy up a single dentry -+ * -+ * Directory renames only allowed on "pure upper" (already created on -+ * upper filesystem, never copied up). Directories which are on lower or -+ * are merged may not be renamed. For these -EXDEV is returned and -+ * userspace has to deal with it. This means, when copying up a -+ * directory we can rely on it and ancestors being stable. -+ * -+ * Non-directory renames start with copy up of source if necessary. The -+ * actual rename will only proceed once the copy up was successful. Copy -+ * up uses upper parent i_mutex for exclusion. Since rename can change -+ * d_parent it is possible that the copy up will lock the old parent. At -+ * that point the file will have already been copied up anyway. -+ */ -+int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, -+ struct path *lowerpath, struct kstat *stat, -+ struct iattr *attr) -+{ -+ struct dentry *workdir = ovl_workdir(dentry); -+ int err; -+ struct kstat pstat; -+ struct path parentpath; -+ struct dentry *upperdir; -+ struct dentry *upperdentry; -+ const struct cred *old_cred; -+ struct cred *override_cred; -+ char *link = NULL; -+ -+ ovl_path_upper(parent, &parentpath); -+ upperdir = parentpath.dentry; -+ -+ err = vfs_getattr(&parentpath, &pstat); -+ if (err) -+ return err; -+ -+ if (S_ISLNK(stat->mode)) { -+ link = ovl_read_symlink(lowerpath->dentry); -+ if (IS_ERR(link)) -+ return PTR_ERR(link); -+ } -+ -+ err = -ENOMEM; -+ override_cred = prepare_creds(); -+ if (!override_cred) -+ goto out_free_link; -+ -+ override_cred->fsuid = stat->uid; -+ override_cred->fsgid = stat->gid; -+ /* -+ * CAP_SYS_ADMIN for copying up extended attributes -+ * CAP_DAC_OVERRIDE for create -+ * CAP_FOWNER for chmod, timestamp update -+ * CAP_FSETID for chmod -+ * CAP_CHOWN for chown -+ * CAP_MKNOD for mknod -+ */ -+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); -+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); -+ cap_raise(override_cred->cap_effective, CAP_FOWNER); -+ cap_raise(override_cred->cap_effective, CAP_FSETID); -+ cap_raise(override_cred->cap_effective, CAP_CHOWN); -+ cap_raise(override_cred->cap_effective, CAP_MKNOD); -+ old_cred = override_creds(override_cred); -+ -+ err = -EIO; -+ if (lock_rename(workdir, upperdir) != NULL) { -+ pr_err("overlayfs: failed to lock workdir+upperdir\n"); -+ goto out_unlock; -+ } -+ upperdentry = ovl_dentry_upper(dentry); -+ if (upperdentry) { -+ unlock_rename(workdir, upperdir); -+ err = 0; -+ /* Raced with another copy-up? Do the setattr here */ -+ if (attr) { -+ mutex_lock(&upperdentry->d_inode->i_mutex); -+ err = notify_change(upperdentry, attr, NULL); -+ mutex_unlock(&upperdentry->d_inode->i_mutex); -+ } -+ goto out_put_cred; -+ } -+ -+ err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, -+ stat, attr, link); -+ if (!err) { -+ /* Restore timestamps on parent (best effort) */ -+ ovl_set_timestamps(upperdir, &pstat); -+ } -+out_unlock: -+ unlock_rename(workdir, upperdir); -+out_put_cred: -+ revert_creds(old_cred); -+ put_cred(override_cred); -+ -+out_free_link: -+ if (link) -+ free_page((unsigned long) link); -+ -+ return err; -+} -+ -+int ovl_copy_up(struct dentry *dentry) -+{ -+ int err; -+ -+ err = 0; -+ while (!err) { -+ struct dentry *next; -+ struct dentry *parent; -+ struct path lowerpath; -+ struct kstat stat; -+ enum ovl_path_type type = ovl_path_type(dentry); -+ -+ if (type != OVL_PATH_LOWER) -+ break; -+ -+ next = dget(dentry); -+ /* find the topmost dentry not yet copied up */ -+ for (;;) { -+ parent = dget_parent(next); -+ -+ type = ovl_path_type(parent); -+ if (type != OVL_PATH_LOWER) -+ break; -+ -+ dput(next); -+ next = parent; -+ } -+ -+ ovl_path_lower(next, &lowerpath); -+ err = vfs_getattr(&lowerpath, &stat); -+ if (!err) -+ err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); -+ -+ dput(parent); -+ dput(next); -+ } -+ -+ return err; -+} -diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c -new file mode 100644 -index 0000000..15cd91a ---- /dev/null -+++ b/fs/overlayfs/dir.c -@@ -0,0 +1,921 @@ -+/* -+ * -+ * Copyright (C) 2011 Novell Inc. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "overlayfs.h" -+ -+void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) -+{ -+ int err; -+ -+ dget(wdentry); -+ if (S_ISDIR(wdentry->d_inode->i_mode)) -+ err = ovl_do_rmdir(wdir, wdentry); -+ else -+ err = ovl_do_unlink(wdir, wdentry); -+ dput(wdentry); -+ -+ if (err) { -+ pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", -+ wdentry, err); -+ } -+} -+ -+struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) -+{ -+ struct dentry *temp; -+ char name[20]; -+ -+ snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry); -+ -+ temp = lookup_one_len(name, workdir, strlen(name)); -+ if (!IS_ERR(temp) && temp->d_inode) { -+ pr_err("overlayfs: workdir/%s already exists\n", name); -+ dput(temp); -+ temp = ERR_PTR(-EIO); -+ } -+ -+ return temp; -+} -+ -+/* caller holds i_mutex on workdir */ -+static struct dentry *ovl_whiteout(struct dentry *workdir, -+ struct dentry *dentry) -+{ -+ int err; -+ struct dentry *whiteout; -+ struct inode *wdir = workdir->d_inode; -+ -+ whiteout = ovl_lookup_temp(workdir, dentry); -+ if (IS_ERR(whiteout)) -+ return whiteout; -+ -+ err = ovl_do_whiteout(wdir, whiteout); -+ if (err) { -+ dput(whiteout); -+ whiteout = ERR_PTR(err); -+ } -+ -+ return whiteout; -+} -+ -+int ovl_create_real(struct inode *dir, struct dentry *newdentry, -+ struct kstat *stat, const char *link, -+ struct dentry *hardlink, bool debug) -+{ -+ int err; -+ -+ if (newdentry->d_inode) -+ return -ESTALE; -+ -+ if (hardlink) { -+ err = ovl_do_link(hardlink, dir, newdentry, debug); -+ } else { -+ switch (stat->mode & S_IFMT) { -+ case S_IFREG: -+ err = ovl_do_create(dir, newdentry, stat->mode, debug); -+ break; -+ -+ case S_IFDIR: -+ err = ovl_do_mkdir(dir, newdentry, stat->mode, debug); -+ break; -+ -+ case S_IFCHR: -+ case S_IFBLK: -+ case S_IFIFO: -+ case S_IFSOCK: -+ err = ovl_do_mknod(dir, newdentry, -+ stat->mode, stat->rdev, debug); -+ break; -+ -+ case S_IFLNK: -+ err = ovl_do_symlink(dir, newdentry, link, debug); -+ break; -+ -+ default: -+ err = -EPERM; -+ } -+ } -+ if (!err && WARN_ON(!newdentry->d_inode)) { -+ /* -+ * Not quite sure if non-instantiated dentry is legal or not. -+ * VFS doesn't seem to care so check and warn here. -+ */ -+ err = -ENOENT; -+ } -+ return err; -+} -+ -+static int ovl_set_opaque(struct dentry *upperdentry) -+{ -+ return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0); -+} -+ -+static void ovl_remove_opaque(struct dentry *upperdentry) -+{ -+ int err; -+ -+ err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr); -+ if (err) { -+ pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", -+ upperdentry->d_name.name, err); -+ } -+} -+ -+static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, -+ struct kstat *stat) -+{ -+ int err; -+ enum ovl_path_type type; -+ struct path realpath; -+ -+ type = ovl_path_real(dentry, &realpath); -+ err = vfs_getattr(&realpath, stat); -+ if (err) -+ return err; -+ -+ stat->dev = dentry->d_sb->s_dev; -+ stat->ino = dentry->d_inode->i_ino; -+ -+ /* -+ * It's probably not worth it to count subdirs to get the -+ * correct link count. nlink=1 seems to pacify 'find' and -+ * other utilities. -+ */ -+ if (type == OVL_PATH_MERGE) -+ stat->nlink = 1; -+ -+ return 0; -+} -+ -+static int ovl_create_upper(struct dentry *dentry, struct inode *inode, -+ struct kstat *stat, const char *link, -+ struct dentry *hardlink) -+{ -+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); -+ struct inode *udir = upperdir->d_inode; -+ struct dentry *newdentry; -+ int err; -+ -+ mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT); -+ newdentry = lookup_one_len(dentry->d_name.name, upperdir, -+ dentry->d_name.len); -+ err = PTR_ERR(newdentry); -+ if (IS_ERR(newdentry)) -+ goto out_unlock; -+ err = ovl_create_real(udir, newdentry, stat, link, hardlink, false); -+ if (err) -+ goto out_dput; -+ -+ ovl_dentry_version_inc(dentry->d_parent); -+ ovl_dentry_update(dentry, newdentry); -+ ovl_copyattr(newdentry->d_inode, inode); -+ d_instantiate(dentry, inode); -+ newdentry = NULL; -+out_dput: -+ dput(newdentry); -+out_unlock: -+ mutex_unlock(&udir->i_mutex); -+ return err; -+} -+ -+static int ovl_lock_rename_workdir(struct dentry *workdir, -+ struct dentry *upperdir) -+{ -+ /* Workdir should not be the same as upperdir */ -+ if (workdir == upperdir) -+ goto err; -+ -+ /* Workdir should not be subdir of upperdir and vice versa */ -+ if (lock_rename(workdir, upperdir) != NULL) -+ goto err_unlock; -+ -+ return 0; -+ -+err_unlock: -+ unlock_rename(workdir, upperdir); -+err: -+ pr_err("overlayfs: failed to lock workdir+upperdir\n"); -+ return -EIO; -+} -+ -+static struct dentry *ovl_clear_empty(struct dentry *dentry, -+ struct list_head *list) -+{ -+ struct dentry *workdir = ovl_workdir(dentry); -+ struct inode *wdir = workdir->d_inode; -+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); -+ struct inode *udir = upperdir->d_inode; -+ struct path upperpath; -+ struct dentry *upper; -+ struct dentry *opaquedir; -+ struct kstat stat; -+ int err; -+ -+ err = ovl_lock_rename_workdir(workdir, upperdir); -+ if (err) -+ goto out; -+ -+ ovl_path_upper(dentry, &upperpath); -+ err = vfs_getattr(&upperpath, &stat); -+ if (err) -+ goto out_unlock; -+ -+ err = -ESTALE; -+ if (!S_ISDIR(stat.mode)) -+ goto out_unlock; -+ upper = upperpath.dentry; -+ if (upper->d_parent->d_inode != udir) -+ goto out_unlock; -+ -+ opaquedir = ovl_lookup_temp(workdir, dentry); -+ err = PTR_ERR(opaquedir); -+ if (IS_ERR(opaquedir)) -+ goto out_unlock; -+ -+ err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true); -+ if (err) -+ goto out_dput; -+ -+ err = ovl_copy_xattr(upper, opaquedir); -+ if (err) -+ goto out_cleanup; -+ -+ err = ovl_set_opaque(opaquedir); -+ if (err) -+ goto out_cleanup; -+ -+ mutex_lock(&opaquedir->d_inode->i_mutex); -+ err = ovl_set_attr(opaquedir, &stat); -+ mutex_unlock(&opaquedir->d_inode->i_mutex); -+ if (err) -+ goto out_cleanup; -+ -+ err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE); -+ if (err) -+ goto out_cleanup; -+ -+ ovl_cleanup_whiteouts(upper, list); -+ ovl_cleanup(wdir, upper); -+ unlock_rename(workdir, upperdir); -+ -+ /* dentry's upper doesn't match now, get rid of it */ -+ d_drop(dentry); -+ -+ return opaquedir; -+ -+out_cleanup: -+ ovl_cleanup(wdir, opaquedir); -+out_dput: -+ dput(opaquedir); -+out_unlock: -+ unlock_rename(workdir, upperdir); -+out: -+ return ERR_PTR(err); -+} -+ -+static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, -+ enum ovl_path_type type) -+{ -+ int err; -+ struct dentry *ret = NULL; -+ LIST_HEAD(list); -+ -+ err = ovl_check_empty_dir(dentry, &list); -+ if (err) -+ ret = ERR_PTR(err); -+ else if (type == OVL_PATH_MERGE) -+ ret = ovl_clear_empty(dentry, &list); -+ -+ ovl_cache_free(&list); -+ -+ return ret; -+} -+ -+static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, -+ struct kstat *stat, const char *link, -+ struct dentry *hardlink) -+{ -+ struct dentry *workdir = ovl_workdir(dentry); -+ struct inode *wdir = workdir->d_inode; -+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); -+ struct inode *udir = upperdir->d_inode; -+ struct dentry *upper; -+ struct dentry *newdentry; -+ int err; -+ -+ err = ovl_lock_rename_workdir(workdir, upperdir); -+ if (err) -+ goto out; -+ -+ newdentry = ovl_lookup_temp(workdir, dentry); -+ err = PTR_ERR(newdentry); -+ if (IS_ERR(newdentry)) -+ goto out_unlock; -+ -+ upper = lookup_one_len(dentry->d_name.name, upperdir, -+ dentry->d_name.len); -+ err = PTR_ERR(upper); -+ if (IS_ERR(upper)) -+ goto out_dput; -+ -+ err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true); -+ if (err) -+ goto out_dput2; -+ -+ if (S_ISDIR(stat->mode)) { -+ err = ovl_set_opaque(newdentry); -+ if (err) -+ goto out_cleanup; -+ -+ err = ovl_do_rename(wdir, newdentry, udir, upper, -+ RENAME_EXCHANGE); -+ if (err) -+ goto out_cleanup; -+ -+ ovl_cleanup(wdir, upper); -+ } else { -+ err = ovl_do_rename(wdir, newdentry, udir, upper, 0); -+ if (err) -+ goto out_cleanup; -+ } -+ ovl_dentry_version_inc(dentry->d_parent); -+ ovl_dentry_update(dentry, newdentry); -+ ovl_copyattr(newdentry->d_inode, inode); -+ d_instantiate(dentry, inode); -+ newdentry = NULL; -+out_dput2: -+ dput(upper); -+out_dput: -+ dput(newdentry); -+out_unlock: -+ unlock_rename(workdir, upperdir); -+out: -+ return err; -+ -+out_cleanup: -+ ovl_cleanup(wdir, newdentry); -+ goto out_dput2; -+} -+ -+static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, -+ const char *link, struct dentry *hardlink) -+{ -+ int err; -+ struct inode *inode; -+ struct kstat stat = { -+ .mode = mode, -+ .rdev = rdev, -+ }; -+ -+ err = -ENOMEM; -+ inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata); -+ if (!inode) -+ goto out; -+ -+ err = ovl_copy_up(dentry->d_parent); -+ if (err) -+ goto out_iput; -+ -+ if (!ovl_dentry_is_opaque(dentry)) { -+ err = ovl_create_upper(dentry, inode, &stat, link, hardlink); -+ } else { -+ const struct cred *old_cred; -+ struct cred *override_cred; -+ -+ err = -ENOMEM; -+ override_cred = prepare_creds(); -+ if (!override_cred) -+ goto out_iput; -+ -+ /* -+ * CAP_SYS_ADMIN for setting opaque xattr -+ * CAP_DAC_OVERRIDE for create in workdir, rename -+ * CAP_FOWNER for removing whiteout from sticky dir -+ */ -+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); -+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); -+ cap_raise(override_cred->cap_effective, CAP_FOWNER); -+ old_cred = override_creds(override_cred); -+ -+ err = ovl_create_over_whiteout(dentry, inode, &stat, link, -+ hardlink); -+ -+ revert_creds(old_cred); -+ put_cred(override_cred); -+ } -+ -+ if (!err) -+ inode = NULL; -+out_iput: -+ iput(inode); -+out: -+ return err; -+} -+ -+static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, -+ const char *link) -+{ -+ int err; -+ -+ err = ovl_want_write(dentry); -+ if (!err) { -+ err = ovl_create_or_link(dentry, mode, rdev, link, NULL); -+ ovl_drop_write(dentry); -+ } -+ -+ return err; -+} -+ -+static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode, -+ bool excl) -+{ -+ return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); -+} -+ -+static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) -+{ -+ return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); -+} -+ -+static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, -+ dev_t rdev) -+{ -+ /* Don't allow creation of "whiteout" on overlay */ -+ if (S_ISCHR(mode) && rdev == WHITEOUT_DEV) -+ return -EPERM; -+ -+ return ovl_create_object(dentry, mode, rdev, NULL); -+} -+ -+static int ovl_symlink(struct inode *dir, struct dentry *dentry, -+ const char *link) -+{ -+ return ovl_create_object(dentry, S_IFLNK, 0, link); -+} -+ -+static int ovl_link(struct dentry *old, struct inode *newdir, -+ struct dentry *new) -+{ -+ int err; -+ struct dentry *upper; -+ -+ err = ovl_want_write(old); -+ if (err) -+ goto out; -+ -+ err = ovl_copy_up(old); -+ if (err) -+ goto out_drop_write; -+ -+ upper = ovl_dentry_upper(old); -+ err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper); -+ -+out_drop_write: -+ ovl_drop_write(old); -+out: -+ return err; -+} -+ -+static int ovl_remove_and_whiteout(struct dentry *dentry, -+ enum ovl_path_type type, bool is_dir) -+{ -+ struct dentry *workdir = ovl_workdir(dentry); -+ struct inode *wdir = workdir->d_inode; -+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); -+ struct inode *udir = upperdir->d_inode; -+ struct dentry *whiteout; -+ struct dentry *upper; -+ struct dentry *opaquedir = NULL; -+ int err; -+ -+ if (is_dir) { -+ opaquedir = ovl_check_empty_and_clear(dentry, type); -+ err = PTR_ERR(opaquedir); -+ if (IS_ERR(opaquedir)) -+ goto out; -+ } -+ -+ err = ovl_lock_rename_workdir(workdir, upperdir); -+ if (err) -+ goto out_dput; -+ -+ whiteout = ovl_whiteout(workdir, dentry); -+ err = PTR_ERR(whiteout); -+ if (IS_ERR(whiteout)) -+ goto out_unlock; -+ -+ if (type == OVL_PATH_LOWER) { -+ upper = lookup_one_len(dentry->d_name.name, upperdir, -+ dentry->d_name.len); -+ err = PTR_ERR(upper); -+ if (IS_ERR(upper)) -+ goto kill_whiteout; -+ -+ err = ovl_do_rename(wdir, whiteout, udir, upper, 0); -+ dput(upper); -+ if (err) -+ goto kill_whiteout; -+ } else { -+ int flags = 0; -+ -+ upper = ovl_dentry_upper(dentry); -+ if (opaquedir) -+ upper = opaquedir; -+ err = -ESTALE; -+ if (upper->d_parent != upperdir) -+ goto kill_whiteout; -+ -+ if (is_dir) -+ flags |= RENAME_EXCHANGE; -+ -+ err = ovl_do_rename(wdir, whiteout, udir, upper, flags); -+ if (err) -+ goto kill_whiteout; -+ -+ if (is_dir) -+ ovl_cleanup(wdir, upper); -+ } -+ ovl_dentry_version_inc(dentry->d_parent); -+out_d_drop: -+ d_drop(dentry); -+ dput(whiteout); -+out_unlock: -+ unlock_rename(workdir, upperdir); -+out_dput: -+ dput(opaquedir); -+out: -+ return err; -+ -+kill_whiteout: -+ ovl_cleanup(wdir, whiteout); -+ goto out_d_drop; -+} -+ -+static int ovl_remove_upper(struct dentry *dentry, bool is_dir) -+{ -+ struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); -+ struct inode *dir = upperdir->d_inode; -+ struct dentry *upper = ovl_dentry_upper(dentry); -+ int err; -+ -+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); -+ err = -ESTALE; -+ if (upper->d_parent == upperdir) { -+ /* Don't let d_delete() think it can reset d_inode */ -+ dget(upper); -+ if (is_dir) -+ err = vfs_rmdir(dir, upper); -+ else -+ err = vfs_unlink(dir, upper, NULL); -+ dput(upper); -+ ovl_dentry_version_inc(dentry->d_parent); -+ } -+ -+ /* -+ * Keeping this dentry hashed would mean having to release -+ * upperpath/lowerpath, which could only be done if we are the -+ * sole user of this dentry. Too tricky... Just unhash for -+ * now. -+ */ -+ d_drop(dentry); -+ mutex_unlock(&dir->i_mutex); -+ -+ return err; -+} -+ -+static inline int ovl_check_sticky(struct dentry *dentry) -+{ -+ struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode; -+ struct inode *inode = ovl_dentry_real(dentry)->d_inode; -+ -+ if (check_sticky(dir, inode)) -+ return -EPERM; -+ -+ return 0; -+} -+ -+static int ovl_do_remove(struct dentry *dentry, bool is_dir) -+{ -+ enum ovl_path_type type; -+ int err; -+ -+ err = ovl_check_sticky(dentry); -+ if (err) -+ goto out; -+ -+ err = ovl_want_write(dentry); -+ if (err) -+ goto out; -+ -+ err = ovl_copy_up(dentry->d_parent); -+ if (err) -+ goto out_drop_write; -+ -+ type = ovl_path_type(dentry); -+ if (type == OVL_PATH_PURE_UPPER) { -+ err = ovl_remove_upper(dentry, is_dir); -+ } else { -+ const struct cred *old_cred; -+ struct cred *override_cred; -+ -+ err = -ENOMEM; -+ override_cred = prepare_creds(); -+ if (!override_cred) -+ goto out_drop_write; -+ -+ /* -+ * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir -+ * CAP_DAC_OVERRIDE for create in workdir, rename -+ * CAP_FOWNER for removing whiteout from sticky dir -+ * CAP_FSETID for chmod of opaque dir -+ * CAP_CHOWN for chown of opaque dir -+ */ -+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); -+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); -+ cap_raise(override_cred->cap_effective, CAP_FOWNER); -+ cap_raise(override_cred->cap_effective, CAP_FSETID); -+ cap_raise(override_cred->cap_effective, CAP_CHOWN); -+ old_cred = override_creds(override_cred); -+ -+ err = ovl_remove_and_whiteout(dentry, type, is_dir); -+ -+ revert_creds(old_cred); -+ put_cred(override_cred); -+ } -+out_drop_write: -+ ovl_drop_write(dentry); -+out: -+ return err; -+} -+ -+static int ovl_unlink(struct inode *dir, struct dentry *dentry) -+{ -+ return ovl_do_remove(dentry, false); -+} -+ -+static int ovl_rmdir(struct inode *dir, struct dentry *dentry) -+{ -+ return ovl_do_remove(dentry, true); -+} -+ -+static int ovl_rename2(struct inode *olddir, struct dentry *old, -+ struct inode *newdir, struct dentry *new, -+ unsigned int flags) -+{ -+ int err; -+ enum ovl_path_type old_type; -+ enum ovl_path_type new_type; -+ struct dentry *old_upperdir; -+ struct dentry *new_upperdir; -+ struct dentry *olddentry; -+ struct dentry *newdentry; -+ struct dentry *trap; -+ bool old_opaque; -+ bool new_opaque; -+ bool new_create = false; -+ bool cleanup_whiteout = false; -+ bool overwrite = !(flags & RENAME_EXCHANGE); -+ bool is_dir = S_ISDIR(old->d_inode->i_mode); -+ bool new_is_dir = false; -+ struct dentry *opaquedir = NULL; -+ const struct cred *old_cred = NULL; -+ struct cred *override_cred = NULL; -+ -+ err = -EINVAL; -+ if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) -+ goto out; -+ -+ flags &= ~RENAME_NOREPLACE; -+ -+ err = ovl_check_sticky(old); -+ if (err) -+ goto out; -+ -+ /* Don't copy up directory trees */ -+ old_type = ovl_path_type(old); -+ err = -EXDEV; -+ if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir) -+ goto out; -+ -+ if (new->d_inode) { -+ err = ovl_check_sticky(new); -+ if (err) -+ goto out; -+ -+ if (S_ISDIR(new->d_inode->i_mode)) -+ new_is_dir = true; -+ -+ new_type = ovl_path_type(new); -+ err = -EXDEV; -+ if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) -+ goto out; -+ -+ err = 0; -+ if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { -+ if (ovl_dentry_lower(old)->d_inode == -+ ovl_dentry_lower(new)->d_inode) -+ goto out; -+ } -+ if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { -+ if (ovl_dentry_upper(old)->d_inode == -+ ovl_dentry_upper(new)->d_inode) -+ goto out; -+ } -+ } else { -+ if (ovl_dentry_is_opaque(new)) -+ new_type = OVL_PATH_UPPER; -+ else -+ new_type = OVL_PATH_PURE_UPPER; -+ } -+ -+ err = ovl_want_write(old); -+ if (err) -+ goto out; -+ -+ err = ovl_copy_up(old); -+ if (err) -+ goto out_drop_write; -+ -+ err = ovl_copy_up(new->d_parent); -+ if (err) -+ goto out_drop_write; -+ if (!overwrite) { -+ err = ovl_copy_up(new); -+ if (err) -+ goto out_drop_write; -+ } -+ -+ old_opaque = old_type != OVL_PATH_PURE_UPPER; -+ new_opaque = new_type != OVL_PATH_PURE_UPPER; -+ -+ if (old_opaque || new_opaque) { -+ err = -ENOMEM; -+ override_cred = prepare_creds(); -+ if (!override_cred) -+ goto out_drop_write; -+ -+ /* -+ * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir -+ * CAP_DAC_OVERRIDE for create in workdir -+ * CAP_FOWNER for removing whiteout from sticky dir -+ * CAP_FSETID for chmod of opaque dir -+ * CAP_CHOWN for chown of opaque dir -+ */ -+ cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); -+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); -+ cap_raise(override_cred->cap_effective, CAP_FOWNER); -+ cap_raise(override_cred->cap_effective, CAP_FSETID); -+ cap_raise(override_cred->cap_effective, CAP_CHOWN); -+ old_cred = override_creds(override_cred); -+ } -+ -+ if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { -+ opaquedir = ovl_check_empty_and_clear(new, new_type); -+ err = PTR_ERR(opaquedir); -+ if (IS_ERR(opaquedir)) { -+ opaquedir = NULL; -+ goto out_revert_creds; -+ } -+ } -+ -+ if (overwrite) { -+ if (old_opaque) { -+ if (new->d_inode || !new_opaque) { -+ /* Whiteout source */ -+ flags |= RENAME_WHITEOUT; -+ } else { -+ /* Switch whiteouts */ -+ flags |= RENAME_EXCHANGE; -+ } -+ } else if (is_dir && !new->d_inode && new_opaque) { -+ flags |= RENAME_EXCHANGE; -+ cleanup_whiteout = true; -+ } -+ } -+ -+ old_upperdir = ovl_dentry_upper(old->d_parent); -+ new_upperdir = ovl_dentry_upper(new->d_parent); -+ -+ trap = lock_rename(new_upperdir, old_upperdir); -+ -+ olddentry = ovl_dentry_upper(old); -+ newdentry = ovl_dentry_upper(new); -+ if (newdentry) { -+ if (opaquedir) { -+ newdentry = opaquedir; -+ opaquedir = NULL; -+ } else { -+ dget(newdentry); -+ } -+ } else { -+ new_create = true; -+ newdentry = lookup_one_len(new->d_name.name, new_upperdir, -+ new->d_name.len); -+ err = PTR_ERR(newdentry); -+ if (IS_ERR(newdentry)) -+ goto out_unlock; -+ } -+ -+ err = -ESTALE; -+ if (olddentry->d_parent != old_upperdir) -+ goto out_dput; -+ if (newdentry->d_parent != new_upperdir) -+ goto out_dput; -+ if (olddentry == trap) -+ goto out_dput; -+ if (newdentry == trap) -+ goto out_dput; -+ -+ if (is_dir && !old_opaque && new_opaque) { -+ err = ovl_set_opaque(olddentry); -+ if (err) -+ goto out_dput; -+ } -+ if (!overwrite && new_is_dir && old_opaque && !new_opaque) { -+ err = ovl_set_opaque(newdentry); -+ if (err) -+ goto out_dput; -+ } -+ -+ if (old_opaque || new_opaque) { -+ err = ovl_do_rename(old_upperdir->d_inode, olddentry, -+ new_upperdir->d_inode, newdentry, -+ flags); -+ } else { -+ /* No debug for the plain case */ -+ BUG_ON(flags & ~RENAME_EXCHANGE); -+ err = vfs_rename(old_upperdir->d_inode, olddentry, -+ new_upperdir->d_inode, newdentry, -+ NULL, flags); -+ } -+ -+ if (err) { -+ if (is_dir && !old_opaque && new_opaque) -+ ovl_remove_opaque(olddentry); -+ if (!overwrite && new_is_dir && old_opaque && !new_opaque) -+ ovl_remove_opaque(newdentry); -+ goto out_dput; -+ } -+ -+ if (is_dir && old_opaque && !new_opaque) -+ ovl_remove_opaque(olddentry); -+ if (!overwrite && new_is_dir && !old_opaque && new_opaque) -+ ovl_remove_opaque(newdentry); -+ -+ if (old_opaque != new_opaque) { -+ ovl_dentry_set_opaque(old, new_opaque); -+ if (!overwrite) -+ ovl_dentry_set_opaque(new, old_opaque); -+ } -+ -+ if (cleanup_whiteout) -+ ovl_cleanup(old_upperdir->d_inode, newdentry); -+ -+ ovl_dentry_version_inc(old->d_parent); -+ ovl_dentry_version_inc(new->d_parent); -+ -+out_dput: -+ dput(newdentry); -+out_unlock: -+ unlock_rename(new_upperdir, old_upperdir); -+out_revert_creds: -+ if (old_opaque || new_opaque) { -+ revert_creds(old_cred); -+ put_cred(override_cred); -+ } -+out_drop_write: -+ ovl_drop_write(old); -+out: -+ dput(opaquedir); -+ return err; -+} -+ -+const struct inode_operations ovl_dir_inode_operations = { -+ .lookup = ovl_lookup, -+ .mkdir = ovl_mkdir, -+ .symlink = ovl_symlink, -+ .unlink = ovl_unlink, -+ .rmdir = ovl_rmdir, -+ .rename2 = ovl_rename2, -+ .link = ovl_link, -+ .setattr = ovl_setattr, -+ .create = ovl_create, -+ .mknod = ovl_mknod, -+ .permission = ovl_permission, -+ .getattr = ovl_dir_getattr, -+ .setxattr = ovl_setxattr, -+ .getxattr = ovl_getxattr, -+ .listxattr = ovl_listxattr, -+ .removexattr = ovl_removexattr, -+}; -diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c -new file mode 100644 -index 0000000..af2d18c ---- /dev/null -+++ b/fs/overlayfs/inode.c -@@ -0,0 +1,425 @@ -+/* -+ * -+ * Copyright (C) 2011 Novell Inc. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ */ -+ -+#include -+#include -+#include -+#include "overlayfs.h" -+ -+static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, -+ bool no_data) -+{ -+ int err; -+ struct dentry *parent; -+ struct kstat stat; -+ struct path lowerpath; -+ -+ parent = dget_parent(dentry); -+ err = ovl_copy_up(parent); -+ if (err) -+ goto out_dput_parent; -+ -+ ovl_path_lower(dentry, &lowerpath); -+ err = vfs_getattr(&lowerpath, &stat); -+ if (err) -+ goto out_dput_parent; -+ -+ if (no_data) -+ stat.size = 0; -+ -+ err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr); -+ -+out_dput_parent: -+ dput(parent); -+ return err; -+} -+ -+int ovl_setattr(struct dentry *dentry, struct iattr *attr) -+{ -+ int err; -+ struct dentry *upperdentry; -+ -+ err = ovl_want_write(dentry); -+ if (err) -+ goto out; -+ -+ upperdentry = ovl_dentry_upper(dentry); -+ if (upperdentry) { -+ mutex_lock(&upperdentry->d_inode->i_mutex); -+ err = notify_change(upperdentry, attr, NULL); -+ mutex_unlock(&upperdentry->d_inode->i_mutex); -+ } else { -+ err = ovl_copy_up_last(dentry, attr, false); -+ } -+ ovl_drop_write(dentry); -+out: -+ return err; -+} -+ -+static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, -+ struct kstat *stat) -+{ -+ struct path realpath; -+ -+ ovl_path_real(dentry, &realpath); -+ return vfs_getattr(&realpath, stat); -+} -+ -+int ovl_permission(struct inode *inode, int mask) -+{ -+ struct ovl_entry *oe; -+ struct dentry *alias = NULL; -+ struct inode *realinode; -+ struct dentry *realdentry; -+ bool is_upper; -+ int err; -+ -+ if (S_ISDIR(inode->i_mode)) { -+ oe = inode->i_private; -+ } else if (mask & MAY_NOT_BLOCK) { -+ return -ECHILD; -+ } else { -+ /* -+ * For non-directories find an alias and get the info -+ * from there. -+ */ -+ alias = d_find_any_alias(inode); -+ if (WARN_ON(!alias)) -+ return -ENOENT; -+ -+ oe = alias->d_fsdata; -+ } -+ -+ realdentry = ovl_entry_real(oe, &is_upper); -+ -+ /* Careful in RCU walk mode */ -+ realinode = ACCESS_ONCE(realdentry->d_inode); -+ if (!realinode) { -+ WARN_ON(!(mask & MAY_NOT_BLOCK)); -+ err = -ENOENT; -+ goto out_dput; -+ } -+ -+ if (mask & MAY_WRITE) { -+ umode_t mode = realinode->i_mode; -+ -+ /* -+ * Writes will always be redirected to upper layer, so -+ * ignore lower layer being read-only. -+ * -+ * If the overlay itself is read-only then proceed -+ * with the permission check, don't return EROFS. -+ * This will only happen if this is the lower layer of -+ * another overlayfs. -+ * -+ * If upper fs becomes read-only after the overlay was -+ * constructed return EROFS to prevent modification of -+ * upper layer. -+ */ -+ err = -EROFS; -+ if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) && -+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) -+ goto out_dput; -+ } -+ -+ err = __inode_permission(realinode, mask); -+out_dput: -+ dput(alias); -+ return err; -+} -+ -+ -+struct ovl_link_data { -+ struct dentry *realdentry; -+ void *cookie; -+}; -+ -+static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) -+{ -+ void *ret; -+ struct dentry *realdentry; -+ struct inode *realinode; -+ -+ realdentry = ovl_dentry_real(dentry); -+ realinode = realdentry->d_inode; -+ -+ if (WARN_ON(!realinode->i_op->follow_link)) -+ return ERR_PTR(-EPERM); -+ -+ ret = realinode->i_op->follow_link(realdentry, nd); -+ if (IS_ERR(ret)) -+ return ret; -+ -+ if (realinode->i_op->put_link) { -+ struct ovl_link_data *data; -+ -+ data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL); -+ if (!data) { -+ realinode->i_op->put_link(realdentry, nd, ret); -+ return ERR_PTR(-ENOMEM); -+ } -+ data->realdentry = realdentry; -+ data->cookie = ret; -+ -+ return data; -+ } else { -+ return NULL; -+ } -+} -+ -+static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) -+{ -+ struct inode *realinode; -+ struct ovl_link_data *data = c; -+ -+ if (!data) -+ return; -+ -+ realinode = data->realdentry->d_inode; -+ realinode->i_op->put_link(data->realdentry, nd, data->cookie); -+ kfree(data); -+} -+ -+static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) -+{ -+ struct path realpath; -+ struct inode *realinode; -+ -+ ovl_path_real(dentry, &realpath); -+ realinode = realpath.dentry->d_inode; -+ -+ if (!realinode->i_op->readlink) -+ return -EINVAL; -+ -+ touch_atime(&realpath); -+ -+ return realinode->i_op->readlink(realpath.dentry, buf, bufsiz); -+} -+ -+ -+static bool ovl_is_private_xattr(const char *name) -+{ -+ return strncmp(name, "trusted.overlay.", 14) == 0; -+} -+ -+int ovl_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ int err; -+ struct dentry *upperdentry; -+ -+ err = ovl_want_write(dentry); -+ if (err) -+ goto out; -+ -+ err = -EPERM; -+ if (ovl_is_private_xattr(name)) -+ goto out_drop_write; -+ -+ err = ovl_copy_up(dentry); -+ if (err) -+ goto out_drop_write; -+ -+ upperdentry = ovl_dentry_upper(dentry); -+ err = vfs_setxattr(upperdentry, name, value, size, flags); -+ -+out_drop_write: -+ ovl_drop_write(dentry); -+out: -+ return err; -+} -+ -+ssize_t ovl_getxattr(struct dentry *dentry, const char *name, -+ void *value, size_t size) -+{ -+ if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && -+ ovl_is_private_xattr(name)) -+ return -ENODATA; -+ -+ return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); -+} -+ -+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) -+{ -+ ssize_t res; -+ int off; -+ -+ res = vfs_listxattr(ovl_dentry_real(dentry), list, size); -+ if (res <= 0 || size == 0) -+ return res; -+ -+ if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) -+ return res; -+ -+ /* filter out private xattrs */ -+ for (off = 0; off < res;) { -+ char *s = list + off; -+ size_t slen = strlen(s) + 1; -+ -+ BUG_ON(off + slen > res); -+ -+ if (ovl_is_private_xattr(s)) { -+ res -= slen; -+ memmove(s, s + slen, res - off); -+ } else { -+ off += slen; -+ } -+ } -+ -+ return res; -+} -+ -+int ovl_removexattr(struct dentry *dentry, const char *name) -+{ -+ int err; -+ struct path realpath; -+ enum ovl_path_type type; -+ -+ err = ovl_want_write(dentry); -+ if (err) -+ goto out; -+ -+ if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && -+ ovl_is_private_xattr(name)) -+ goto out_drop_write; -+ -+ type = ovl_path_real(dentry, &realpath); -+ if (type == OVL_PATH_LOWER) { -+ err = vfs_getxattr(realpath.dentry, name, NULL, 0); -+ if (err < 0) -+ goto out_drop_write; -+ -+ err = ovl_copy_up(dentry); -+ if (err) -+ goto out_drop_write; -+ -+ ovl_path_upper(dentry, &realpath); -+ } -+ -+ err = vfs_removexattr(realpath.dentry, name); -+out_drop_write: -+ ovl_drop_write(dentry); -+out: -+ return err; -+} -+ -+static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, -+ struct dentry *realdentry) -+{ -+ if (type != OVL_PATH_LOWER) -+ return false; -+ -+ if (special_file(realdentry->d_inode->i_mode)) -+ return false; -+ -+ if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) -+ return false; -+ -+ return true; -+} -+ -+static int ovl_dentry_open(struct dentry *dentry, struct file *file, -+ const struct cred *cred) -+{ -+ int err; -+ struct path realpath; -+ enum ovl_path_type type; -+ bool want_write = false; -+ -+ type = ovl_path_real(dentry, &realpath); -+ if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { -+ want_write = true; -+ err = ovl_want_write(dentry); -+ if (err) -+ goto out; -+ -+ if (file->f_flags & O_TRUNC) -+ err = ovl_copy_up_last(dentry, NULL, true); -+ else -+ err = ovl_copy_up(dentry); -+ if (err) -+ goto out_drop_write; -+ -+ ovl_path_upper(dentry, &realpath); -+ } -+ -+ err = vfs_open(&realpath, file, cred); -+out_drop_write: -+ if (want_write) -+ ovl_drop_write(dentry); -+out: -+ return err; -+} -+ -+static const struct inode_operations ovl_file_inode_operations = { -+ .setattr = ovl_setattr, -+ .permission = ovl_permission, -+ .getattr = ovl_getattr, -+ .setxattr = ovl_setxattr, -+ .getxattr = ovl_getxattr, -+ .listxattr = ovl_listxattr, -+ .removexattr = ovl_removexattr, -+ .dentry_open = ovl_dentry_open, -+}; -+ -+static const struct inode_operations ovl_symlink_inode_operations = { -+ .setattr = ovl_setattr, -+ .follow_link = ovl_follow_link, -+ .put_link = ovl_put_link, -+ .readlink = ovl_readlink, -+ .getattr = ovl_getattr, -+ .setxattr = ovl_setxattr, -+ .getxattr = ovl_getxattr, -+ .listxattr = ovl_listxattr, -+ .removexattr = ovl_removexattr, -+}; -+ -+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, -+ struct ovl_entry *oe) -+{ -+ struct inode *inode; -+ -+ inode = new_inode(sb); -+ if (!inode) -+ return NULL; -+ -+ mode &= S_IFMT; -+ -+ inode->i_ino = get_next_ino(); -+ inode->i_mode = mode; -+ inode->i_flags |= S_NOATIME | S_NOCMTIME; -+ -+ switch (mode) { -+ case S_IFDIR: -+ inode->i_private = oe; -+ inode->i_op = &ovl_dir_inode_operations; -+ inode->i_fop = &ovl_dir_operations; -+ break; -+ -+ case S_IFLNK: -+ inode->i_op = &ovl_symlink_inode_operations; -+ break; -+ -+ case S_IFREG: -+ case S_IFSOCK: -+ case S_IFBLK: -+ case S_IFCHR: -+ case S_IFIFO: -+ inode->i_op = &ovl_file_inode_operations; -+ break; -+ -+ default: -+ WARN(1, "illegal file type: %i\n", mode); -+ iput(inode); -+ inode = NULL; -+ } -+ -+ return inode; -+ -+} -diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h -new file mode 100644 -index 0000000..814bed3 ---- /dev/null -+++ b/fs/overlayfs/overlayfs.h -@@ -0,0 +1,191 @@ -+/* -+ * -+ * Copyright (C) 2011 Novell Inc. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ */ -+ -+#include -+ -+struct ovl_entry; -+ -+enum ovl_path_type { -+ OVL_PATH_PURE_UPPER, -+ OVL_PATH_UPPER, -+ OVL_PATH_MERGE, -+ OVL_PATH_LOWER, -+}; -+ -+extern const char *ovl_opaque_xattr; -+ -+static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) -+{ -+ int err = vfs_rmdir(dir, dentry); -+ pr_debug("rmdir(%pd2) = %i\n", dentry, err); -+ return err; -+} -+ -+static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry) -+{ -+ int err = vfs_unlink(dir, dentry, NULL); -+ pr_debug("unlink(%pd2) = %i\n", dentry, err); -+ return err; -+} -+ -+static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir, -+ struct dentry *new_dentry, bool debug) -+{ -+ int err = vfs_link(old_dentry, dir, new_dentry, NULL); -+ if (debug) { -+ pr_debug("link(%pd2, %pd2) = %i\n", -+ old_dentry, new_dentry, err); -+ } -+ return err; -+} -+ -+static inline int ovl_do_create(struct inode *dir, struct dentry *dentry, -+ umode_t mode, bool debug) -+{ -+ int err = vfs_create(dir, dentry, mode, true); -+ if (debug) -+ pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); -+ return err; -+} -+ -+static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry, -+ umode_t mode, bool debug) -+{ -+ int err = vfs_mkdir(dir, dentry, mode); -+ if (debug) -+ pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); -+ return err; -+} -+ -+static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry, -+ umode_t mode, dev_t dev, bool debug) -+{ -+ int err = vfs_mknod(dir, dentry, mode, dev); -+ if (debug) { -+ pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", -+ dentry, mode, dev, err); -+ } -+ return err; -+} -+ -+static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry, -+ const char *oldname, bool debug) -+{ -+ int err = vfs_symlink(dir, dentry, oldname); -+ if (debug) -+ pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); -+ return err; -+} -+ -+static inline int ovl_do_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ int err = vfs_setxattr(dentry, name, value, size, flags); -+ pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n", -+ dentry, name, (int) size, (char *) value, flags, err); -+ return err; -+} -+ -+static inline int ovl_do_removexattr(struct dentry *dentry, const char *name) -+{ -+ int err = vfs_removexattr(dentry, name); -+ pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err); -+ return err; -+} -+ -+static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, -+ struct inode *newdir, struct dentry *newdentry, -+ unsigned int flags) -+{ -+ int err; -+ -+ pr_debug("rename2(%pd2, %pd2, 0x%x)\n", -+ olddentry, newdentry, flags); -+ -+ err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags); -+ -+ if (err) { -+ pr_debug("...rename2(%pd2, %pd2, ...) = %i\n", -+ olddentry, newdentry, err); -+ } -+ return err; -+} -+ -+static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) -+{ -+ int err = vfs_whiteout(dir, dentry); -+ pr_debug("whiteout(%pd2) = %i\n", dentry, err); -+ return err; -+} -+ -+enum ovl_path_type ovl_path_type(struct dentry *dentry); -+u64 ovl_dentry_version_get(struct dentry *dentry); -+void ovl_dentry_version_inc(struct dentry *dentry); -+void ovl_path_upper(struct dentry *dentry, struct path *path); -+void ovl_path_lower(struct dentry *dentry, struct path *path); -+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); -+struct dentry *ovl_dentry_upper(struct dentry *dentry); -+struct dentry *ovl_dentry_lower(struct dentry *dentry); -+struct dentry *ovl_dentry_real(struct dentry *dentry); -+struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper); -+struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); -+void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); -+struct dentry *ovl_workdir(struct dentry *dentry); -+int ovl_want_write(struct dentry *dentry); -+void ovl_drop_write(struct dentry *dentry); -+bool ovl_dentry_is_opaque(struct dentry *dentry); -+void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); -+bool ovl_is_whiteout(struct dentry *dentry); -+void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); -+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, -+ unsigned int flags); -+struct file *ovl_path_open(struct path *path, int flags); -+ -+struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, -+ struct kstat *stat, const char *link); -+ -+/* readdir.c */ -+extern const struct file_operations ovl_dir_operations; -+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); -+void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); -+void ovl_cache_free(struct list_head *list); -+ -+/* inode.c */ -+int ovl_setattr(struct dentry *dentry, struct iattr *attr); -+int ovl_permission(struct inode *inode, int mask); -+int ovl_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags); -+ssize_t ovl_getxattr(struct dentry *dentry, const char *name, -+ void *value, size_t size); -+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); -+int ovl_removexattr(struct dentry *dentry, const char *name); -+ -+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, -+ struct ovl_entry *oe); -+static inline void ovl_copyattr(struct inode *from, struct inode *to) -+{ -+ to->i_uid = from->i_uid; -+ to->i_gid = from->i_gid; -+} -+ -+/* dir.c */ -+extern const struct inode_operations ovl_dir_inode_operations; -+struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); -+int ovl_create_real(struct inode *dir, struct dentry *newdentry, -+ struct kstat *stat, const char *link, -+ struct dentry *hardlink, bool debug); -+void ovl_cleanup(struct inode *dir, struct dentry *dentry); -+ -+/* copy_up.c */ -+int ovl_copy_up(struct dentry *dentry); -+int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, -+ struct path *lowerpath, struct kstat *stat, -+ struct iattr *attr); -+int ovl_copy_xattr(struct dentry *old, struct dentry *new); -+int ovl_set_attr(struct dentry *upper, struct kstat *stat); -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -new file mode 100644 -index 0000000..c6787f8 ---- /dev/null -+++ b/fs/overlayfs/readdir.c -@@ -0,0 +1,587 @@ -+/* -+ * -+ * Copyright (C) 2011 Novell Inc. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "overlayfs.h" -+ -+struct ovl_cache_entry { -+ const char *name; -+ unsigned int len; -+ unsigned int type; -+ u64 ino; -+ bool is_whiteout; -+ struct list_head l_node; -+ struct rb_node node; -+}; -+ -+struct ovl_dir_cache { -+ long refcount; -+ u64 version; -+ struct list_head entries; -+}; -+ -+struct ovl_readdir_data { -+ struct dir_context ctx; -+ bool is_merge; -+ struct rb_root *root; -+ struct list_head *list; -+ struct list_head *middle; -+ int count; -+ int err; -+}; -+ -+struct ovl_dir_file { -+ bool is_real; -+ bool is_upper; -+ struct ovl_dir_cache *cache; -+ struct ovl_cache_entry cursor; -+ struct file *realfile; -+ struct file *upperfile; -+}; -+ -+static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) -+{ -+ return container_of(n, struct ovl_cache_entry, node); -+} -+ -+static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, -+ const char *name, int len) -+{ -+ struct rb_node *node = root->rb_node; -+ int cmp; -+ -+ while (node) { -+ struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); -+ -+ cmp = strncmp(name, p->name, len); -+ if (cmp > 0) -+ node = p->node.rb_right; -+ else if (cmp < 0 || len < p->len) -+ node = p->node.rb_left; -+ else -+ return p; -+ } -+ -+ return NULL; -+} -+ -+static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, -+ u64 ino, unsigned int d_type) -+{ -+ struct ovl_cache_entry *p; -+ -+ p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL); -+ if (p) { -+ char *name_copy = (char *) (p + 1); -+ memcpy(name_copy, name, len); -+ name_copy[len] = '\0'; -+ p->name = name_copy; -+ p->len = len; -+ p->type = d_type; -+ p->ino = ino; -+ p->is_whiteout = false; -+ } -+ -+ return p; -+} -+ -+static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, -+ const char *name, int len, u64 ino, -+ unsigned int d_type) -+{ -+ struct rb_node **newp = &rdd->root->rb_node; -+ struct rb_node *parent = NULL; -+ struct ovl_cache_entry *p; -+ -+ while (*newp) { -+ int cmp; -+ struct ovl_cache_entry *tmp; -+ -+ parent = *newp; -+ tmp = ovl_cache_entry_from_node(*newp); -+ cmp = strncmp(name, tmp->name, len); -+ if (cmp > 0) -+ newp = &tmp->node.rb_right; -+ else if (cmp < 0 || len < tmp->len) -+ newp = &tmp->node.rb_left; -+ else -+ return 0; -+ } -+ -+ p = ovl_cache_entry_new(name, len, ino, d_type); -+ if (p == NULL) -+ return -ENOMEM; -+ -+ list_add_tail(&p->l_node, rdd->list); -+ rb_link_node(&p->node, parent, newp); -+ rb_insert_color(&p->node, rdd->root); -+ -+ return 0; -+} -+ -+static int ovl_fill_lower(struct ovl_readdir_data *rdd, -+ const char *name, int namelen, -+ loff_t offset, u64 ino, unsigned int d_type) -+{ -+ struct ovl_cache_entry *p; -+ -+ p = ovl_cache_entry_find(rdd->root, name, namelen); -+ if (p) { -+ list_move_tail(&p->l_node, rdd->middle); -+ } else { -+ p = ovl_cache_entry_new(name, namelen, ino, d_type); -+ if (p == NULL) -+ rdd->err = -ENOMEM; -+ else -+ list_add_tail(&p->l_node, rdd->middle); -+ } -+ -+ return rdd->err; -+} -+ -+void ovl_cache_free(struct list_head *list) -+{ -+ struct ovl_cache_entry *p; -+ struct ovl_cache_entry *n; -+ -+ list_for_each_entry_safe(p, n, list, l_node) -+ kfree(p); -+ -+ INIT_LIST_HEAD(list); -+} -+ -+static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) -+{ -+ struct ovl_dir_cache *cache = od->cache; -+ -+ list_del(&od->cursor.l_node); -+ WARN_ON(cache->refcount <= 0); -+ cache->refcount--; -+ if (!cache->refcount) { -+ if (ovl_dir_cache(dentry) == cache) -+ ovl_set_dir_cache(dentry, NULL); -+ -+ ovl_cache_free(&cache->entries); -+ kfree(cache); -+ } -+} -+ -+static int ovl_fill_merge(void *buf, const char *name, int namelen, -+ loff_t offset, u64 ino, unsigned int d_type) -+{ -+ struct ovl_readdir_data *rdd = buf; -+ -+ rdd->count++; -+ if (!rdd->is_merge) -+ return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); -+ else -+ return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); -+} -+ -+static inline int ovl_dir_read(struct path *realpath, -+ struct ovl_readdir_data *rdd) -+{ -+ struct file *realfile; -+ int err; -+ -+ realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); -+ if (IS_ERR(realfile)) -+ return PTR_ERR(realfile); -+ -+ rdd->ctx.pos = 0; -+ do { -+ rdd->count = 0; -+ rdd->err = 0; -+ err = iterate_dir(realfile, &rdd->ctx); -+ if (err >= 0) -+ err = rdd->err; -+ } while (!err && rdd->count); -+ fput(realfile); -+ -+ return err; -+} -+ -+static void ovl_dir_reset(struct file *file) -+{ -+ struct ovl_dir_file *od = file->private_data; -+ struct ovl_dir_cache *cache = od->cache; -+ struct dentry *dentry = file->f_path.dentry; -+ enum ovl_path_type type = ovl_path_type(dentry); -+ -+ if (cache && ovl_dentry_version_get(dentry) != cache->version) { -+ ovl_cache_put(od, dentry); -+ od->cache = NULL; -+ } -+ WARN_ON(!od->is_real && type != OVL_PATH_MERGE); -+ if (od->is_real && type == OVL_PATH_MERGE) -+ od->is_real = false; -+} -+ -+static int ovl_dir_mark_whiteouts(struct dentry *dir, -+ struct ovl_readdir_data *rdd) -+{ -+ struct ovl_cache_entry *p; -+ struct dentry *dentry; -+ const struct cred *old_cred; -+ struct cred *override_cred; -+ -+ override_cred = prepare_creds(); -+ if (!override_cred) { -+ ovl_cache_free(rdd->list); -+ return -ENOMEM; -+ } -+ -+ /* -+ * CAP_DAC_OVERRIDE for lookup -+ */ -+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); -+ old_cred = override_creds(override_cred); -+ -+ mutex_lock(&dir->d_inode->i_mutex); -+ list_for_each_entry(p, rdd->list, l_node) { -+ if (!p->name) -+ continue; -+ -+ if (p->type != DT_CHR) -+ continue; -+ -+ dentry = lookup_one_len(p->name, dir, p->len); -+ if (IS_ERR(dentry)) -+ continue; -+ -+ p->is_whiteout = ovl_is_whiteout(dentry); -+ dput(dentry); -+ } -+ mutex_unlock(&dir->d_inode->i_mutex); -+ -+ revert_creds(old_cred); -+ put_cred(override_cred); -+ -+ return 0; -+} -+ -+static inline int ovl_dir_read_merged(struct path *upperpath, -+ struct path *lowerpath, -+ struct list_head *list) -+{ -+ int err; -+ struct rb_root root = RB_ROOT; -+ struct list_head middle; -+ struct ovl_readdir_data rdd = { -+ .ctx.actor = ovl_fill_merge, -+ .list = list, -+ .root = &root, -+ .is_merge = false, -+ }; -+ -+ if (upperpath->dentry) { -+ err = ovl_dir_read(upperpath, &rdd); -+ if (err) -+ goto out; -+ -+ if (lowerpath->dentry) { -+ err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); -+ if (err) -+ goto out; -+ } -+ } -+ if (lowerpath->dentry) { -+ /* -+ * Insert lowerpath entries before upperpath ones, this allows -+ * offsets to be reasonably constant -+ */ -+ list_add(&middle, rdd.list); -+ rdd.middle = &middle; -+ rdd.is_merge = true; -+ err = ovl_dir_read(lowerpath, &rdd); -+ list_del(&middle); -+ } -+out: -+ return err; -+ -+} -+ -+static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) -+{ -+ struct ovl_cache_entry *p; -+ loff_t off = 0; -+ -+ list_for_each_entry(p, &od->cache->entries, l_node) { -+ if (!p->name) -+ continue; -+ if (off >= pos) -+ break; -+ off++; -+ } -+ list_move_tail(&od->cursor.l_node, &p->l_node); -+} -+ -+static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) -+{ -+ int res; -+ struct path lowerpath; -+ struct path upperpath; -+ struct ovl_dir_cache *cache; -+ -+ cache = ovl_dir_cache(dentry); -+ if (cache && ovl_dentry_version_get(dentry) == cache->version) { -+ cache->refcount++; -+ return cache; -+ } -+ ovl_set_dir_cache(dentry, NULL); -+ -+ cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); -+ if (!cache) -+ return ERR_PTR(-ENOMEM); -+ -+ cache->refcount = 1; -+ INIT_LIST_HEAD(&cache->entries); -+ -+ ovl_path_lower(dentry, &lowerpath); -+ ovl_path_upper(dentry, &upperpath); -+ -+ res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); -+ if (res) { -+ ovl_cache_free(&cache->entries); -+ kfree(cache); -+ return ERR_PTR(res); -+ } -+ -+ cache->version = ovl_dentry_version_get(dentry); -+ ovl_set_dir_cache(dentry, cache); -+ -+ return cache; -+} -+ -+static int ovl_iterate(struct file *file, struct dir_context *ctx) -+{ -+ struct ovl_dir_file *od = file->private_data; -+ struct dentry *dentry = file->f_path.dentry; -+ -+ if (!ctx->pos) -+ ovl_dir_reset(file); -+ -+ if (od->is_real) -+ return iterate_dir(od->realfile, ctx); -+ -+ if (!od->cache) { -+ struct ovl_dir_cache *cache; -+ -+ cache = ovl_cache_get(dentry); -+ if (IS_ERR(cache)) -+ return PTR_ERR(cache); -+ -+ od->cache = cache; -+ ovl_seek_cursor(od, ctx->pos); -+ } -+ -+ while (od->cursor.l_node.next != &od->cache->entries) { -+ struct ovl_cache_entry *p; -+ -+ p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); -+ /* Skip cursors */ -+ if (p->name) { -+ if (!p->is_whiteout) { -+ if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) -+ break; -+ } -+ ctx->pos++; -+ } -+ list_move(&od->cursor.l_node, &p->l_node); -+ } -+ return 0; -+} -+ -+static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) -+{ -+ loff_t res; -+ struct ovl_dir_file *od = file->private_data; -+ -+ mutex_lock(&file_inode(file)->i_mutex); -+ if (!file->f_pos) -+ ovl_dir_reset(file); -+ -+ if (od->is_real) { -+ res = vfs_llseek(od->realfile, offset, origin); -+ file->f_pos = od->realfile->f_pos; -+ } else { -+ res = -EINVAL; -+ -+ switch (origin) { -+ case SEEK_CUR: -+ offset += file->f_pos; -+ break; -+ case SEEK_SET: -+ break; -+ default: -+ goto out_unlock; -+ } -+ if (offset < 0) -+ goto out_unlock; -+ -+ if (offset != file->f_pos) { -+ file->f_pos = offset; -+ if (od->cache) -+ ovl_seek_cursor(od, offset); -+ } -+ res = offset; -+ } -+out_unlock: -+ mutex_unlock(&file_inode(file)->i_mutex); -+ -+ return res; -+} -+ -+static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, -+ int datasync) -+{ -+ struct ovl_dir_file *od = file->private_data; -+ struct dentry *dentry = file->f_path.dentry; -+ struct file *realfile = od->realfile; -+ -+ /* -+ * Need to check if we started out being a lower dir, but got copied up -+ */ -+ if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { -+ struct inode *inode = file_inode(file); -+ -+ mutex_lock(&inode->i_mutex); -+ realfile = od->upperfile; -+ if (!realfile) { -+ struct path upperpath; -+ -+ ovl_path_upper(dentry, &upperpath); -+ realfile = ovl_path_open(&upperpath, O_RDONLY); -+ if (IS_ERR(realfile)) { -+ mutex_unlock(&inode->i_mutex); -+ return PTR_ERR(realfile); -+ } -+ od->upperfile = realfile; -+ } -+ mutex_unlock(&inode->i_mutex); -+ } -+ -+ return vfs_fsync_range(realfile, start, end, datasync); -+} -+ -+static int ovl_dir_release(struct inode *inode, struct file *file) -+{ -+ struct ovl_dir_file *od = file->private_data; -+ -+ if (od->cache) { -+ mutex_lock(&inode->i_mutex); -+ ovl_cache_put(od, file->f_path.dentry); -+ mutex_unlock(&inode->i_mutex); -+ } -+ fput(od->realfile); -+ if (od->upperfile) -+ fput(od->upperfile); -+ kfree(od); -+ -+ return 0; -+} -+ -+static int ovl_dir_open(struct inode *inode, struct file *file) -+{ -+ struct path realpath; -+ struct file *realfile; -+ struct ovl_dir_file *od; -+ enum ovl_path_type type; -+ -+ od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); -+ if (!od) -+ return -ENOMEM; -+ -+ type = ovl_path_real(file->f_path.dentry, &realpath); -+ realfile = ovl_path_open(&realpath, file->f_flags); -+ if (IS_ERR(realfile)) { -+ kfree(od); -+ return PTR_ERR(realfile); -+ } -+ INIT_LIST_HEAD(&od->cursor.l_node); -+ od->realfile = realfile; -+ od->is_real = (type != OVL_PATH_MERGE); -+ od->is_upper = (type != OVL_PATH_LOWER); -+ file->private_data = od; -+ -+ return 0; -+} -+ -+const struct file_operations ovl_dir_operations = { -+ .read = generic_read_dir, -+ .open = ovl_dir_open, -+ .iterate = ovl_iterate, -+ .llseek = ovl_dir_llseek, -+ .fsync = ovl_dir_fsync, -+ .release = ovl_dir_release, -+}; -+ -+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) -+{ -+ int err; -+ struct path lowerpath; -+ struct path upperpath; -+ struct ovl_cache_entry *p; -+ -+ ovl_path_upper(dentry, &upperpath); -+ ovl_path_lower(dentry, &lowerpath); -+ -+ err = ovl_dir_read_merged(&upperpath, &lowerpath, list); -+ if (err) -+ return err; -+ -+ err = 0; -+ -+ list_for_each_entry(p, list, l_node) { -+ if (p->is_whiteout) -+ continue; -+ -+ if (p->name[0] == '.') { -+ if (p->len == 1) -+ continue; -+ if (p->len == 2 && p->name[1] == '.') -+ continue; -+ } -+ err = -ENOTEMPTY; -+ break; -+ } -+ -+ return err; -+} -+ -+void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) -+{ -+ struct ovl_cache_entry *p; -+ -+ mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT); -+ list_for_each_entry(p, list, l_node) { -+ struct dentry *dentry; -+ -+ if (!p->is_whiteout) -+ continue; -+ -+ dentry = lookup_one_len(p->name, upper, p->len); -+ if (IS_ERR(dentry)) { -+ pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", -+ upper->d_name.name, p->len, p->name, -+ (int) PTR_ERR(dentry)); -+ continue; -+ } -+ ovl_cleanup(upper->d_inode, dentry); -+ dput(dentry); -+ } -+ mutex_unlock(&upper->d_inode->i_mutex); -+} -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -new file mode 100644 -index 0000000..227710a ---- /dev/null -+++ b/fs/overlayfs/super.c -@@ -0,0 +1,727 @@ -+/* -+ * -+ * Copyright (C) 2011 Novell Inc. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "overlayfs.h" -+ -+MODULE_AUTHOR("Miklos Szeredi "); -+MODULE_DESCRIPTION("Overlay filesystem"); -+MODULE_LICENSE("GPL"); -+ -+/* private information held for overlayfs's superblock */ -+struct ovl_fs { -+ struct vfsmount *upper_mnt; -+ struct vfsmount *lower_mnt; -+ struct dentry *workdir; -+}; -+ -+struct ovl_dir_cache; -+ -+/* private information held for every overlayfs dentry */ -+struct ovl_entry { -+ struct dentry *__upperdentry; -+ struct dentry *lowerdentry; -+ struct ovl_dir_cache *cache; -+ union { -+ struct { -+ u64 version; -+ bool opaque; -+ }; -+ struct rcu_head rcu; -+ }; -+}; -+ -+const char *ovl_opaque_xattr = "trusted.overlay.opaque"; -+ -+ -+enum ovl_path_type ovl_path_type(struct dentry *dentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ if (oe->__upperdentry) { -+ if (oe->lowerdentry) { -+ if (S_ISDIR(dentry->d_inode->i_mode)) -+ return OVL_PATH_MERGE; -+ else -+ return OVL_PATH_UPPER; -+ } else { -+ if (oe->opaque) -+ return OVL_PATH_UPPER; -+ else -+ return OVL_PATH_PURE_UPPER; -+ } -+ } else { -+ return OVL_PATH_LOWER; -+ } -+} -+ -+static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) -+{ -+ struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); -+ /* -+ * Make sure to order reads to upperdentry wrt ovl_dentry_update() -+ */ -+ smp_read_barrier_depends(); -+ return upperdentry; -+} -+ -+void ovl_path_upper(struct dentry *dentry, struct path *path) -+{ -+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ path->mnt = ofs->upper_mnt; -+ path->dentry = ovl_upperdentry_dereference(oe); -+} -+ -+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) -+{ -+ -+ enum ovl_path_type type = ovl_path_type(dentry); -+ -+ if (type == OVL_PATH_LOWER) -+ ovl_path_lower(dentry, path); -+ else -+ ovl_path_upper(dentry, path); -+ -+ return type; -+} -+ -+struct dentry *ovl_dentry_upper(struct dentry *dentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ return ovl_upperdentry_dereference(oe); -+} -+ -+struct dentry *ovl_dentry_lower(struct dentry *dentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ return oe->lowerdentry; -+} -+ -+struct dentry *ovl_dentry_real(struct dentry *dentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ struct dentry *realdentry; -+ -+ realdentry = ovl_upperdentry_dereference(oe); -+ if (!realdentry) -+ realdentry = oe->lowerdentry; -+ -+ return realdentry; -+} -+ -+struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) -+{ -+ struct dentry *realdentry; -+ -+ realdentry = ovl_upperdentry_dereference(oe); -+ if (realdentry) { -+ *is_upper = true; -+ } else { -+ realdentry = oe->lowerdentry; -+ *is_upper = false; -+ } -+ return realdentry; -+} -+ -+struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ return oe->cache; -+} -+ -+void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ oe->cache = cache; -+} -+ -+void ovl_path_lower(struct dentry *dentry, struct path *path) -+{ -+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ path->mnt = ofs->lower_mnt; -+ path->dentry = oe->lowerdentry; -+} -+ -+int ovl_want_write(struct dentry *dentry) -+{ -+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; -+ return mnt_want_write(ofs->upper_mnt); -+} -+ -+void ovl_drop_write(struct dentry *dentry) -+{ -+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; -+ mnt_drop_write(ofs->upper_mnt); -+} -+ -+struct dentry *ovl_workdir(struct dentry *dentry) -+{ -+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; -+ return ofs->workdir; -+} -+ -+bool ovl_dentry_is_opaque(struct dentry *dentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ return oe->opaque; -+} -+ -+void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ oe->opaque = opaque; -+} -+ -+void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); -+ WARN_ON(oe->__upperdentry); -+ BUG_ON(!upperdentry->d_inode); -+ /* -+ * Make sure upperdentry is consistent before making it visible to -+ * ovl_upperdentry_dereference(). -+ */ -+ smp_wmb(); -+ oe->__upperdentry = upperdentry; -+} -+ -+void ovl_dentry_version_inc(struct dentry *dentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); -+ oe->version++; -+} -+ -+u64 ovl_dentry_version_get(struct dentry *dentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); -+ return oe->version; -+} -+ -+bool ovl_is_whiteout(struct dentry *dentry) -+{ -+ struct inode *inode = dentry->d_inode; -+ -+ return inode && IS_WHITEOUT(inode); -+} -+ -+static bool ovl_is_opaquedir(struct dentry *dentry) -+{ -+ int res; -+ char val; -+ struct inode *inode = dentry->d_inode; -+ -+ if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) -+ return false; -+ -+ res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1); -+ if (res == 1 && val == 'y') -+ return true; -+ -+ return false; -+} -+ -+static void ovl_dentry_release(struct dentry *dentry) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ if (oe) { -+ dput(oe->__upperdentry); -+ dput(oe->lowerdentry); -+ kfree_rcu(oe, rcu); -+ } -+} -+ -+static const struct dentry_operations ovl_dentry_operations = { -+ .d_release = ovl_dentry_release, -+}; -+ -+static struct ovl_entry *ovl_alloc_entry(void) -+{ -+ return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); -+} -+ -+static inline struct dentry *ovl_lookup_real(struct dentry *dir, -+ struct qstr *name) -+{ -+ struct dentry *dentry; -+ -+ mutex_lock(&dir->d_inode->i_mutex); -+ dentry = lookup_one_len(name->name, dir, name->len); -+ mutex_unlock(&dir->d_inode->i_mutex); -+ -+ if (IS_ERR(dentry)) { -+ if (PTR_ERR(dentry) == -ENOENT) -+ dentry = NULL; -+ } else if (!dentry->d_inode) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+} -+ -+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, -+ unsigned int flags) -+{ -+ struct ovl_entry *oe; -+ struct dentry *upperdir; -+ struct dentry *lowerdir; -+ struct dentry *upperdentry = NULL; -+ struct dentry *lowerdentry = NULL; -+ struct inode *inode = NULL; -+ int err; -+ -+ err = -ENOMEM; -+ oe = ovl_alloc_entry(); -+ if (!oe) -+ goto out; -+ -+ upperdir = ovl_dentry_upper(dentry->d_parent); -+ lowerdir = ovl_dentry_lower(dentry->d_parent); -+ -+ if (upperdir) { -+ upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); -+ err = PTR_ERR(upperdentry); -+ if (IS_ERR(upperdentry)) -+ goto out_put_dir; -+ -+ if (lowerdir && upperdentry) { -+ if (ovl_is_whiteout(upperdentry)) { -+ dput(upperdentry); -+ upperdentry = NULL; -+ oe->opaque = true; -+ } else if (ovl_is_opaquedir(upperdentry)) { -+ oe->opaque = true; -+ } -+ } -+ } -+ if (lowerdir && !oe->opaque) { -+ lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name); -+ err = PTR_ERR(lowerdentry); -+ if (IS_ERR(lowerdentry)) -+ goto out_dput_upper; -+ } -+ -+ if (lowerdentry && upperdentry && -+ (!S_ISDIR(upperdentry->d_inode->i_mode) || -+ !S_ISDIR(lowerdentry->d_inode->i_mode))) { -+ dput(lowerdentry); -+ lowerdentry = NULL; -+ oe->opaque = true; -+ } -+ -+ if (lowerdentry || upperdentry) { -+ struct dentry *realdentry; -+ -+ realdentry = upperdentry ? upperdentry : lowerdentry; -+ err = -ENOMEM; -+ inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode, -+ oe); -+ if (!inode) -+ goto out_dput; -+ ovl_copyattr(realdentry->d_inode, inode); -+ } -+ -+ oe->__upperdentry = upperdentry; -+ oe->lowerdentry = lowerdentry; -+ -+ dentry->d_fsdata = oe; -+ d_add(dentry, inode); -+ -+ return NULL; -+ -+out_dput: -+ dput(lowerdentry); -+out_dput_upper: -+ dput(upperdentry); -+out_put_dir: -+ kfree(oe); -+out: -+ return ERR_PTR(err); -+} -+ -+struct file *ovl_path_open(struct path *path, int flags) -+{ -+ return dentry_open(path, flags, current_cred()); -+} -+ -+static void ovl_put_super(struct super_block *sb) -+{ -+ struct ovl_fs *ufs = sb->s_fs_info; -+ -+ dput(ufs->workdir); -+ mntput(ufs->upper_mnt); -+ mntput(ufs->lower_mnt); -+ -+ kfree(ufs); -+} -+ -+static const struct super_operations ovl_super_operations = { -+ .put_super = ovl_put_super, -+}; -+ -+struct ovl_config { -+ char *lowerdir; -+ char *upperdir; -+ char *workdir; -+}; -+ -+enum { -+ OPT_LOWERDIR, -+ OPT_UPPERDIR, -+ OPT_WORKDIR, -+ OPT_ERR, -+}; -+ -+static const match_table_t ovl_tokens = { -+ {OPT_LOWERDIR, "lowerdir=%s"}, -+ {OPT_UPPERDIR, "upperdir=%s"}, -+ {OPT_WORKDIR, "workdir=%s"}, -+ {OPT_ERR, NULL} -+}; -+ -+static int ovl_parse_opt(char *opt, struct ovl_config *config) -+{ -+ char *p; -+ -+ config->upperdir = NULL; -+ config->lowerdir = NULL; -+ config->workdir = NULL; -+ -+ while ((p = strsep(&opt, ",")) != NULL) { -+ int token; -+ substring_t args[MAX_OPT_ARGS]; -+ -+ if (!*p) -+ continue; -+ -+ token = match_token(p, ovl_tokens, args); -+ switch (token) { -+ case OPT_UPPERDIR: -+ kfree(config->upperdir); -+ config->upperdir = match_strdup(&args[0]); -+ if (!config->upperdir) -+ return -ENOMEM; -+ break; -+ -+ case OPT_LOWERDIR: -+ kfree(config->lowerdir); -+ config->lowerdir = match_strdup(&args[0]); -+ if (!config->lowerdir) -+ return -ENOMEM; -+ break; -+ -+ case OPT_WORKDIR: -+ kfree(config->workdir); -+ config->workdir = match_strdup(&args[0]); -+ if (!config->workdir) -+ return -ENOMEM; -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ } -+ return 0; -+} -+ -+#define OVL_WORKDIR_NAME "work" -+ -+static struct dentry *ovl_workdir_create(struct vfsmount *mnt, -+ struct dentry *dentry) -+{ -+ struct inode *dir = dentry->d_inode; -+ struct dentry *work; -+ int err; -+ bool retried = false; -+ -+ err = mnt_want_write(mnt); -+ if (err) -+ return ERR_PTR(err); -+ -+ mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); -+retry: -+ work = lookup_one_len(OVL_WORKDIR_NAME, dentry, -+ strlen(OVL_WORKDIR_NAME)); -+ -+ if (!IS_ERR(work)) { -+ struct kstat stat = { -+ .mode = S_IFDIR | 0, -+ }; -+ -+ if (work->d_inode) { -+ err = -EEXIST; -+ if (retried) -+ goto out_dput; -+ -+ retried = true; -+ ovl_cleanup(dir, work); -+ dput(work); -+ goto retry; -+ } -+ -+ err = ovl_create_real(dir, work, &stat, NULL, NULL, true); -+ if (err) -+ goto out_dput; -+ } -+out_unlock: -+ mutex_unlock(&dir->i_mutex); -+ mnt_drop_write(mnt); -+ -+ return work; -+ -+out_dput: -+ dput(work); -+ work = ERR_PTR(err); -+ goto out_unlock; -+} -+ -+static int ovl_mount_dir(const char *name, struct path *path) -+{ -+ int err; -+ -+ err = kern_path(name, LOOKUP_FOLLOW, path); -+ if (err) { -+ pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); -+ err = -EINVAL; -+ } -+ return err; -+} -+ -+static bool ovl_is_allowed_fs_type(struct dentry *root) -+{ -+ const struct dentry_operations *dop = root->d_op; -+ -+ /* -+ * We don't support: -+ * - automount filesystems -+ * - filesystems with revalidate (FIXME for lower layer) -+ * - filesystems with case insensitive names -+ */ -+ if (dop && -+ (dop->d_manage || dop->d_automount || -+ dop->d_revalidate || dop->d_weak_revalidate || -+ dop->d_compare || dop->d_hash)) { -+ return false; -+ } -+ return true; -+} -+ -+/* Workdir should not be subdir of upperdir and vice versa */ -+static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) -+{ -+ bool ok = false; -+ -+ if (workdir != upperdir) { -+ ok = (lock_rename(workdir, upperdir) == NULL); -+ unlock_rename(workdir, upperdir); -+ } -+ return ok; -+} -+ -+static int ovl_fill_super(struct super_block *sb, void *data, int silent) -+{ -+ struct path lowerpath; -+ struct path upperpath; -+ struct path workpath; -+ struct inode *root_inode; -+ struct dentry *root_dentry; -+ struct ovl_entry *oe; -+ struct ovl_fs *ufs; -+ struct ovl_config config; -+ int err; -+ -+ err = ovl_parse_opt((char *) data, &config); -+ if (err) -+ goto out; -+ -+ /* FIXME: workdir is not needed for a R/O mount */ -+ err = -EINVAL; -+ if (!config.upperdir || !config.lowerdir || !config.workdir) { -+ pr_err("overlayfs: missing upperdir or lowerdir or workdir\n"); -+ goto out_free_config; -+ } -+ -+ err = -ENOMEM; -+ ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL); -+ if (!ufs) -+ goto out_free_config; -+ -+ oe = ovl_alloc_entry(); -+ if (oe == NULL) -+ goto out_free_ufs; -+ -+ err = ovl_mount_dir(config.upperdir, &upperpath); -+ if (err) -+ goto out_free_oe; -+ -+ err = ovl_mount_dir(config.lowerdir, &lowerpath); -+ if (err) -+ goto out_put_upperpath; -+ -+ err = ovl_mount_dir(config.workdir, &workpath); -+ if (err) -+ goto out_put_lowerpath; -+ -+ err = -EINVAL; -+ if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || -+ !S_ISDIR(lowerpath.dentry->d_inode->i_mode) || -+ !S_ISDIR(workpath.dentry->d_inode->i_mode)) { -+ pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n"); -+ goto out_put_workpath; -+ } -+ -+ if (upperpath.mnt != workpath.mnt) { -+ pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); -+ goto out_put_workpath; -+ } -+ if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { -+ pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); -+ goto out_put_workpath; -+ } -+ -+ if (!ovl_is_allowed_fs_type(upperpath.dentry)) { -+ pr_err("overlayfs: filesystem of upperdir is not supported\n"); -+ goto out_put_workpath; -+ } -+ -+ if (!ovl_is_allowed_fs_type(lowerpath.dentry)) { -+ pr_err("overlayfs: filesystem of lowerdir is not supported\n"); -+ goto out_put_workpath; -+ } -+ -+ ufs->upper_mnt = clone_private_mount(&upperpath); -+ err = PTR_ERR(ufs->upper_mnt); -+ if (IS_ERR(ufs->upper_mnt)) { -+ pr_err("overlayfs: failed to clone upperpath\n"); -+ goto out_put_workpath; -+ } -+ -+ ufs->lower_mnt = clone_private_mount(&lowerpath); -+ err = PTR_ERR(ufs->lower_mnt); -+ if (IS_ERR(ufs->lower_mnt)) { -+ pr_err("overlayfs: failed to clone lowerpath\n"); -+ goto out_put_upper_mnt; -+ } -+ -+ ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); -+ err = PTR_ERR(ufs->workdir); -+ if (IS_ERR(ufs->workdir)) { -+ pr_err("overlayfs: failed to create directory %s/%s\n", -+ config.workdir, OVL_WORKDIR_NAME); -+ goto out_put_lower_mnt; -+ } -+ -+ /* -+ * Make lower_mnt R/O. That way fchmod/fchown on lower file -+ * will fail instead of modifying lower fs. -+ */ -+ ufs->lower_mnt->mnt_flags |= MNT_READONLY; -+ -+ /* If the upper fs is r/o, we mark overlayfs r/o too */ -+ if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) -+ sb->s_flags |= MS_RDONLY; -+ -+ sb->s_d_op = &ovl_dentry_operations; -+ -+ err = -ENOMEM; -+ root_inode = ovl_new_inode(sb, S_IFDIR, oe); -+ if (!root_inode) -+ goto out_put_workdir; -+ -+ root_dentry = d_make_root(root_inode); -+ if (!root_dentry) -+ goto out_put_workdir; -+ -+ mntput(upperpath.mnt); -+ mntput(lowerpath.mnt); -+ path_put(&workpath); -+ -+ oe->__upperdentry = upperpath.dentry; -+ oe->lowerdentry = lowerpath.dentry; -+ -+ root_dentry->d_fsdata = oe; -+ -+ sb->s_op = &ovl_super_operations; -+ sb->s_root = root_dentry; -+ sb->s_fs_info = ufs; -+ -+ return 0; -+ -+out_put_workdir: -+ dput(ufs->workdir); -+out_put_lower_mnt: -+ mntput(ufs->lower_mnt); -+out_put_upper_mnt: -+ mntput(ufs->upper_mnt); -+out_put_workpath: -+ path_put(&workpath); -+out_put_lowerpath: -+ path_put(&lowerpath); -+out_put_upperpath: -+ path_put(&upperpath); -+out_free_oe: -+ kfree(oe); -+out_free_ufs: -+ kfree(ufs); -+out_free_config: -+ kfree(config.lowerdir); -+ kfree(config.upperdir); -+ kfree(config.workdir); -+out: -+ return err; -+} -+ -+static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, -+ const char *dev_name, void *raw_data) -+{ -+ return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); -+} -+ -+static struct file_system_type ovl_fs_type = { -+ .owner = THIS_MODULE, -+ .name = "overlayfs", -+ .mount = ovl_mount, -+ .kill_sb = kill_anon_super, -+}; -+MODULE_ALIAS_FS("overlayfs"); -+ -+static int __init ovl_init(void) -+{ -+ return register_filesystem(&ovl_fs_type); -+} -+ -+static void __exit ovl_exit(void) -+{ -+ unregister_filesystem(&ovl_fs_type); -+} -+ -+module_init(ovl_init); -+module_exit(ovl_exit); --- -2.7.4 - - -From a38696f51b951dddbba71e7a3cabb47d9347fa06 Mon Sep 17 00:00:00 2001 -From: Andy Whitcroft -Date: Fri, 24 Oct 2014 00:14:38 +0200 -Subject: [PATCH 21/73] overlayfs: add statfs support - -Add support for statfs to the overlayfs filesystem. As the upper layer -is the target of all write operations assume that the space in that -filesystem is the space in the overlayfs. There will be some inaccuracy as -overwriting a file will copy it up and consume space we were not expecting, -but it is better than nothing. - -Use the upper layer dentry and mount from the overlayfs root inode, -passing the statfs call to that filesystem. - -Signed-off-by: Andy Whitcroft -Signed-off-by: Miklos Szeredi -(cherry picked from commit cc2596392af3b1404421aaef828a255303c46f93) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 227710a..aaf562b 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -16,17 +16,21 @@ - #include - #include - #include -+#include - #include "overlayfs.h" - - MODULE_AUTHOR("Miklos Szeredi "); - MODULE_DESCRIPTION("Overlay filesystem"); - MODULE_LICENSE("GPL"); - -+#define OVERLAYFS_SUPER_MAGIC 0x794c764f -+ - /* private information held for overlayfs's superblock */ - struct ovl_fs { - struct vfsmount *upper_mnt; - struct vfsmount *lower_mnt; - struct dentry *workdir; -+ long lower_namelen; - }; - - struct ovl_dir_cache; -@@ -383,8 +387,35 @@ static void ovl_put_super(struct super_block *sb) - kfree(ufs); - } - -+/** -+ * ovl_statfs -+ * @sb: The overlayfs super block -+ * @buf: The struct kstatfs to fill in with stats -+ * -+ * Get the filesystem statistics. As writes always target the upper layer -+ * filesystem pass the statfs to the same filesystem. -+ */ -+static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) -+{ -+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info; -+ struct dentry *root_dentry = dentry->d_sb->s_root; -+ struct path path; -+ int err; -+ -+ ovl_path_upper(root_dentry, &path); -+ -+ err = vfs_statfs(&path, buf); -+ if (!err) { -+ buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); -+ buf->f_type = OVERLAYFS_SUPER_MAGIC; -+ } -+ -+ return err; -+} -+ - static const struct super_operations ovl_super_operations = { - .put_super = ovl_put_super, -+ .statfs = ovl_statfs, - }; - - struct ovl_config { -@@ -556,6 +587,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - struct ovl_entry *oe; - struct ovl_fs *ufs; - struct ovl_config config; -+ struct kstatfs statfs; - int err; - - err = ovl_parse_opt((char *) data, &config); -@@ -617,6 +649,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - goto out_put_workpath; - } - -+ err = vfs_statfs(&lowerpath, &statfs); -+ if (err) { -+ pr_err("overlayfs: statfs failed on lowerpath\n"); -+ goto out_put_workpath; -+ } -+ ufs->lower_namelen = statfs.f_namelen; -+ - ufs->upper_mnt = clone_private_mount(&upperpath); - err = PTR_ERR(ufs->upper_mnt); - if (IS_ERR(ufs->upper_mnt)) { -@@ -669,6 +708,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - - root_dentry->d_fsdata = oe; - -+ sb->s_magic = OVERLAYFS_SUPER_MAGIC; - sb->s_op = &ovl_super_operations; - sb->s_root = root_dentry; - sb->s_fs_info = ufs; --- -2.7.4 - - -From 2428e59706e6221ccebeee9c79870988485e2b06 Mon Sep 17 00:00:00 2001 -From: Erez Zadok -Date: Fri, 24 Oct 2014 00:14:38 +0200 -Subject: [PATCH 22/73] overlayfs: implement show_options - -This is useful because of the stacking nature of overlayfs. Users like to -find out (via /proc/mounts) which lower/upper directory were used at mount -time. - -AV: even failing ovl_parse_opt() could've done some kstrdup() -AV: failure of ovl_alloc_entry() should end up with ENOMEM, not EINVAL - -Signed-off-by: Erez Zadok -Signed-off-by: Miklos Szeredi -(cherry picked from commit f45827e84186af152492c6d0dcf4105b4a605f9b) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 76 +++++++++++++++++++++++++++++++++------------------- - 1 file changed, 48 insertions(+), 28 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index aaf562b..7dcc24e 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include "overlayfs.h" - - MODULE_AUTHOR("Miklos Szeredi "); -@@ -25,12 +26,20 @@ MODULE_LICENSE("GPL"); - - #define OVERLAYFS_SUPER_MAGIC 0x794c764f - -+struct ovl_config { -+ char *lowerdir; -+ char *upperdir; -+ char *workdir; -+}; -+ - /* private information held for overlayfs's superblock */ - struct ovl_fs { - struct vfsmount *upper_mnt; - struct vfsmount *lower_mnt; - struct dentry *workdir; - long lower_namelen; -+ /* pathnames of lower and upper dirs, for show_options */ -+ struct ovl_config config; - }; - - struct ovl_dir_cache; -@@ -384,6 +393,9 @@ static void ovl_put_super(struct super_block *sb) - mntput(ufs->upper_mnt); - mntput(ufs->lower_mnt); - -+ kfree(ufs->config.lowerdir); -+ kfree(ufs->config.upperdir); -+ kfree(ufs->config.workdir); - kfree(ufs); - } - -@@ -413,15 +425,27 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) - return err; - } - -+/** -+ * ovl_show_options -+ * -+ * Prints the mount options for a given superblock. -+ * Returns zero; does not fail. -+ */ -+static int ovl_show_options(struct seq_file *m, struct dentry *dentry) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct ovl_fs *ufs = sb->s_fs_info; -+ -+ seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); -+ seq_printf(m, ",upperdir=%s", ufs->config.upperdir); -+ seq_printf(m, ",workdir=%s", ufs->config.workdir); -+ return 0; -+} -+ - static const struct super_operations ovl_super_operations = { - .put_super = ovl_put_super, - .statfs = ovl_statfs, --}; -- --struct ovl_config { -- char *lowerdir; -- char *upperdir; -- char *workdir; -+ .show_options = ovl_show_options, - }; - - enum { -@@ -442,10 +466,6 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) - { - char *p; - -- config->upperdir = NULL; -- config->lowerdir = NULL; -- config->workdir = NULL; -- - while ((p = strsep(&opt, ",")) != NULL) { - int token; - substring_t args[MAX_OPT_ARGS]; -@@ -586,39 +606,40 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - struct dentry *root_dentry; - struct ovl_entry *oe; - struct ovl_fs *ufs; -- struct ovl_config config; - struct kstatfs statfs; - int err; - -- err = ovl_parse_opt((char *) data, &config); -- if (err) -+ err = -ENOMEM; -+ ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); -+ if (!ufs) - goto out; - -+ err = ovl_parse_opt((char *) data, &ufs->config); -+ if (err) -+ goto out_free_config; -+ - /* FIXME: workdir is not needed for a R/O mount */ - err = -EINVAL; -- if (!config.upperdir || !config.lowerdir || !config.workdir) { -+ if (!ufs->config.upperdir || !ufs->config.lowerdir || -+ !ufs->config.workdir) { - pr_err("overlayfs: missing upperdir or lowerdir or workdir\n"); - goto out_free_config; - } - - err = -ENOMEM; -- ufs = kmalloc(sizeof(struct ovl_fs), GFP_KERNEL); -- if (!ufs) -- goto out_free_config; -- - oe = ovl_alloc_entry(); - if (oe == NULL) -- goto out_free_ufs; -+ goto out_free_config; - -- err = ovl_mount_dir(config.upperdir, &upperpath); -+ err = ovl_mount_dir(ufs->config.upperdir, &upperpath); - if (err) - goto out_free_oe; - -- err = ovl_mount_dir(config.lowerdir, &lowerpath); -+ err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); - if (err) - goto out_put_upperpath; - -- err = ovl_mount_dir(config.workdir, &workpath); -+ err = ovl_mount_dir(ufs->config.workdir, &workpath); - if (err) - goto out_put_lowerpath; - -@@ -674,7 +695,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - err = PTR_ERR(ufs->workdir); - if (IS_ERR(ufs->workdir)) { - pr_err("overlayfs: failed to create directory %s/%s\n", -- config.workdir, OVL_WORKDIR_NAME); -+ ufs->config.workdir, OVL_WORKDIR_NAME); - goto out_put_lower_mnt; - } - -@@ -729,12 +750,11 @@ out_put_upperpath: - path_put(&upperpath); - out_free_oe: - kfree(oe); --out_free_ufs: -- kfree(ufs); - out_free_config: -- kfree(config.lowerdir); -- kfree(config.upperdir); -- kfree(config.workdir); -+ kfree(ufs->config.lowerdir); -+ kfree(ufs->config.upperdir); -+ kfree(ufs->config.workdir); -+ kfree(ufs); - out: - return err; - } --- -2.7.4 - - -From 8f963c31b87794a5c16a6ea155f8001cdd1d93c7 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 24 Oct 2014 00:14:39 +0200 -Subject: [PATCH 23/73] fs: limit filesystem stacking depth - -Add a simple read-only counter to super_block that indicates how deep this -is in the stack of filesystems. Previously ecryptfs was the only stackable -filesystem and it explicitly disallowed multiple layers of itself. - -Overlayfs, however, can be stacked recursively and also may be stacked -on top of ecryptfs or vice versa. - -To limit the kernel stack usage we must limit the depth of the -filesystem stack. Initially the limit is set to 2. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 69c433ed2ecd2d3264efd7afec4439524b319121) -Signed-off-by: Alex Shi ---- - fs/ecryptfs/main.c | 7 +++++++ - fs/overlayfs/super.c | 9 +++++++++ - include/linux/fs.h | 11 +++++++++++ - 3 files changed, 27 insertions(+) - -diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c -index 34eb843..d9eb84b 100644 ---- a/fs/ecryptfs/main.c -+++ b/fs/ecryptfs/main.c -@@ -576,6 +576,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags - s->s_maxbytes = path.dentry->d_sb->s_maxbytes; - s->s_blocksize = path.dentry->d_sb->s_blocksize; - s->s_magic = ECRYPTFS_SUPER_MAGIC; -+ s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; -+ -+ rc = -EINVAL; -+ if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { -+ pr_err("eCryptfs: maximum fs stacking depth exceeded\n"); -+ goto out_free; -+ } - - inode = ecryptfs_get_inode(path.dentry->d_inode, s); - rc = PTR_ERR(inode); -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 7dcc24e..08b704c 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -677,6 +677,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - } - ufs->lower_namelen = statfs.f_namelen; - -+ sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth, -+ lowerpath.mnt->mnt_sb->s_stack_depth) + 1; -+ -+ err = -EINVAL; -+ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { -+ pr_err("overlayfs: maximum fs stacking depth exceeded\n"); -+ goto out_put_workpath; -+ } -+ - ufs->upper_mnt = clone_private_mount(&upperpath); - err = PTR_ERR(ufs->upper_mnt); - if (IS_ERR(ufs->upper_mnt)) { -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 18492c8..429af8d 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -256,6 +256,12 @@ struct iattr { - */ - #include - -+/* -+ * Maximum number of layers of fs stack. Needs to be limited to -+ * prevent kernel stack overflow -+ */ -+#define FILESYSTEM_MAX_STACK_DEPTH 2 -+ - /** - * enum positive_aop_returns - aop return codes with specific semantics - * -@@ -1336,6 +1342,11 @@ struct super_block { - struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; - struct list_lru s_inode_lru ____cacheline_aligned_in_smp; - struct rcu_head rcu; -+ -+ /* -+ * Indicates how deep in a filesystem stack this SB is -+ */ -+ int s_stack_depth; - }; - - extern struct timespec current_fs_time(struct super_block *sb); --- -2.7.4 - - -From a5613f10c5230eb50a863caf596774c50ec8f3e9 Mon Sep 17 00:00:00 2001 -From: Al Viro -Date: Thu, 23 Oct 2014 22:56:05 -0400 -Subject: [PATCH 24/73] overlayfs: don't hold ->i_mutex over opening the real - directory - -just use it to serialize the assignment - -Signed-off-by: Al Viro -(cherry picked from commit 3d268c9b136f51385f9d041f3f2424501b257388) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 19 +++++++++++++------ - 1 file changed, 13 insertions(+), 6 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index c6787f8..b7d9fb0 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -458,20 +458,27 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, - if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { - struct inode *inode = file_inode(file); - -- mutex_lock(&inode->i_mutex); - realfile = od->upperfile; - if (!realfile) { - struct path upperpath; - - ovl_path_upper(dentry, &upperpath); - realfile = ovl_path_open(&upperpath, O_RDONLY); -- if (IS_ERR(realfile)) { -- mutex_unlock(&inode->i_mutex); -- return PTR_ERR(realfile); -+ mutex_lock(&inode->i_mutex); -+ if (!od->upperfile) { -+ if (IS_ERR(realfile)) { -+ mutex_unlock(&inode->i_mutex); -+ return PTR_ERR(realfile); -+ } -+ od->upperfile = realfile; -+ } else { -+ /* somebody has beaten us to it */ -+ if (!IS_ERR(realfile)) -+ fput(realfile); -+ realfile = od->upperfile; - } -- od->upperfile = realfile; -+ mutex_unlock(&inode->i_mutex); - } -- mutex_unlock(&inode->i_mutex); - } - - return vfs_fsync_range(realfile, start, end, datasync); --- -2.7.4 - - -From c0913e8f810834e7f4645050bfcb5e1c9f06fd53 Mon Sep 17 00:00:00 2001 -From: Al Viro -Date: Thu, 23 Oct 2014 22:58:56 -0400 -Subject: [PATCH 25/73] overlayfs: make ovl_cache_entry->name an array instead - of pointer - -Signed-off-by: Al Viro -(cherry picked from commit 68bf8611076a8e4bee8bc8d03ff28bd1e9a9c631) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 11 +++++------ - 1 file changed, 5 insertions(+), 6 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index b7d9fb0..9c9872b 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -18,13 +18,13 @@ - #include "overlayfs.h" - - struct ovl_cache_entry { -- const char *name; - unsigned int len; - unsigned int type; - u64 ino; - bool is_whiteout; - struct list_head l_node; - struct rb_node node; -+ char name[]; - }; - - struct ovl_dir_cache { -@@ -82,13 +82,12 @@ static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, - u64 ino, unsigned int d_type) - { - struct ovl_cache_entry *p; -+ size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); - -- p = kmalloc(sizeof(*p) + len + 1, GFP_KERNEL); -+ p = kmalloc(size, GFP_KERNEL); - if (p) { -- char *name_copy = (char *) (p + 1); -- memcpy(name_copy, name, len); -- name_copy[len] = '\0'; -- p->name = name_copy; -+ memcpy(p->name, name, len); -+ p->name[len] = '\0'; - p->len = len; - p->type = d_type; - p->ino = ino; --- -2.7.4 - - -From 90887f5c1bb98d0d51933e4d2238b5cc888b56cf Mon Sep 17 00:00:00 2001 -From: Al Viro -Date: Thu, 23 Oct 2014 23:00:53 -0400 -Subject: [PATCH 26/73] overlayfs: embed root into overlay_readdir_data - -no sense having it a pointer - all instances have it pointing to -local variable in the same stack frame - -Signed-off-by: Al Viro -(cherry picked from commit 49be4fb9cc3431fc4ebc71c764db848483b2a16c) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 11 +++++------ - 1 file changed, 5 insertions(+), 6 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 9c9872b..a9ee2c1 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -36,7 +36,7 @@ struct ovl_dir_cache { - struct ovl_readdir_data { - struct dir_context ctx; - bool is_merge; -- struct rb_root *root; -+ struct rb_root root; - struct list_head *list; - struct list_head *middle; - int count; -@@ -101,7 +101,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, - const char *name, int len, u64 ino, - unsigned int d_type) - { -- struct rb_node **newp = &rdd->root->rb_node; -+ struct rb_node **newp = &rdd->root.rb_node; - struct rb_node *parent = NULL; - struct ovl_cache_entry *p; - -@@ -126,7 +126,7 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, - - list_add_tail(&p->l_node, rdd->list); - rb_link_node(&p->node, parent, newp); -- rb_insert_color(&p->node, rdd->root); -+ rb_insert_color(&p->node, &rdd->root); - - return 0; - } -@@ -137,7 +137,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, - { - struct ovl_cache_entry *p; - -- p = ovl_cache_entry_find(rdd->root, name, namelen); -+ p = ovl_cache_entry_find(&rdd->root, name, namelen); - if (p) { - list_move_tail(&p->l_node, rdd->middle); - } else { -@@ -277,12 +277,11 @@ static inline int ovl_dir_read_merged(struct path *upperpath, - struct list_head *list) - { - int err; -- struct rb_root root = RB_ROOT; - struct list_head middle; - struct ovl_readdir_data rdd = { - .ctx.actor = ovl_fill_merge, - .list = list, -- .root = &root, -+ .root = RB_ROOT, - .is_merge = false, - }; - --- -2.7.4 - - -From 258e15586ba135f8534ab416c647d2f195e12df1 Mon Sep 17 00:00:00 2001 -From: Al Viro -Date: Thu, 23 Oct 2014 23:03:03 -0400 -Subject: [PATCH 27/73] overlayfs: embed middle into overlay_readdir_data - -same story... - -Signed-off-by: Al Viro -(cherry picked from commit db6ec212b53abc29a5bb6ac8c810010fc28d5191) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 12 +++++------- - 1 file changed, 5 insertions(+), 7 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index a9ee2c1..910553f 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -38,7 +38,7 @@ struct ovl_readdir_data { - bool is_merge; - struct rb_root root; - struct list_head *list; -- struct list_head *middle; -+ struct list_head middle; - int count; - int err; - }; -@@ -139,13 +139,13 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, - - p = ovl_cache_entry_find(&rdd->root, name, namelen); - if (p) { -- list_move_tail(&p->l_node, rdd->middle); -+ list_move_tail(&p->l_node, &rdd->middle); - } else { - p = ovl_cache_entry_new(name, namelen, ino, d_type); - if (p == NULL) - rdd->err = -ENOMEM; - else -- list_add_tail(&p->l_node, rdd->middle); -+ list_add_tail(&p->l_node, &rdd->middle); - } - - return rdd->err; -@@ -277,7 +277,6 @@ static inline int ovl_dir_read_merged(struct path *upperpath, - struct list_head *list) - { - int err; -- struct list_head middle; - struct ovl_readdir_data rdd = { - .ctx.actor = ovl_fill_merge, - .list = list, -@@ -301,11 +300,10 @@ static inline int ovl_dir_read_merged(struct path *upperpath, - * Insert lowerpath entries before upperpath ones, this allows - * offsets to be reasonably constant - */ -- list_add(&middle, rdd.list); -- rdd.middle = &middle; -+ list_add(&rdd.middle, rdd.list); - rdd.is_merge = true; - err = ovl_dir_read(lowerpath, &rdd); -- list_del(&middle); -+ list_del(&rdd.middle); - } - out: - return err; --- -2.7.4 - - -From 313adf61d46e2fc1092a03540f33b1f1345b8c44 Mon Sep 17 00:00:00 2001 -From: Al Viro -Date: Tue, 28 Oct 2014 18:27:28 -0400 -Subject: [PATCH 28/73] overlayfs: barriers for opening upper-layer directory - -make sure that - a) all stores done by opening struct file don't leak past storing -the reference in od->upperfile - b) the lockless side has read dependency barrier - -Signed-off-by: Al Viro -(cherry picked from commit d45f00ae43e63eff1b3d79df20610ae1ef645ebd) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 910553f..8c8ce9d 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -454,12 +454,13 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, - if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { - struct inode *inode = file_inode(file); - -- realfile = od->upperfile; -+ realfile =lockless_dereference(od->upperfile); - if (!realfile) { - struct path upperpath; - - ovl_path_upper(dentry, &upperpath); - realfile = ovl_path_open(&upperpath, O_RDONLY); -+ smp_mb__before_spinlock(); - mutex_lock(&inode->i_mutex); - if (!od->upperfile) { - if (IS_ERR(realfile)) { --- -2.7.4 - - -From 2b3b22f12809d5406745005ea0c20c4945dfe789 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Mon, 27 Oct 2014 13:48:48 +0100 -Subject: [PATCH 29/73] ovl: fix check for cursor - -ovl_cache_entry.name is now an array not a pointer, so it makes no sense -test for it being NULL. - -Detected by coverity. - -From: Miklos Szeredi -Fixes: 68bf8611076a ("overlayfs: make ovl_cache_entry->name an array instead of -+pointer") -Signed-off-by: Miklos Szeredi -Signed-off-by: Al Viro - -(cherry picked from commit c2096537d40f026672c4c6adfcd7247ce5799604) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 8c8ce9d..3fbf0d3 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -21,9 +21,10 @@ struct ovl_cache_entry { - unsigned int len; - unsigned int type; - u64 ino; -- bool is_whiteout; - struct list_head l_node; - struct rb_node node; -+ bool is_whiteout; -+ bool is_cursor; - char name[]; - }; - -@@ -251,7 +252,7 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, - - mutex_lock(&dir->d_inode->i_mutex); - list_for_each_entry(p, rdd->list, l_node) { -- if (!p->name) -+ if (p->is_cursor) - continue; - - if (p->type != DT_CHR) -@@ -307,7 +308,6 @@ static inline int ovl_dir_read_merged(struct path *upperpath, - } - out: - return err; -- - } - - static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) -@@ -316,7 +316,7 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) - loff_t off = 0; - - list_for_each_entry(p, &od->cache->entries, l_node) { -- if (!p->name) -+ if (p->is_cursor) - continue; - if (off >= pos) - break; -@@ -389,7 +389,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) - - p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); - /* Skip cursors */ -- if (p->name) { -+ if (!p->is_cursor) { - if (!p->is_whiteout) { - if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) - break; -@@ -519,6 +519,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file) - od->realfile = realfile; - od->is_real = (type != OVL_PATH_MERGE); - od->is_upper = (type != OVL_PATH_LOWER); -+ od->cursor.is_cursor = true; - file->private_data = od; - - return 0; --- -2.7.4 - - -From 9ba0656673b59bcae46ce2a7849005a07d494fac Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Mon, 27 Oct 2014 15:42:01 +0100 -Subject: [PATCH 30/73] overlayfs: fix lockdep misannotation - -In an overlay directory that shadows an empty lower directory, say -/mnt/a/empty102, do: - - touch /mnt/a/empty102/x - unlink /mnt/a/empty102/x - rmdir /mnt/a/empty102 - -It's actually harmless, but needs another level of nesting between -I_MUTEX_CHILD and I_MUTEX_NORMAL. - -Signed-off-by: Miklos Szeredi -Tested-by: David Howells -Signed-off-by: Al Viro -(cherry picked from commit d1b72cc6d8cb766c802fdc70a5edc2f0ba8a2b57) -Signed-off-by: Alex Shi ---- - fs/namei.c | 2 +- - fs/overlayfs/readdir.c | 2 +- - include/linux/fs.h | 9 ++++++--- - 3 files changed, 8 insertions(+), 5 deletions(-) - -diff --git a/fs/namei.c b/fs/namei.c -index a047265e..0fbf150 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -2473,7 +2473,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) - } - - mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); -- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); -+ mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2); - return NULL; - } - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 3fbf0d3..401f084 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -571,7 +571,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) - { - struct ovl_cache_entry *p; - -- mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT); -+ mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD); - list_for_each_entry(p, list, l_node) { - struct dentry *dentry; - -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 429af8d..1ec29cc 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -641,11 +641,13 @@ static inline int inode_unhashed(struct inode *inode) - * 2: child/target - * 3: xattr - * 4: second non-directory -- * The last is for certain operations (such as rename) which lock two -+ * 5: second parent (when locking independent directories in rename) -+ * -+ * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two - * non-directories at once. - * - * The locking order between these classes is -- * parent -> child -> normal -> xattr -> second non-directory -+ * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory - */ - enum inode_i_mutex_lock_class - { -@@ -653,7 +655,8 @@ enum inode_i_mutex_lock_class - I_MUTEX_PARENT, - I_MUTEX_CHILD, - I_MUTEX_XATTR, -- I_MUTEX_NONDIR2 -+ I_MUTEX_NONDIR2, -+ I_MUTEX_PARENT2, - }; - - void lock_two_nondirectories(struct inode *, struct inode*); --- -2.7.4 - - -From a996f76df429a061985a5e7b1904c6574315a818 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Fri, 31 Oct 2014 20:02:42 +0100 -Subject: [PATCH 31/73] ovl: initialize ->is_cursor - -Signed-off-by: Miklos Szeredi -Signed-off-by: Al Viro -(cherry picked from commit 9f2f7d4c8dfcf4617af5de6ea381b91deac3db48) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 401f084..4e9d7c1 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -93,6 +93,7 @@ static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, - p->type = d_type; - p->ino = ino; - p->is_whiteout = false; -+ p->is_cursor = false; - } - - return p; --- -2.7.4 - - -From 30d1df13eef6be298b5f783a2b36b82cf1c0f033 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Thu, 30 Oct 2014 17:37:34 +0100 -Subject: [PATCH 32/73] vfs: make first argument of dir_context.actor typed - -Signed-off-by: Miklos Szeredi -Signed-off-by: Al Viro -(cherry picked from commit ac7576f4b1da8c9c6bc1ae026c2b9e86ae617ba5) -Signed-off-by: Alex Shi ---- - arch/alpha/kernel/osf_sys.c | 7 ++++--- - arch/parisc/hpux/fs.c | 7 ++++--- - drivers/staging/lustre/lustre/llite/llite_nfs.c | 8 +++++--- - fs/afs/dir.c | 9 +++++---- - fs/compat.c | 21 +++++++++++++-------- - fs/ecryptfs/file.c | 6 +++--- - fs/exportfs/expfs.c | 5 +++-- - fs/fat/dir.c | 5 +++-- - fs/gfs2/export.c | 8 +++++--- - fs/hppfs/hppfs.c | 5 +++-- - fs/nfsd/nfs4recover.c | 5 +++-- - fs/nfsd/vfs.c | 12 +++++++----- - fs/nfsd/vfs.h | 4 ++-- - fs/ocfs2/dir.c | 8 +++++--- - fs/ocfs2/journal.c | 8 +++++--- - fs/overlayfs/readdir.c | 8 +++++--- - fs/readdir.c | 21 ++++++++++++--------- - fs/reiserfs/xattr.c | 15 +++++++++------ - include/linux/fs.h | 5 ++++- - 19 files changed, 100 insertions(+), 67 deletions(-) - -diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c -index 1402fcc..98f57d3 100644 ---- a/arch/alpha/kernel/osf_sys.c -+++ b/arch/alpha/kernel/osf_sys.c -@@ -104,11 +104,12 @@ struct osf_dirent_callback { - }; - - static int --osf_filldir(void *__buf, const char *name, int namlen, loff_t offset, -- u64 ino, unsigned int d_type) -+osf_filldir(struct dir_context *ctx, const char *name, int namlen, -+ loff_t offset, u64 ino, unsigned int d_type) - { - struct osf_dirent __user *dirent; -- struct osf_dirent_callback *buf = (struct osf_dirent_callback *) __buf; -+ struct osf_dirent_callback *buf = -+ container_of(ctx, struct osf_dirent_callback, ctx); - unsigned int reclen = ALIGN(NAME_OFFSET + namlen + 1, sizeof(u32)); - unsigned int d_ino; - -diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c -index 2bedafe..97a7bf8 100644 ---- a/arch/parisc/hpux/fs.c -+++ b/arch/parisc/hpux/fs.c -@@ -56,11 +56,12 @@ struct getdents_callback { - - #define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) - --static int filldir(void * __buf, const char * name, int namlen, loff_t offset, -- u64 ino, unsigned d_type) -+static int filldir(struct dir_context *ctx, const char *name, int namlen, -+ loff_t offset, u64 ino, unsigned d_type) - { - struct hpux_dirent __user * dirent; -- struct getdents_callback * buf = (struct getdents_callback *) __buf; -+ struct getdents_callback *buf = -+ container_of(ctx, struct getdents_callback, ctx); - ino_t d_ino; - int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(long)); - -diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c -index 1767c74..ed35a88 100644 ---- a/drivers/staging/lustre/lustre/llite/llite_nfs.c -+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c -@@ -205,13 +205,15 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen, - return LUSTRE_NFS_FID; - } - --static int ll_nfs_get_name_filldir(void *cookie, const char *name, int namelen, -- loff_t hash, u64 ino, unsigned type) -+static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name, -+ int namelen, loff_t hash, u64 ino, -+ unsigned type) - { - /* It is hack to access lde_fid for comparison with lgd_fid. - * So the input 'name' must be part of the 'lu_dirent'. */ - struct lu_dirent *lde = container_of0(name, struct lu_dirent, lde_name); -- struct ll_getname_data *lgd = cookie; -+ struct ll_getname_data *lgd = -+ container_of(ctx, struct ll_getname_data, ctx); - struct lu_fid fid; - - fid_le_to_cpu(&fid, &lde->lde_fid); -diff --git a/fs/afs/dir.c b/fs/afs/dir.c -index 5293003..5479af6 100644 ---- a/fs/afs/dir.c -+++ b/fs/afs/dir.c -@@ -26,7 +26,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx); - static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); - static int afs_d_delete(const struct dentry *dentry); - static void afs_d_release(struct dentry *dentry); --static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, -+static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, - loff_t fpos, u64 ino, unsigned dtype); - static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl); -@@ -391,10 +391,11 @@ static int afs_readdir(struct file *file, struct dir_context *ctx) - * - if afs_dir_iterate_block() spots this function, it'll pass the FID - * uniquifier through dtype - */ --static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, -- loff_t fpos, u64 ino, unsigned dtype) -+static int afs_lookup_filldir(struct dir_context *ctx, const char *name, -+ int nlen, loff_t fpos, u64 ino, unsigned dtype) - { -- struct afs_lookup_cookie *cookie = _cookie; -+ struct afs_lookup_cookie *cookie = -+ container_of(ctx, struct afs_lookup_cookie, ctx); - - _enter("{%s,%u},%s,%u,,%llu,%u", - cookie->name.name, cookie->name.len, name, nlen, -diff --git a/fs/compat.c b/fs/compat.c -index 6af20de..14da9b3 100644 ---- a/fs/compat.c -+++ b/fs/compat.c -@@ -837,10 +837,12 @@ struct compat_readdir_callback { - int result; - }; - --static int compat_fillonedir(void *__buf, const char *name, int namlen, -- loff_t offset, u64 ino, unsigned int d_type) -+static int compat_fillonedir(struct dir_context *ctx, const char *name, -+ int namlen, loff_t offset, u64 ino, -+ unsigned int d_type) - { -- struct compat_readdir_callback *buf = __buf; -+ struct compat_readdir_callback *buf = -+ container_of(ctx, struct compat_readdir_callback, ctx); - struct compat_old_linux_dirent __user *dirent; - compat_ulong_t d_ino; - -@@ -905,11 +907,12 @@ struct compat_getdents_callback { - int error; - }; - --static int compat_filldir(void *__buf, const char *name, int namlen, -+static int compat_filldir(struct dir_context *ctx, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) - { - struct compat_linux_dirent __user * dirent; -- struct compat_getdents_callback *buf = __buf; -+ struct compat_getdents_callback *buf = -+ container_of(ctx, struct compat_getdents_callback, ctx); - compat_ulong_t d_ino; - int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + - namlen + 2, sizeof(compat_long_t)); -@@ -991,11 +994,13 @@ struct compat_getdents_callback64 { - int error; - }; - --static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, -- u64 ino, unsigned int d_type) -+static int compat_filldir64(struct dir_context *ctx, const char *name, -+ int namlen, loff_t offset, u64 ino, -+ unsigned int d_type) - { - struct linux_dirent64 __user *dirent; -- struct compat_getdents_callback64 *buf = __buf; -+ struct compat_getdents_callback64 *buf = -+ container_of(ctx, struct compat_getdents_callback64, ctx); - int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, - sizeof(u64)); - u64 off; -diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c -index 03df502..121a948 100644 ---- a/fs/ecryptfs/file.c -+++ b/fs/ecryptfs/file.c -@@ -76,11 +76,11 @@ struct ecryptfs_getdents_callback { - - /* Inspired by generic filldir in fs/readdir.c */ - static int --ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, -- loff_t offset, u64 ino, unsigned int d_type) -+ecryptfs_filldir(struct dir_context *ctx, const char *lower_name, -+ int lower_namelen, loff_t offset, u64 ino, unsigned int d_type) - { - struct ecryptfs_getdents_callback *buf = -- (struct ecryptfs_getdents_callback *)dirent; -+ container_of(ctx, struct ecryptfs_getdents_callback, ctx); - size_t name_size; - char *name; - int rc; -diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c -index 48a359d..59d339c 100644 ---- a/fs/exportfs/expfs.c -+++ b/fs/exportfs/expfs.c -@@ -241,10 +241,11 @@ struct getdents_callback { - * A rather strange filldir function to capture - * the name matching the specified inode number. - */ --static int filldir_one(void * __buf, const char * name, int len, -+static int filldir_one(struct dir_context *ctx, const char *name, int len, - loff_t pos, u64 ino, unsigned int d_type) - { -- struct getdents_callback *buf = __buf; -+ struct getdents_callback *buf = -+ container_of(ctx, struct getdents_callback, ctx); - int result = 0; - - buf->sequence++; -diff --git a/fs/fat/dir.c b/fs/fat/dir.c -index 3963ede..c5d6bb9 100644 ---- a/fs/fat/dir.c -+++ b/fs/fat/dir.c -@@ -702,10 +702,11 @@ static int fat_readdir(struct file *file, struct dir_context *ctx) - } - - #define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \ --static int func(void *__buf, const char *name, int name_len, \ -+static int func(struct dir_context *ctx, const char *name, int name_len, \ - loff_t offset, u64 ino, unsigned int d_type) \ - { \ -- struct fat_ioctl_filldir_callback *buf = __buf; \ -+ struct fat_ioctl_filldir_callback *buf = \ -+ container_of(ctx, struct fat_ioctl_filldir_callback, ctx); \ - struct dirent_type __user *d1 = buf->dirent; \ - struct dirent_type __user *d2 = d1 + 1; \ - \ -diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c -index 8b9b377..c41d255 100644 ---- a/fs/gfs2/export.c -+++ b/fs/gfs2/export.c -@@ -69,10 +69,12 @@ struct get_name_filldir { - char *name; - }; - --static int get_name_filldir(void *opaque, const char *name, int length, -- loff_t offset, u64 inum, unsigned int type) -+static int get_name_filldir(struct dir_context *ctx, const char *name, -+ int length, loff_t offset, u64 inum, -+ unsigned int type) - { -- struct get_name_filldir *gnfd = opaque; -+ struct get_name_filldir *gnfd = -+ container_of(ctx, struct get_name_filldir, ctx); - - if (inum != gnfd->inum.no_addr) - return 0; -diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c -index 4338ff3..5f27551 100644 ---- a/fs/hppfs/hppfs.c -+++ b/fs/hppfs/hppfs.c -@@ -548,10 +548,11 @@ struct hppfs_dirent { - struct dentry *dentry; - }; - --static int hppfs_filldir(void *d, const char *name, int size, -+static int hppfs_filldir(struct dir_context *ctx, const char *name, int size, - loff_t offset, u64 inode, unsigned int type) - { -- struct hppfs_dirent *dirent = d; -+ struct hppfs_dirent *dirent = -+ container_of(ctx, struct hppfs_dirent, ctx); - - if (file_removed(dirent->dentry, name)) - return 0; -diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c -index 9c271f4..674a5d5 100644 ---- a/fs/nfsd/nfs4recover.c -+++ b/fs/nfsd/nfs4recover.c -@@ -244,10 +244,11 @@ struct nfs4_dir_ctx { - }; - - static int --nfsd4_build_namelist(void *arg, const char *name, int namlen, -+nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) - { -- struct nfs4_dir_ctx *ctx = arg; -+ struct nfs4_dir_ctx *ctx = -+ container_of(__ctx, struct nfs4_dir_ctx, ctx); - struct name_list *entry; - - if (namlen != HEXDIR_LEN - 1) -diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c -index 464f813..3953a20 100644 ---- a/fs/nfsd/vfs.c -+++ b/fs/nfsd/vfs.c -@@ -1808,10 +1808,12 @@ struct readdir_data { - int full; - }; - --static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, -- loff_t offset, u64 ino, unsigned int d_type) -+static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, -+ int namlen, loff_t offset, u64 ino, -+ unsigned int d_type) - { -- struct readdir_data *buf = __buf; -+ struct readdir_data *buf = -+ container_of(ctx, struct readdir_data, ctx); - struct buffered_dirent *de = (void *)(buf->dirent + buf->used); - unsigned int reclen; - -@@ -1831,7 +1833,7 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, - return 0; - } - --static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, -+static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, - struct readdir_cd *cdp, loff_t *offsetp) - { - struct buffered_dirent *de; -@@ -1915,7 +1917,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, - */ - __be32 - nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, -- struct readdir_cd *cdp, filldir_t func) -+ struct readdir_cd *cdp, nfsd_filldir_t func) - { - __be32 err; - struct file *file; -diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h -index fbe90bd..ea760b1 100644 ---- a/fs/nfsd/vfs.h -+++ b/fs/nfsd/vfs.h -@@ -36,7 +36,7 @@ - /* - * Callback function for readdir - */ --typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); -+typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); - - /* nfsd/vfs.c */ - int nfsd_racache_init(int); -@@ -89,7 +89,7 @@ __be32 nfsd_rename(struct svc_rqst *, - __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, - char *name, int len); - __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, -- loff_t *, struct readdir_cd *, filldir_t); -+ loff_t *, struct readdir_cd *, nfsd_filldir_t); - __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, - struct kstatfs *, int access); - -diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c -index 91a7e85..478e14d 100644 ---- a/fs/ocfs2/dir.c -+++ b/fs/ocfs2/dir.c -@@ -2073,10 +2073,12 @@ struct ocfs2_empty_dir_priv { - unsigned seen_other; - unsigned dx_dir; - }; --static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, -- loff_t pos, u64 ino, unsigned type) -+static int ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name, -+ int name_len, loff_t pos, u64 ino, -+ unsigned type) - { -- struct ocfs2_empty_dir_priv *p = priv; -+ struct ocfs2_empty_dir_priv *p = -+ container_of(ctx, struct ocfs2_empty_dir_priv, ctx); - - /* - * Check the positions of "." and ".." records to be sure -diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c -index 44fc3e5..a1b7dca 100644 ---- a/fs/ocfs2/journal.c -+++ b/fs/ocfs2/journal.c -@@ -1981,10 +1981,12 @@ struct ocfs2_orphan_filldir_priv { - struct ocfs2_super *osb; - }; - --static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, -- loff_t pos, u64 ino, unsigned type) -+static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name, -+ int name_len, loff_t pos, u64 ino, -+ unsigned type) - { -- struct ocfs2_orphan_filldir_priv *p = priv; -+ struct ocfs2_orphan_filldir_priv *p = -+ container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx); - struct inode *iter; - - if (name_len == 1 && !strncmp(".", name, 1)) -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 4e9d7c1..301f64a 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -180,10 +180,12 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) - } - } - --static int ovl_fill_merge(void *buf, const char *name, int namelen, -- loff_t offset, u64 ino, unsigned int d_type) -+static int ovl_fill_merge(struct dir_context *ctx, const char *name, -+ int namelen, loff_t offset, u64 ino, -+ unsigned int d_type) - { -- struct ovl_readdir_data *rdd = buf; -+ struct ovl_readdir_data *rdd = -+ container_of(ctx, struct ovl_readdir_data, ctx); - - rdd->count++; - if (!rdd->is_merge) -diff --git a/fs/readdir.c b/fs/readdir.c -index 5b53d99..e21af53 100644 ---- a/fs/readdir.c -+++ b/fs/readdir.c -@@ -72,10 +72,11 @@ struct readdir_callback { - int result; - }; - --static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, -- u64 ino, unsigned int d_type) -+static int fillonedir(struct dir_context *ctx, const char *name, int namlen, -+ loff_t offset, u64 ino, unsigned int d_type) - { -- struct readdir_callback *buf = (struct readdir_callback *) __buf; -+ struct readdir_callback *buf = -+ container_of(ctx, struct readdir_callback, ctx); - struct old_linux_dirent __user * dirent; - unsigned long d_ino; - -@@ -146,11 +147,12 @@ struct getdents_callback { - int error; - }; - --static int filldir(void * __buf, const char * name, int namlen, loff_t offset, -- u64 ino, unsigned int d_type) -+static int filldir(struct dir_context *ctx, const char *name, int namlen, -+ loff_t offset, u64 ino, unsigned int d_type) - { - struct linux_dirent __user * dirent; -- struct getdents_callback * buf = (struct getdents_callback *) __buf; -+ struct getdents_callback *buf = -+ container_of(ctx, struct getdents_callback, ctx); - unsigned long d_ino; - int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, - sizeof(long)); -@@ -230,11 +232,12 @@ struct getdents_callback64 { - int error; - }; - --static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, -- u64 ino, unsigned int d_type) -+static int filldir64(struct dir_context *ctx, const char *name, int namlen, -+ loff_t offset, u64 ino, unsigned int d_type) - { - struct linux_dirent64 __user *dirent; -- struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; -+ struct getdents_callback64 *buf = -+ container_of(ctx, struct getdents_callback64, ctx); - int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, - sizeof(u64)); - -diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c -index 5cdfbd6..d8b7acf 100644 ---- a/fs/reiserfs/xattr.c -+++ b/fs/reiserfs/xattr.c -@@ -177,10 +177,11 @@ struct reiserfs_dentry_buf { - }; - - static int --fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, -- u64 ino, unsigned int d_type) -+fill_with_dentries(struct dir_context *ctx, const char *name, int namelen, -+ loff_t offset, u64 ino, unsigned int d_type) - { -- struct reiserfs_dentry_buf *dbuf = buf; -+ struct reiserfs_dentry_buf *dbuf = -+ container_of(ctx, struct reiserfs_dentry_buf, ctx); - struct dentry *dentry; - WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex)); - -@@ -794,10 +795,12 @@ struct listxattr_buf { - struct dentry *dentry; - }; - --static int listxattr_filler(void *buf, const char *name, int namelen, -- loff_t offset, u64 ino, unsigned int d_type) -+static int listxattr_filler(struct dir_context *ctx, const char *name, -+ int namelen, loff_t offset, u64 ino, -+ unsigned int d_type) - { -- struct listxattr_buf *b = (struct listxattr_buf *)buf; -+ struct listxattr_buf *b = -+ container_of(ctx, struct listxattr_buf, ctx); - size_t size; - if (name[0] != '.' || - (namelen != 1 && (name[1] != '.' || namelen != 2))) { -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 1ec29cc..e1cee8b 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -1530,7 +1530,10 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); - * This allows the kernel to read directories into kernel space or - * to have different dirent layouts depending on the binary type. - */ --typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); -+struct dir_context; -+typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, -+ unsigned); -+ - struct dir_context { - const filldir_t actor; - loff_t pos; --- -2.7.4 - - -From 68a4dee66f6231bb2a5058c58e9e9ebea8149b5c Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Tue, 4 Nov 2014 16:11:03 +0100 -Subject: [PATCH 33/73] ovl: don't poison cursor - -ovl_cache_put() can be called from ovl_dir_reset() if the cache needs to be -rebuilt. We did list_del() on the cursor, which results in an Oops on the -poisoned pointer in ovl_seek_cursor(). - -Reported-by: Jordi Pujol Palomer -Signed-off-by: Miklos Szeredi -Tested-by: Jordi Pujol Palomer -Signed-off-by: Al Viro -(cherry picked from commit 3f822c6264954660babce757fb45792fd3af273e) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 301f64a..fdb63de 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -168,7 +168,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) - { - struct ovl_dir_cache *cache = od->cache; - -- list_del(&od->cursor.l_node); -+ list_del_init(&od->cursor.l_node); - WARN_ON(cache->refcount <= 0); - cache->refcount--; - if (!cache->refcount) { --- -2.7.4 - - -From 5c5ce110a498fc025c7ca85165ba2f5fa1ddef43 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Thu, 20 Nov 2014 16:39:59 +0100 -Subject: [PATCH 34/73] ovl: rename filesystem type to "overlay" - -Some distributions carry an "old" format of overlayfs while mainline has a -"new" format. - -The distros will possibly want to keep the old overlayfs alongside the new -for compatibility reasons. - -To make it possible to differentiate the two versions change the name of -the new one from "overlayfs" to "overlay". - -Signed-off-by: Miklos Szeredi -Reported-by: Serge Hallyn -Cc: Andy Whitcroft -(cherry picked from commit ef94b1864d1ed5be54376404bb23d22ed0481feb) -Signed-off-by: Alex Shi ---- - Documentation/filesystems/overlayfs.txt | 198 ++++++++++++++++++++++++++++++++ - MAINTAINERS | 7 ++ - fs/Makefile | 2 +- - fs/overlayfs/Kconfig | 2 +- - fs/overlayfs/Makefile | 4 +- - fs/overlayfs/super.c | 6 +- - 6 files changed, 212 insertions(+), 7 deletions(-) - create mode 100644 Documentation/filesystems/overlayfs.txt - -diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt -new file mode 100644 -index 0000000..a27c950 ---- /dev/null -+++ b/Documentation/filesystems/overlayfs.txt -@@ -0,0 +1,198 @@ -+Written by: Neil Brown -+ -+Overlay Filesystem -+================== -+ -+This document describes a prototype for a new approach to providing -+overlay-filesystem functionality in Linux (sometimes referred to as -+union-filesystems). An overlay-filesystem tries to present a -+filesystem which is the result over overlaying one filesystem on top -+of the other. -+ -+The result will inevitably fail to look exactly like a normal -+filesystem for various technical reasons. The expectation is that -+many use cases will be able to ignore these differences. -+ -+This approach is 'hybrid' because the objects that appear in the -+filesystem do not all appear to belong to that filesystem. In many -+cases an object accessed in the union will be indistinguishable -+from accessing the corresponding object from the original filesystem. -+This is most obvious from the 'st_dev' field returned by stat(2). -+ -+While directories will report an st_dev from the overlay-filesystem, -+all non-directory objects will report an st_dev from the lower or -+upper filesystem that is providing the object. Similarly st_ino will -+only be unique when combined with st_dev, and both of these can change -+over the lifetime of a non-directory object. Many applications and -+tools ignore these values and will not be affected. -+ -+Upper and Lower -+--------------- -+ -+An overlay filesystem combines two filesystems - an 'upper' filesystem -+and a 'lower' filesystem. When a name exists in both filesystems, the -+object in the 'upper' filesystem is visible while the object in the -+'lower' filesystem is either hidden or, in the case of directories, -+merged with the 'upper' object. -+ -+It would be more correct to refer to an upper and lower 'directory -+tree' rather than 'filesystem' as it is quite possible for both -+directory trees to be in the same filesystem and there is no -+requirement that the root of a filesystem be given for either upper or -+lower. -+ -+The lower filesystem can be any filesystem supported by Linux and does -+not need to be writable. The lower filesystem can even be another -+overlayfs. The upper filesystem will normally be writable and if it -+is it must support the creation of trusted.* extended attributes, and -+must provide valid d_type in readdir responses, so NFS is not suitable. -+ -+A read-only overlay of two read-only filesystems may use any -+filesystem type. -+ -+Directories -+----------- -+ -+Overlaying mainly involves directories. If a given name appears in both -+upper and lower filesystems and refers to a non-directory in either, -+then the lower object is hidden - the name refers only to the upper -+object. -+ -+Where both upper and lower objects are directories, a merged directory -+is formed. -+ -+At mount time, the two directories given as mount options "lowerdir" and -+"upperdir" are combined into a merged directory: -+ -+ mount -t overlay overlay -olowerdir=/lower,upperdir=/upper,\ -+workdir=/work /merged -+ -+The "workdir" needs to be an empty directory on the same filesystem -+as upperdir. -+ -+Then whenever a lookup is requested in such a merged directory, the -+lookup is performed in each actual directory and the combined result -+is cached in the dentry belonging to the overlay filesystem. If both -+actual lookups find directories, both are stored and a merged -+directory is created, otherwise only one is stored: the upper if it -+exists, else the lower. -+ -+Only the lists of names from directories are merged. Other content -+such as metadata and extended attributes are reported for the upper -+directory only. These attributes of the lower directory are hidden. -+ -+whiteouts and opaque directories -+-------------------------------- -+ -+In order to support rm and rmdir without changing the lower -+filesystem, an overlay filesystem needs to record in the upper filesystem -+that files have been removed. This is done using whiteouts and opaque -+directories (non-directories are always opaque). -+ -+A whiteout is created as a character device with 0/0 device number. -+When a whiteout is found in the upper level of a merged directory, any -+matching name in the lower level is ignored, and the whiteout itself -+is also hidden. -+ -+A directory is made opaque by setting the xattr "trusted.overlay.opaque" -+to "y". Where the upper filesystem contains an opaque directory, any -+directory in the lower filesystem with the same name is ignored. -+ -+readdir -+------- -+ -+When a 'readdir' request is made on a merged directory, the upper and -+lower directories are each read and the name lists merged in the -+obvious way (upper is read first, then lower - entries that already -+exist are not re-added). This merged name list is cached in the -+'struct file' and so remains as long as the file is kept open. If the -+directory is opened and read by two processes at the same time, they -+will each have separate caches. A seekdir to the start of the -+directory (offset 0) followed by a readdir will cause the cache to be -+discarded and rebuilt. -+ -+This means that changes to the merged directory do not appear while a -+directory is being read. This is unlikely to be noticed by many -+programs. -+ -+seek offsets are assigned sequentially when the directories are read. -+Thus if -+ - read part of a directory -+ - remember an offset, and close the directory -+ - re-open the directory some time later -+ - seek to the remembered offset -+ -+there may be little correlation between the old and new locations in -+the list of filenames, particularly if anything has changed in the -+directory. -+ -+Readdir on directories that are not merged is simply handled by the -+underlying directory (upper or lower). -+ -+ -+Non-directories -+--------------- -+ -+Objects that are not directories (files, symlinks, device-special -+files etc.) are presented either from the upper or lower filesystem as -+appropriate. When a file in the lower filesystem is accessed in a way -+the requires write-access, such as opening for write access, changing -+some metadata etc., the file is first copied from the lower filesystem -+to the upper filesystem (copy_up). Note that creating a hard-link -+also requires copy_up, though of course creation of a symlink does -+not. -+ -+The copy_up may turn out to be unnecessary, for example if the file is -+opened for read-write but the data is not modified. -+ -+The copy_up process first makes sure that the containing directory -+exists in the upper filesystem - creating it and any parents as -+necessary. It then creates the object with the same metadata (owner, -+mode, mtime, symlink-target etc.) and then if the object is a file, the -+data is copied from the lower to the upper filesystem. Finally any -+extended attributes are copied up. -+ -+Once the copy_up is complete, the overlay filesystem simply -+provides direct access to the newly created file in the upper -+filesystem - future operations on the file are barely noticed by the -+overlay filesystem (though an operation on the name of the file such as -+rename or unlink will of course be noticed and handled). -+ -+ -+Non-standard behavior -+--------------------- -+ -+The copy_up operation essentially creates a new, identical file and -+moves it over to the old name. The new file may be on a different -+filesystem, so both st_dev and st_ino of the file may change. -+ -+Any open files referring to this inode will access the old data and -+metadata. Similarly any file locks obtained before copy_up will not -+apply to the copied up file. -+ -+On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and -+fsetxattr(2) will fail with EROFS. -+ -+If a file with multiple hard links is copied up, then this will -+"break" the link. Changes will not be propagated to other names -+referring to the same inode. -+ -+Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory -+object in overlayfs will not contain valid absolute paths, only -+relative paths leading up to the filesystem's root. This will be -+fixed in the future. -+ -+Some operations are not atomic, for example a crash during copy_up or -+rename will leave the filesystem in an inconsistent state. This will -+be addressed in the future. -+ -+Changes to underlying filesystems -+--------------------------------- -+ -+Offline changes, when the overlay is not mounted, are allowed to either -+the upper or the lower trees. -+ -+Changes to the underlying filesystems while part of a mounted overlay -+filesystem are not allowed. If the underlying filesystem is changed, -+the behavior of the overlay is undefined, though it will not result in -+a crash or deadlock. -diff --git a/MAINTAINERS b/MAINTAINERS -index 900d98e..976dae0 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -6452,6 +6452,13 @@ F: drivers/scsi/osd/ - F: include/scsi/osd_* - F: fs/exofs/ - -+OVERLAY FILESYSTEM -+M: Miklos Szeredi -+L: linux-fsdevel@vger.kernel.org -+S: Supported -+F: fs/overlayfs/* -+F: Documentation/filesystems/overlayfs.txt -+ - P54 WIRELESS DRIVER - M: Christian Lamparter - L: linux-wireless@vger.kernel.org -diff --git a/fs/Makefile b/fs/Makefile -index 5eb9caf..b595440 100644 ---- a/fs/Makefile -+++ b/fs/Makefile -@@ -104,7 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ - obj-$(CONFIG_AUTOFS4_FS) += autofs4/ - obj-$(CONFIG_ADFS_FS) += adfs/ - obj-$(CONFIG_FUSE_FS) += fuse/ --obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ -+obj-$(CONFIG_OVERLAY_FS) += overlayfs/ - obj-$(CONFIG_UDF_FS) += udf/ - obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ - obj-$(CONFIG_OMFS_FS) += omfs/ -diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig -index e601259..3435581 100644 ---- a/fs/overlayfs/Kconfig -+++ b/fs/overlayfs/Kconfig -@@ -1,4 +1,4 @@ --config OVERLAYFS_FS -+config OVERLAY_FS - tristate "Overlay filesystem support" - help - An overlay filesystem combines two filesystems - an 'upper' filesystem -diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile -index 8f91889..900daed 100644 ---- a/fs/overlayfs/Makefile -+++ b/fs/overlayfs/Makefile -@@ -2,6 +2,6 @@ - # Makefile for the overlay filesystem. - # - --obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o -+obj-$(CONFIG_OVERLAY_FS) += overlay.o - --overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o -+overlay-objs := super.o inode.o dir.o readdir.o copy_up.o -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 08b704c..b92bd18 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -24,7 +24,7 @@ MODULE_AUTHOR("Miklos Szeredi "); - MODULE_DESCRIPTION("Overlay filesystem"); - MODULE_LICENSE("GPL"); - --#define OVERLAYFS_SUPER_MAGIC 0x794c764f -+#define OVERLAYFS_SUPER_MAGIC 0x794c7630 - - struct ovl_config { - char *lowerdir; -@@ -776,11 +776,11 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, - - static struct file_system_type ovl_fs_type = { - .owner = THIS_MODULE, -- .name = "overlayfs", -+ .name = "overlay", - .mount = ovl_mount, - .kill_sb = kill_anon_super, - }; --MODULE_ALIAS_FS("overlayfs"); -+MODULE_ALIAS_FS("overlay"); - - static int __init ovl_init(void) - { --- -2.7.4 - - -From 67b24788bb53d0b36bad84205ab940718003bffc Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Thu, 20 Nov 2014 16:39:59 +0100 -Subject: [PATCH 35/73] ovl: fix remove/copy-up race - -ovl_remove_and_whiteout() needs to check if upper dentry exists or not -after having locked upper parent directory. - -Previously we used a "type" value computed before locking the upper parent -directory, which is susceptible to racing with copy-up. - -There's a similar check in ovl_check_empty_and_clear(). This one is not -actually racy, since copy-up doesn't change the "emptyness" property of a -directory. Add a comment to this effect, and check the existence of upper -dentry locally to make the code cleaner. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit a105d685a8483985a01776411de191a726b48132) -Signed-off-by: Alex Shi ---- - fs/overlayfs/dir.c | 31 +++++++++++++++++++------------ - 1 file changed, 19 insertions(+), 12 deletions(-) - -diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c -index 15cd91a..8ffc4b9 100644 ---- a/fs/overlayfs/dir.c -+++ b/fs/overlayfs/dir.c -@@ -284,8 +284,7 @@ out: - return ERR_PTR(err); - } - --static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, -- enum ovl_path_type type) -+static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) - { - int err; - struct dentry *ret = NULL; -@@ -294,8 +293,17 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, - err = ovl_check_empty_dir(dentry, &list); - if (err) - ret = ERR_PTR(err); -- else if (type == OVL_PATH_MERGE) -- ret = ovl_clear_empty(dentry, &list); -+ else { -+ /* -+ * If no upperdentry then skip clearing whiteouts. -+ * -+ * Can race with copy-up, since we don't hold the upperdir -+ * mutex. Doesn't matter, since copy-up can't create a -+ * non-empty directory from an empty one. -+ */ -+ if (ovl_dentry_upper(dentry)) -+ ret = ovl_clear_empty(dentry, &list); -+ } - - ovl_cache_free(&list); - -@@ -487,8 +495,7 @@ out: - return err; - } - --static int ovl_remove_and_whiteout(struct dentry *dentry, -- enum ovl_path_type type, bool is_dir) -+static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) - { - struct dentry *workdir = ovl_workdir(dentry); - struct inode *wdir = workdir->d_inode; -@@ -500,7 +507,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, - int err; - - if (is_dir) { -- opaquedir = ovl_check_empty_and_clear(dentry, type); -+ opaquedir = ovl_check_empty_and_clear(dentry); - err = PTR_ERR(opaquedir); - if (IS_ERR(opaquedir)) - goto out; -@@ -515,9 +522,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, - if (IS_ERR(whiteout)) - goto out_unlock; - -- if (type == OVL_PATH_LOWER) { -+ upper = ovl_dentry_upper(dentry); -+ if (!upper) { - upper = lookup_one_len(dentry->d_name.name, upperdir, -- dentry->d_name.len); -+ dentry->d_name.len); - err = PTR_ERR(upper); - if (IS_ERR(upper)) - goto kill_whiteout; -@@ -529,7 +537,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, - } else { - int flags = 0; - -- upper = ovl_dentry_upper(dentry); - if (opaquedir) - upper = opaquedir; - err = -ESTALE; -@@ -648,7 +655,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) - cap_raise(override_cred->cap_effective, CAP_CHOWN); - old_cred = override_creds(override_cred); - -- err = ovl_remove_and_whiteout(dentry, type, is_dir); -+ err = ovl_remove_and_whiteout(dentry, is_dir); - - revert_creds(old_cred); - put_cred(override_cred); -@@ -781,7 +788,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, - } - - if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { -- opaquedir = ovl_check_empty_and_clear(new, new_type); -+ opaquedir = ovl_check_empty_and_clear(new); - err = PTR_ERR(opaquedir); - if (IS_ERR(opaquedir)) { - opaquedir = NULL; --- -2.7.4 - - -From ee3679843c9d6dec75e1d260cb6f041aa36ccbed Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Thu, 20 Nov 2014 16:40:00 +0100 -Subject: [PATCH 36/73] ovl: fix race in private xattr checks - -Xattr operations can race with copy up. This does not matter as long as -we consistently fiter out "trunsted.overlay.opaque" attribute on upper -directories. - -Previously we checked parent against OVL_PATH_MERGE. This is too general, -and prone to race with copy-up. I.e. we found the parent to be on the -lower layer but ovl_dentry_real() would return the copied-up dentry, -possibly with the "opaque" attribute. - -So instead use ovl_path_real() and decide to filter the attributes based on -the actual type of the dentry we'll use. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 521484639ec19a6f1ed56de6993feb255f5f676c) -Signed-off-by: Alex Shi ---- - fs/overlayfs/inode.c | 27 ++++++++++++++++++--------- - 1 file changed, 18 insertions(+), 9 deletions(-) - -diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c -index af2d18c..07d74b2 100644 ---- a/fs/overlayfs/inode.c -+++ b/fs/overlayfs/inode.c -@@ -235,26 +235,36 @@ out: - return err; - } - -+static bool ovl_need_xattr_filter(struct dentry *dentry, -+ enum ovl_path_type type) -+{ -+ return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); -+} -+ - ssize_t ovl_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size) - { -- if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && -- ovl_is_private_xattr(name)) -+ struct path realpath; -+ enum ovl_path_type type = ovl_path_real(dentry, &realpath); -+ -+ if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) - return -ENODATA; - -- return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); -+ return vfs_getxattr(realpath.dentry, name, value, size); - } - - ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) - { -+ struct path realpath; -+ enum ovl_path_type type = ovl_path_real(dentry, &realpath); - ssize_t res; - int off; - -- res = vfs_listxattr(ovl_dentry_real(dentry), list, size); -+ res = vfs_listxattr(realpath.dentry, list, size); - if (res <= 0 || size == 0) - return res; - -- if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) -+ if (!ovl_need_xattr_filter(dentry, type)) - return res; - - /* filter out private xattrs */ -@@ -279,17 +289,16 @@ int ovl_removexattr(struct dentry *dentry, const char *name) - { - int err; - struct path realpath; -- enum ovl_path_type type; -+ enum ovl_path_type type = ovl_path_real(dentry, &realpath); - - err = ovl_want_write(dentry); - if (err) - goto out; - -- if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && -- ovl_is_private_xattr(name)) -+ err = -ENODATA; -+ if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) - goto out_drop_write; - -- type = ovl_path_real(dentry, &realpath); - if (type == OVL_PATH_LOWER) { - err = vfs_getxattr(realpath.dentry, name, NULL, 0); - if (err < 0) --- -2.7.4 - - -From 0ca1a2c9b8357ebf224d1714a6a1e3635e8e5862 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Thu, 20 Nov 2014 16:40:00 +0100 -Subject: [PATCH 37/73] ovl: allow filenames with comma - -Allow option separator (comma) to be escaped with backslash. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 91c77947133f7aef851b625701e182d3f99d14a9) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 45 insertions(+), 3 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index b92bd18..eee7a62 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -462,11 +462,34 @@ static const match_table_t ovl_tokens = { - {OPT_ERR, NULL} - }; - -+static char *ovl_next_opt(char **s) -+{ -+ char *sbegin = *s; -+ char *p; -+ -+ if (sbegin == NULL) -+ return NULL; -+ -+ for (p = sbegin; *p; p++) { -+ if (*p == '\\') { -+ p++; -+ if (!*p) -+ break; -+ } else if (*p == ',') { -+ *p = '\0'; -+ *s = p + 1; -+ return sbegin; -+ } -+ } -+ *s = NULL; -+ return sbegin; -+} -+ - static int ovl_parse_opt(char *opt, struct ovl_config *config) - { - char *p; - -- while ((p = strsep(&opt, ",")) != NULL) { -+ while ((p = ovl_next_opt(&opt)) != NULL) { - int token; - substring_t args[MAX_OPT_ARGS]; - -@@ -554,15 +577,34 @@ out_dput: - goto out_unlock; - } - -+static void ovl_unescape(char *s) -+{ -+ char *d = s; -+ -+ for (;; s++, d++) { -+ if (*s == '\\') -+ s++; -+ *d = *s; -+ if (!*s) -+ break; -+ } -+} -+ - static int ovl_mount_dir(const char *name, struct path *path) - { - int err; -+ char *tmp = kstrdup(name, GFP_KERNEL); -+ -+ if (!tmp) -+ return -ENOMEM; - -- err = kern_path(name, LOOKUP_FOLLOW, path); -+ ovl_unescape(tmp); -+ err = kern_path(tmp, LOOKUP_FOLLOW, path); - if (err) { -- pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); -+ pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); - err = -EINVAL; - } -+ kfree(tmp); - return err; - } - --- -2.7.4 - - -From a4180dfef4dc3c948d524dfe0c168cb59317df56 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Thu, 20 Nov 2014 16:40:01 +0100 -Subject: [PATCH 38/73] ovl: use lockless_dereference() for upperdentry - -Don't open code lockless_dereference() in ovl_upperdentry_dereference(). - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 71d509280f7e92eb60ae6b7c78c20afafff060c7) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index eee7a62..f16d318 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -84,12 +84,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) - - static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) - { -- struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); -- /* -- * Make sure to order reads to upperdentry wrt ovl_dentry_update() -- */ -- smp_read_barrier_depends(); -- return upperdentry; -+ return lockless_dereference(oe->__upperdentry); - } - - void ovl_path_upper(struct dentry *dentry, struct path *path) --- -2.7.4 - - -From d3a45e42dd711ce4e2095f14d77865b78c749190 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Thu, 20 Nov 2014 16:40:01 +0100 -Subject: [PATCH 39/73] ovl: pass dentry into ovl_dir_read_merged() - -Pass dentry into ovl_dir_read_merged() insted of upperpath and lowerpath. -This cleans up callers and paves the way for multi-layer directory reads. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit c9f00fdb9ab3999cb2fb582ad82a5db9e70c82f5) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 35 ++++++++++++++--------------------- - 1 file changed, 14 insertions(+), 21 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index fdb63de..6d5d9fd 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -276,11 +276,11 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, - return 0; - } - --static inline int ovl_dir_read_merged(struct path *upperpath, -- struct path *lowerpath, -- struct list_head *list) -+static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) - { - int err; -+ struct path lowerpath; -+ struct path upperpath; - struct ovl_readdir_data rdd = { - .ctx.actor = ovl_fill_merge, - .list = list, -@@ -288,25 +288,28 @@ static inline int ovl_dir_read_merged(struct path *upperpath, - .is_merge = false, - }; - -- if (upperpath->dentry) { -- err = ovl_dir_read(upperpath, &rdd); -+ ovl_path_lower(dentry, &lowerpath); -+ ovl_path_upper(dentry, &upperpath); -+ -+ if (upperpath.dentry) { -+ err = ovl_dir_read(&upperpath, &rdd); - if (err) - goto out; - -- if (lowerpath->dentry) { -- err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); -+ if (lowerpath.dentry) { -+ err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); - if (err) - goto out; - } - } -- if (lowerpath->dentry) { -+ if (lowerpath.dentry) { - /* - * Insert lowerpath entries before upperpath ones, this allows - * offsets to be reasonably constant - */ - list_add(&rdd.middle, rdd.list); - rdd.is_merge = true; -- err = ovl_dir_read(lowerpath, &rdd); -+ err = ovl_dir_read(&lowerpath, &rdd); - list_del(&rdd.middle); - } - out: -@@ -331,8 +334,6 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) - static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) - { - int res; -- struct path lowerpath; -- struct path upperpath; - struct ovl_dir_cache *cache; - - cache = ovl_dir_cache(dentry); -@@ -349,10 +350,7 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) - cache->refcount = 1; - INIT_LIST_HEAD(&cache->entries); - -- ovl_path_lower(dentry, &lowerpath); -- ovl_path_upper(dentry, &upperpath); -- -- res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); -+ res = ovl_dir_read_merged(dentry, &cache->entries); - if (res) { - ovl_cache_free(&cache->entries); - kfree(cache); -@@ -540,14 +538,9 @@ const struct file_operations ovl_dir_operations = { - int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) - { - int err; -- struct path lowerpath; -- struct path upperpath; - struct ovl_cache_entry *p; - -- ovl_path_upper(dentry, &upperpath); -- ovl_path_lower(dentry, &lowerpath); -- -- err = ovl_dir_read_merged(&upperpath, &lowerpath, list); -+ err = ovl_dir_read_merged(dentry, list); - if (err) - return err; - --- -2.7.4 - - -From effb810d716f07d643385ba8206ab981a4b20dd9 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Thu, 20 Nov 2014 16:40:02 +0100 -Subject: [PATCH 40/73] ovl: ovl_dir_fsync() cleanup - -Check against !OVL_PATH_LOWER instead of OVL_PATH_MERGE. For a copied up -directory the two are currently equivalent. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 7676895f4736421ebafc48de5078e25ea69e88ee) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 6d5d9fd..c020599 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -452,10 +452,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, - /* - * Need to check if we started out being a lower dir, but got copied up - */ -- if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { -+ if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { - struct inode *inode = file_inode(file); - -- realfile =lockless_dereference(od->upperfile); -+ realfile = lockless_dereference(od->upperfile); - if (!realfile) { - struct path upperpath; - --- -2.7.4 - - -From 8f7458c9c66418c8b9af323ba29c8f023ffe559e Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:42 +0100 -Subject: [PATCH 41/73] ovl: check whiteout while reading directory - -Don't make a separate pass for checking whiteouts, since we can do it while -reading the upper directory. - -This will make it easier to handle multiple layers. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 49c21e1cacd74a8c83407c70ad860c994e606e25) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 77 ++++++++++++++++++-------------------------------- - 1 file changed, 28 insertions(+), 49 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index c020599..b361719 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -40,6 +40,7 @@ struct ovl_readdir_data { - struct rb_root root; - struct list_head *list; - struct list_head middle; -+ struct dentry *dir; - int count; - int err; - }; -@@ -126,6 +127,32 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, - if (p == NULL) - return -ENOMEM; - -+ if (d_type == DT_CHR) { -+ struct dentry *dentry; -+ const struct cred *old_cred; -+ struct cred *override_cred; -+ -+ override_cred = prepare_creds(); -+ if (!override_cred) { -+ kfree(p); -+ return -ENOMEM; -+ } -+ -+ /* -+ * CAP_DAC_OVERRIDE for lookup -+ */ -+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); -+ old_cred = override_creds(override_cred); -+ -+ dentry = lookup_one_len(name, rdd->dir, len); -+ if (!IS_ERR(dentry)) { -+ p->is_whiteout = ovl_is_whiteout(dentry); -+ dput(dentry); -+ } -+ revert_creds(old_cred); -+ put_cred(override_cred); -+ } -+ - list_add_tail(&p->l_node, rdd->list); - rb_link_node(&p->node, parent, newp); - rb_insert_color(&p->node, &rdd->root); -@@ -233,49 +260,6 @@ static void ovl_dir_reset(struct file *file) - od->is_real = false; - } - --static int ovl_dir_mark_whiteouts(struct dentry *dir, -- struct ovl_readdir_data *rdd) --{ -- struct ovl_cache_entry *p; -- struct dentry *dentry; -- const struct cred *old_cred; -- struct cred *override_cred; -- -- override_cred = prepare_creds(); -- if (!override_cred) { -- ovl_cache_free(rdd->list); -- return -ENOMEM; -- } -- -- /* -- * CAP_DAC_OVERRIDE for lookup -- */ -- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); -- old_cred = override_creds(override_cred); -- -- mutex_lock(&dir->d_inode->i_mutex); -- list_for_each_entry(p, rdd->list, l_node) { -- if (p->is_cursor) -- continue; -- -- if (p->type != DT_CHR) -- continue; -- -- dentry = lookup_one_len(p->name, dir, p->len); -- if (IS_ERR(dentry)) -- continue; -- -- p->is_whiteout = ovl_is_whiteout(dentry); -- dput(dentry); -- } -- mutex_unlock(&dir->d_inode->i_mutex); -- -- revert_creds(old_cred); -- put_cred(override_cred); -- -- return 0; --} -- - static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) - { - int err; -@@ -292,15 +276,10 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) - ovl_path_upper(dentry, &upperpath); - - if (upperpath.dentry) { -+ rdd.dir = upperpath.dentry; - err = ovl_dir_read(&upperpath, &rdd); - if (err) - goto out; -- -- if (lowerpath.dentry) { -- err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); -- if (err) -- goto out; -- } - } - if (lowerpath.dentry) { - /* --- -2.7.4 - - -From 8688583b8896b61fb799a3e7a6272b8beb87ec9e Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:42 +0100 -Subject: [PATCH 42/73] ovl: make path-type a bitmap - -OVL_PATH_PURE_UPPER -> __OVL_PATH_UPPER | __OVL_PATH_PURE -OVL_PATH_UPPER -> __OVL_PATH_UPPER -OVL_PATH_MERGE -> __OVL_PATH_UPPER | __OVL_PATH_MERGE -OVL_PATH_LOWER -> 0 - -Multiple R/O layers will allow __OVL_PATH_MERGE without __OVL_PATH_UPPER. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 1afaba1ecb5299cdd0f69b5bad98b0185fe71e79) -Signed-off-by: Alex Shi ---- - fs/overlayfs/copy_up.c | 4 ++-- - fs/overlayfs/dir.c | 22 +++++++++++----------- - fs/overlayfs/inode.c | 9 ++++++--- - fs/overlayfs/overlayfs.h | 13 +++++++++---- - fs/overlayfs/readdir.c | 10 +++++----- - fs/overlayfs/super.c | 19 ++++++++----------- - 6 files changed, 41 insertions(+), 36 deletions(-) - -diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c -index ea10a87..a5bfd60 100644 ---- a/fs/overlayfs/copy_up.c -+++ b/fs/overlayfs/copy_up.c -@@ -385,7 +385,7 @@ int ovl_copy_up(struct dentry *dentry) - struct kstat stat; - enum ovl_path_type type = ovl_path_type(dentry); - -- if (type != OVL_PATH_LOWER) -+ if (OVL_TYPE_UPPER(type)) - break; - - next = dget(dentry); -@@ -394,7 +394,7 @@ int ovl_copy_up(struct dentry *dentry) - parent = dget_parent(next); - - type = ovl_path_type(parent); -- if (type != OVL_PATH_LOWER) -+ if (OVL_TYPE_UPPER(type)) - break; - - dput(next); -diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c -index 8ffc4b9..ab50bd1 100644 ---- a/fs/overlayfs/dir.c -+++ b/fs/overlayfs/dir.c -@@ -152,7 +152,7 @@ static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, - * correct link count. nlink=1 seems to pacify 'find' and - * other utilities. - */ -- if (type == OVL_PATH_MERGE) -+ if (OVL_TYPE_MERGE(type)) - stat->nlink = 1; - - return 0; -@@ -630,7 +630,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) - goto out_drop_write; - - type = ovl_path_type(dentry); -- if (type == OVL_PATH_PURE_UPPER) { -+ if (OVL_TYPE_PURE_UPPER(type)) { - err = ovl_remove_upper(dentry, is_dir); - } else { - const struct cred *old_cred; -@@ -712,7 +712,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, - /* Don't copy up directory trees */ - old_type = ovl_path_type(old); - err = -EXDEV; -- if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir) -+ if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir) - goto out; - - if (new->d_inode) { -@@ -725,25 +725,25 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, - - new_type = ovl_path_type(new); - err = -EXDEV; -- if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) -+ if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) - goto out; - - err = 0; -- if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) { -+ if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) { - if (ovl_dentry_lower(old)->d_inode == - ovl_dentry_lower(new)->d_inode) - goto out; - } -- if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) { -+ if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) { - if (ovl_dentry_upper(old)->d_inode == - ovl_dentry_upper(new)->d_inode) - goto out; - } - } else { - if (ovl_dentry_is_opaque(new)) -- new_type = OVL_PATH_UPPER; -+ new_type = __OVL_PATH_UPPER; - else -- new_type = OVL_PATH_PURE_UPPER; -+ new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE; - } - - err = ovl_want_write(old); -@@ -763,8 +763,8 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, - goto out_drop_write; - } - -- old_opaque = old_type != OVL_PATH_PURE_UPPER; -- new_opaque = new_type != OVL_PATH_PURE_UPPER; -+ old_opaque = !OVL_TYPE_PURE_UPPER(old_type); -+ new_opaque = !OVL_TYPE_PURE_UPPER(new_type); - - if (old_opaque || new_opaque) { - err = -ENOMEM; -@@ -787,7 +787,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, - old_cred = override_creds(override_cred); - } - -- if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { -+ if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) { - opaquedir = ovl_check_empty_and_clear(new); - err = PTR_ERR(opaquedir); - if (IS_ERR(opaquedir)) { -diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c -index 07d74b2..48492f1 100644 ---- a/fs/overlayfs/inode.c -+++ b/fs/overlayfs/inode.c -@@ -238,7 +238,10 @@ out: - static bool ovl_need_xattr_filter(struct dentry *dentry, - enum ovl_path_type type) - { -- return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); -+ if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER) -+ return S_ISDIR(dentry->d_inode->i_mode); -+ else -+ return false; - } - - ssize_t ovl_getxattr(struct dentry *dentry, const char *name, -@@ -299,7 +302,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name) - if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) - goto out_drop_write; - -- if (type == OVL_PATH_LOWER) { -+ if (!OVL_TYPE_UPPER(type)) { - err = vfs_getxattr(realpath.dentry, name, NULL, 0); - if (err < 0) - goto out_drop_write; -@@ -321,7 +324,7 @@ out: - static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, - struct dentry *realdentry) - { -- if (type != OVL_PATH_LOWER) -+ if (OVL_TYPE_UPPER(type)) - return false; - - if (special_file(realdentry->d_inode->i_mode)) -diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h -index 814bed3..d39eaa8 100644 ---- a/fs/overlayfs/overlayfs.h -+++ b/fs/overlayfs/overlayfs.h -@@ -12,12 +12,17 @@ - struct ovl_entry; - - enum ovl_path_type { -- OVL_PATH_PURE_UPPER, -- OVL_PATH_UPPER, -- OVL_PATH_MERGE, -- OVL_PATH_LOWER, -+ __OVL_PATH_PURE = (1 << 0), -+ __OVL_PATH_UPPER = (1 << 1), -+ __OVL_PATH_MERGE = (1 << 2), - }; - -+#define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER) -+#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) -+#define OVL_TYPE_PURE_UPPER(type) ((type) & __OVL_PATH_PURE) -+#define OVL_TYPE_MERGE_OR_LOWER(type) \ -+ (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) -+ - extern const char *ovl_opaque_xattr; - - static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index b361719..fb18287 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -255,8 +255,8 @@ static void ovl_dir_reset(struct file *file) - ovl_cache_put(od, dentry); - od->cache = NULL; - } -- WARN_ON(!od->is_real && type != OVL_PATH_MERGE); -- if (od->is_real && type == OVL_PATH_MERGE) -+ WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type)); -+ if (od->is_real && OVL_TYPE_MERGE(type)) - od->is_real = false; - } - -@@ -431,7 +431,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, - /* - * Need to check if we started out being a lower dir, but got copied up - */ -- if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { -+ if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { - struct inode *inode = file_inode(file); - - realfile = lockless_dereference(od->upperfile); -@@ -497,8 +497,8 @@ static int ovl_dir_open(struct inode *inode, struct file *file) - } - INIT_LIST_HEAD(&od->cursor.l_node); - od->realfile = realfile; -- od->is_real = (type != OVL_PATH_MERGE); -- od->is_upper = (type != OVL_PATH_LOWER); -+ od->is_real = !OVL_TYPE_MERGE(type); -+ od->is_upper = OVL_TYPE_UPPER(type); - od->cursor.is_cursor = true; - file->private_data = od; - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index f16d318..821719c 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -64,22 +64,19 @@ const char *ovl_opaque_xattr = "trusted.overlay.opaque"; - enum ovl_path_type ovl_path_type(struct dentry *dentry) - { - struct ovl_entry *oe = dentry->d_fsdata; -+ enum ovl_path_type type = 0; - - if (oe->__upperdentry) { -+ type = __OVL_PATH_UPPER; -+ - if (oe->lowerdentry) { - if (S_ISDIR(dentry->d_inode->i_mode)) -- return OVL_PATH_MERGE; -- else -- return OVL_PATH_UPPER; -- } else { -- if (oe->opaque) -- return OVL_PATH_UPPER; -- else -- return OVL_PATH_PURE_UPPER; -+ type |= __OVL_PATH_MERGE; -+ } else if (!oe->opaque) { -+ type |= __OVL_PATH_PURE; - } -- } else { -- return OVL_PATH_LOWER; - } -+ return type; - } - - static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) -@@ -101,7 +98,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) - - enum ovl_path_type type = ovl_path_type(dentry); - -- if (type == OVL_PATH_LOWER) -+ if (!OVL_TYPE_UPPER(type)) - ovl_path_lower(dentry, path); - else - ovl_path_upper(dentry, path); --- -2.7.4 - - -From 046f00ecd2379495e451658be72234ded7bee154 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:43 +0100 -Subject: [PATCH 43/73] ovl: dont replace opaque dir - -When removing an empty opaque directory, then it makes no sense to replace -it with an exact replica of itself before removal. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 263b4a0fee43f1239c4d6f3c3a62fb5a20d84f2e) -Signed-off-by: Alex Shi ---- - fs/overlayfs/dir.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c -index ab50bd1..dcae3ac 100644 ---- a/fs/overlayfs/dir.c -+++ b/fs/overlayfs/dir.c -@@ -506,7 +506,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) - struct dentry *opaquedir = NULL; - int err; - -- if (is_dir) { -+ if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { - opaquedir = ovl_check_empty_and_clear(dentry); - err = PTR_ERR(opaquedir); - if (IS_ERR(opaquedir)) --- -2.7.4 - - -From d669aefff21050073a65d12d8711c7c8bbad65b9 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:43 +0100 -Subject: [PATCH 44/73] ovl: add mutli-layer infrastructure - -Add multiple lower layers to 'struct ovl_fs' and 'struct ovl_entry'. - -ovl_entry will have an array of paths, instead of just the dentry. This -allows a compact array containing just the layers which exist at current -point in the tree (which is expected to be a small number for the majority -of dentries). - -The number of layers is not limited by this infrastructure. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit dd662667e6d3e55b42798a6e6e7f37dddc639460) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 98 ++++++++++++++++++++++++++++++++++------------------ - 1 file changed, 64 insertions(+), 34 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 821719c..460d866 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -35,7 +35,8 @@ struct ovl_config { - /* private information held for overlayfs's superblock */ - struct ovl_fs { - struct vfsmount *upper_mnt; -- struct vfsmount *lower_mnt; -+ unsigned numlower; -+ struct vfsmount **lower_mnt; - struct dentry *workdir; - long lower_namelen; - /* pathnames of lower and upper dirs, for show_options */ -@@ -47,7 +48,6 @@ struct ovl_dir_cache; - /* private information held for every overlayfs dentry */ - struct ovl_entry { - struct dentry *__upperdentry; -- struct dentry *lowerdentry; - struct ovl_dir_cache *cache; - union { - struct { -@@ -56,10 +56,16 @@ struct ovl_entry { - }; - struct rcu_head rcu; - }; -+ unsigned numlower; -+ struct path lowerstack[]; - }; - - const char *ovl_opaque_xattr = "trusted.overlay.opaque"; - -+static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) -+{ -+ return oe->numlower ? oe->lowerstack[0].dentry : NULL; -+} - - enum ovl_path_type ovl_path_type(struct dentry *dentry) - { -@@ -69,7 +75,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) - if (oe->__upperdentry) { - type = __OVL_PATH_UPPER; - -- if (oe->lowerdentry) { -+ if (oe->numlower) { - if (S_ISDIR(dentry->d_inode->i_mode)) - type |= __OVL_PATH_MERGE; - } else if (!oe->opaque) { -@@ -117,7 +123,7 @@ struct dentry *ovl_dentry_lower(struct dentry *dentry) - { - struct ovl_entry *oe = dentry->d_fsdata; - -- return oe->lowerdentry; -+ return __ovl_dentry_lower(oe); - } - - struct dentry *ovl_dentry_real(struct dentry *dentry) -@@ -127,7 +133,7 @@ struct dentry *ovl_dentry_real(struct dentry *dentry) - - realdentry = ovl_upperdentry_dereference(oe); - if (!realdentry) -- realdentry = oe->lowerdentry; -+ realdentry = __ovl_dentry_lower(oe); - - return realdentry; - } -@@ -140,7 +146,7 @@ struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) - if (realdentry) { - *is_upper = true; - } else { -- realdentry = oe->lowerdentry; -+ realdentry = __ovl_dentry_lower(oe); - *is_upper = false; - } - return realdentry; -@@ -162,11 +168,9 @@ void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) - - void ovl_path_lower(struct dentry *dentry, struct path *path) - { -- struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - struct ovl_entry *oe = dentry->d_fsdata; - -- path->mnt = ofs->lower_mnt; -- path->dentry = oe->lowerdentry; -+ *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL }; - } - - int ovl_want_write(struct dentry *dentry) -@@ -258,8 +262,11 @@ static void ovl_dentry_release(struct dentry *dentry) - struct ovl_entry *oe = dentry->d_fsdata; - - if (oe) { -+ unsigned int i; -+ - dput(oe->__upperdentry); -- dput(oe->lowerdentry); -+ for (i = 0; i < oe->numlower; i++) -+ dput(oe->lowerstack[i].dentry); - kfree_rcu(oe, rcu); - } - } -@@ -268,9 +275,15 @@ static const struct dentry_operations ovl_dentry_operations = { - .d_release = ovl_dentry_release, - }; - --static struct ovl_entry *ovl_alloc_entry(void) -+static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) - { -- return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL); -+ size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); -+ struct ovl_entry *oe = kzalloc(size, GFP_KERNEL); -+ -+ if (oe) -+ oe->numlower = numlower; -+ -+ return oe; - } - - static inline struct dentry *ovl_lookup_real(struct dentry *dir, -@@ -297,19 +310,19 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - { - struct ovl_entry *oe; - struct dentry *upperdir; -- struct dentry *lowerdir; -+ struct path lowerdir; - struct dentry *upperdentry = NULL; - struct dentry *lowerdentry = NULL; - struct inode *inode = NULL; - int err; - - err = -ENOMEM; -- oe = ovl_alloc_entry(); -+ oe = ovl_alloc_entry(1); - if (!oe) - goto out; - - upperdir = ovl_dentry_upper(dentry->d_parent); -- lowerdir = ovl_dentry_lower(dentry->d_parent); -+ ovl_path_lower(dentry->d_parent, &lowerdir); - - if (upperdir) { - upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); -@@ -317,7 +330,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - if (IS_ERR(upperdentry)) - goto out_put_dir; - -- if (lowerdir && upperdentry) { -+ if (lowerdir.dentry && upperdentry) { - if (ovl_is_whiteout(upperdentry)) { - dput(upperdentry); - upperdentry = NULL; -@@ -327,8 +340,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - } - } - } -- if (lowerdir && !oe->opaque) { -- lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name); -+ if (lowerdir.dentry && !oe->opaque) { -+ lowerdentry = ovl_lookup_real(lowerdir.dentry, &dentry->d_name); - err = PTR_ERR(lowerdentry); - if (IS_ERR(lowerdentry)) - goto out_dput_upper; -@@ -355,8 +368,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - } - - oe->__upperdentry = upperdentry; -- oe->lowerdentry = lowerdentry; -- -+ if (lowerdentry) { -+ oe->lowerstack[0].dentry = lowerdentry; -+ oe->lowerstack[0].mnt = lowerdir.mnt; -+ } else { -+ oe->numlower = 0; -+ } - dentry->d_fsdata = oe; - d_add(dentry, inode); - -@@ -380,10 +397,12 @@ struct file *ovl_path_open(struct path *path, int flags) - static void ovl_put_super(struct super_block *sb) - { - struct ovl_fs *ufs = sb->s_fs_info; -+ unsigned i; - - dput(ufs->workdir); - mntput(ufs->upper_mnt); -- mntput(ufs->lower_mnt); -+ for (i = 0; i < ufs->numlower; i++) -+ mntput(ufs->lower_mnt[i]); - - kfree(ufs->config.lowerdir); - kfree(ufs->config.upperdir); -@@ -641,6 +660,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - struct ovl_entry *oe; - struct ovl_fs *ufs; - struct kstatfs statfs; -+ struct vfsmount *mnt; -+ unsigned int i; - int err; - - err = -ENOMEM; -@@ -661,7 +682,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - } - - err = -ENOMEM; -- oe = ovl_alloc_entry(); -+ oe = ovl_alloc_entry(1); - if (oe == NULL) - goto out_free_config; - -@@ -727,12 +748,24 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - goto out_put_workpath; - } - -- ufs->lower_mnt = clone_private_mount(&lowerpath); -- err = PTR_ERR(ufs->lower_mnt); -- if (IS_ERR(ufs->lower_mnt)) { -- pr_err("overlayfs: failed to clone lowerpath\n"); -+ ufs->lower_mnt = kcalloc(1, sizeof(struct vfsmount *), GFP_KERNEL); -+ if (ufs->lower_mnt == NULL) - goto out_put_upper_mnt; -+ -+ mnt = clone_private_mount(&lowerpath); -+ err = PTR_ERR(mnt); -+ if (IS_ERR(mnt)) { -+ pr_err("overlayfs: failed to clone lowerpath\n"); -+ goto out_put_lower_mnt; - } -+ /* -+ * Make lower_mnt R/O. That way fchmod/fchown on lower file -+ * will fail instead of modifying lower fs. -+ */ -+ mnt->mnt_flags |= MNT_READONLY; -+ -+ ufs->lower_mnt[0] = mnt; -+ ufs->numlower = 1; - - ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); - err = PTR_ERR(ufs->workdir); -@@ -742,12 +775,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - goto out_put_lower_mnt; - } - -- /* -- * Make lower_mnt R/O. That way fchmod/fchown on lower file -- * will fail instead of modifying lower fs. -- */ -- ufs->lower_mnt->mnt_flags |= MNT_READONLY; -- - /* If the upper fs is r/o, we mark overlayfs r/o too */ - if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) - sb->s_flags |= MS_RDONLY; -@@ -768,7 +795,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - path_put(&workpath); - - oe->__upperdentry = upperpath.dentry; -- oe->lowerdentry = lowerpath.dentry; -+ oe->lowerstack[0].dentry = lowerpath.dentry; -+ oe->lowerstack[0].mnt = ufs->lower_mnt[0]; - - root_dentry->d_fsdata = oe; - -@@ -782,7 +810,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - out_put_workdir: - dput(ufs->workdir); - out_put_lower_mnt: -- mntput(ufs->lower_mnt); -+ for (i = 0; i < ufs->numlower; i++) -+ mntput(ufs->lower_mnt[i]); -+ kfree(ufs->lower_mnt); - out_put_upper_mnt: - mntput(ufs->upper_mnt); - out_put_workpath: --- -2.7.4 - - -From 8216b4cc31e60690afef76cbe1a310064f8727d7 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:43 +0100 -Subject: [PATCH 45/73] ovl: helper to iterate layers - -Add helper to iterate through all the layers, starting from the upper layer -(if exists) and continuing down through the lower layers. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 5ef88da56a77bfb3b9631f5e5775f3bff86b6219) -Signed-off-by: Alex Shi ---- - fs/overlayfs/overlayfs.h | 1 + - fs/overlayfs/super.c | 21 +++++++++++++++++++++ - 2 files changed, 22 insertions(+) - -diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h -index d39eaa8..d176b67 100644 ---- a/fs/overlayfs/overlayfs.h -+++ b/fs/overlayfs/overlayfs.h -@@ -135,6 +135,7 @@ void ovl_dentry_version_inc(struct dentry *dentry); - void ovl_path_upper(struct dentry *dentry, struct path *path); - void ovl_path_lower(struct dentry *dentry, struct path *path); - enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); -+int ovl_path_next(int idx, struct dentry *dentry, struct path *path); - struct dentry *ovl_dentry_upper(struct dentry *dentry); - struct dentry *ovl_dentry_lower(struct dentry *dentry); - struct dentry *ovl_dentry_real(struct dentry *dentry); -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 460d866..07e4c57 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -305,6 +305,27 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir, - return dentry; - } - -+/* -+ * Returns next layer in stack starting from top. -+ * Returns -1 if this is the last layer. -+ */ -+int ovl_path_next(int idx, struct dentry *dentry, struct path *path) -+{ -+ struct ovl_entry *oe = dentry->d_fsdata; -+ -+ BUG_ON(idx < 0); -+ if (idx == 0) { -+ ovl_path_upper(dentry, path); -+ if (path->dentry) -+ return oe->numlower ? 1 : -1; -+ idx++; -+ } -+ BUG_ON(idx > oe->numlower); -+ *path = oe->lowerstack[idx - 1]; -+ -+ return (idx < oe->numlower) ? idx + 1 : -1; -+} -+ - struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) - { --- -2.7.4 - - -From 3dbcd51370dba076be4dc8c45d2bf19db48201b8 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:44 +0100 -Subject: [PATCH 46/73] ovl: multi-layer readdir - -If multiple lower layers exist, merge them as well in readdir according to -the same rules as merging upper with lower. I.e. take whiteouts and opaque -directories into account on all but the lowers layer. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 9d7459d834c28f55c82f1737f638a6c90e0c0e0f) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 43 +++++++++++++++++++++---------------------- - fs/overlayfs/super.c | 3 +++ - 2 files changed, 24 insertions(+), 22 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index fb18287..5ef05149 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -263,35 +263,34 @@ static void ovl_dir_reset(struct file *file) - static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) - { - int err; -- struct path lowerpath; -- struct path upperpath; -+ struct path realpath; - struct ovl_readdir_data rdd = { - .ctx.actor = ovl_fill_merge, - .list = list, - .root = RB_ROOT, - .is_merge = false, - }; -- -- ovl_path_lower(dentry, &lowerpath); -- ovl_path_upper(dentry, &upperpath); -- -- if (upperpath.dentry) { -- rdd.dir = upperpath.dentry; -- err = ovl_dir_read(&upperpath, &rdd); -- if (err) -- goto out; -- } -- if (lowerpath.dentry) { -- /* -- * Insert lowerpath entries before upperpath ones, this allows -- * offsets to be reasonably constant -- */ -- list_add(&rdd.middle, rdd.list); -- rdd.is_merge = true; -- err = ovl_dir_read(&lowerpath, &rdd); -- list_del(&rdd.middle); -+ int idx, next; -+ -+ for (idx = 0; idx != -1; idx = next) { -+ next = ovl_path_next(idx, dentry, &realpath); -+ -+ if (next != -1) { -+ rdd.dir = realpath.dentry; -+ err = ovl_dir_read(&realpath, &rdd); -+ if (err) -+ break; -+ } else { -+ /* -+ * Insert lowest layer entries before upper ones, this -+ * allows offsets to be reasonably constant -+ */ -+ list_add(&rdd.middle, rdd.list); -+ rdd.is_merge = true; -+ err = ovl_dir_read(&realpath, &rdd); -+ list_del(&rdd.middle); -+ } - } --out: - return err; - } - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 07e4c57..c245043 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -81,6 +81,9 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) - } else if (!oe->opaque) { - type |= __OVL_PATH_PURE; - } -+ } else { -+ if (oe->numlower > 1) -+ type |= __OVL_PATH_MERGE; - } - return type; - } --- -2.7.4 - - -From 1dabc18b8b2115693ec868cb84fda26052a8ee83 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:44 +0100 -Subject: [PATCH 47/73] ovl: multi-layer lookup - -Look up dentry in all relevant layers. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 3d3c6b89399a1b5e8a59ffbb8cb2a7797a9ef154) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 143 +++++++++++++++++++++++++++++++++------------------ - 1 file changed, 94 insertions(+), 49 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index c245043..f72b82f 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -333,82 +333,127 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) - { - struct ovl_entry *oe; -- struct dentry *upperdir; -- struct path lowerdir; -- struct dentry *upperdentry = NULL; -- struct dentry *lowerdentry = NULL; -+ struct ovl_entry *poe = dentry->d_parent->d_fsdata; -+ struct path *stack = NULL; -+ struct dentry *upperdir, *upperdentry = NULL; -+ unsigned int ctr = 0; - struct inode *inode = NULL; -+ bool upperopaque = false; -+ struct dentry *this, *prev = NULL; -+ unsigned int i; - int err; - -- err = -ENOMEM; -- oe = ovl_alloc_entry(1); -- if (!oe) -- goto out; -- -- upperdir = ovl_dentry_upper(dentry->d_parent); -- ovl_path_lower(dentry->d_parent, &lowerdir); -- -+ upperdir = ovl_upperdentry_dereference(poe); - if (upperdir) { -- upperdentry = ovl_lookup_real(upperdir, &dentry->d_name); -- err = PTR_ERR(upperdentry); -- if (IS_ERR(upperdentry)) -- goto out_put_dir; -- -- if (lowerdir.dentry && upperdentry) { -- if (ovl_is_whiteout(upperdentry)) { -- dput(upperdentry); -- upperdentry = NULL; -- oe->opaque = true; -- } else if (ovl_is_opaquedir(upperdentry)) { -- oe->opaque = true; -+ this = ovl_lookup_real(upperdir, &dentry->d_name); -+ err = PTR_ERR(this); -+ if (IS_ERR(this)) -+ goto out; -+ -+ /* -+ * If this is not the lowermost layer, check whiteout and opaque -+ * directory. -+ */ -+ if (poe->numlower && this) { -+ if (ovl_is_whiteout(this)) { -+ dput(this); -+ this = NULL; -+ upperopaque = true; -+ } else if (ovl_is_opaquedir(this)) { -+ upperopaque = true; - } - } -+ upperdentry = prev = this; - } -- if (lowerdir.dentry && !oe->opaque) { -- lowerdentry = ovl_lookup_real(lowerdir.dentry, &dentry->d_name); -- err = PTR_ERR(lowerdentry); -- if (IS_ERR(lowerdentry)) -- goto out_dput_upper; -+ -+ if (!upperopaque && poe->numlower) { -+ err = -ENOMEM; -+ stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL); -+ if (!stack) -+ goto out_put_upper; - } - -- if (lowerdentry && upperdentry && -- (!S_ISDIR(upperdentry->d_inode->i_mode) || -- !S_ISDIR(lowerdentry->d_inode->i_mode))) { -- dput(lowerdentry); -- lowerdentry = NULL; -- oe->opaque = true; -+ for (i = 0; !upperopaque && i < poe->numlower; i++) { -+ bool opaque = false; -+ struct path lowerpath = poe->lowerstack[i]; -+ -+ opaque = false; -+ this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name); -+ err = PTR_ERR(this); -+ if (IS_ERR(this)) -+ goto out_put; -+ if (!this) -+ continue; -+ -+ /* -+ * If this is not the lowermost layer, check whiteout and opaque -+ * directory. -+ */ -+ if (i < poe->numlower - 1) { -+ if (ovl_is_whiteout(this)) { -+ dput(this); -+ break; -+ } else if (ovl_is_opaquedir(this)) { -+ opaque = true; -+ } -+ } -+ /* -+ * If this is a non-directory then stop here. -+ * -+ * FIXME: check for opaqueness maybe better done in remove code. -+ */ -+ if (!S_ISDIR(this->d_inode->i_mode)) { -+ opaque = true; -+ } else if (prev && (!S_ISDIR(prev->d_inode->i_mode) || -+ !S_ISDIR(this->d_inode->i_mode))) { -+ if (prev == upperdentry) -+ upperopaque = true; -+ dput(this); -+ break; -+ } -+ stack[ctr].dentry = this; -+ stack[ctr].mnt = lowerpath.mnt; -+ ctr++; -+ prev = this; -+ if (opaque) -+ break; - } - -- if (lowerdentry || upperdentry) { -+ oe = ovl_alloc_entry(ctr); -+ err = -ENOMEM; -+ if (!oe) -+ goto out_put; -+ -+ if (upperdentry || ctr) { - struct dentry *realdentry; - -- realdentry = upperdentry ? upperdentry : lowerdentry; -+ realdentry = upperdentry ? upperdentry : stack[0].dentry; -+ - err = -ENOMEM; - inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode, - oe); - if (!inode) -- goto out_dput; -+ goto out_free_oe; - ovl_copyattr(realdentry->d_inode, inode); - } - -+ oe->opaque = upperopaque; - oe->__upperdentry = upperdentry; -- if (lowerdentry) { -- oe->lowerstack[0].dentry = lowerdentry; -- oe->lowerstack[0].mnt = lowerdir.mnt; -- } else { -- oe->numlower = 0; -- } -+ memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); -+ kfree(stack); - dentry->d_fsdata = oe; - d_add(dentry, inode); - - return NULL; - --out_dput: -- dput(lowerdentry); --out_dput_upper: -- dput(upperdentry); --out_put_dir: -+out_free_oe: - kfree(oe); -+out_put: -+ for (i = 0; i < ctr; i++) -+ dput(stack[i].dentry); -+ kfree(stack); -+out_put_upper: -+ dput(upperdentry); - out: - return ERR_PTR(err); - } --- -2.7.4 - - -From 34d7a5b7b3a920178f662fa768bb724eb41be925 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:45 +0100 -Subject: [PATCH 48/73] ovl: check whiteout on lowest layer as well - -Not checking whiteouts on lowest layer was an optimization (there's nothing -to white out there), but it could result in inconsitent behavior when a -layer previously used as upper/middle is later used as lowest. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 3e01cee3b980f96463cb6f378ab05303a99903d9) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 79 +++++++++++++++++++++++++------------------------- - fs/overlayfs/super.c | 27 +++++++---------- - 2 files changed, 50 insertions(+), 56 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 5ef05149..2b8315b 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -80,23 +80,50 @@ static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, - return NULL; - } - --static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len, -+static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, -+ const char *name, int len, - u64 ino, unsigned int d_type) - { - struct ovl_cache_entry *p; - size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); - - p = kmalloc(size, GFP_KERNEL); -- if (p) { -- memcpy(p->name, name, len); -- p->name[len] = '\0'; -- p->len = len; -- p->type = d_type; -- p->ino = ino; -- p->is_whiteout = false; -- p->is_cursor = false; -- } -+ if (!p) -+ return NULL; -+ -+ memcpy(p->name, name, len); -+ p->name[len] = '\0'; -+ p->len = len; -+ p->type = d_type; -+ p->ino = ino; -+ p->is_whiteout = false; -+ p->is_cursor = false; -+ -+ if (d_type == DT_CHR) { -+ struct dentry *dentry; -+ const struct cred *old_cred; -+ struct cred *override_cred; -+ -+ override_cred = prepare_creds(); -+ if (!override_cred) { -+ kfree(p); -+ return NULL; -+ } - -+ /* -+ * CAP_DAC_OVERRIDE for lookup -+ */ -+ cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); -+ old_cred = override_creds(override_cred); -+ -+ dentry = lookup_one_len(name, dir, len); -+ if (!IS_ERR(dentry)) { -+ p->is_whiteout = ovl_is_whiteout(dentry); -+ dput(dentry); -+ } -+ revert_creds(old_cred); -+ put_cred(override_cred); -+ } - return p; - } - -@@ -123,36 +150,10 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, - return 0; - } - -- p = ovl_cache_entry_new(name, len, ino, d_type); -+ p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type); - if (p == NULL) - return -ENOMEM; - -- if (d_type == DT_CHR) { -- struct dentry *dentry; -- const struct cred *old_cred; -- struct cred *override_cred; -- -- override_cred = prepare_creds(); -- if (!override_cred) { -- kfree(p); -- return -ENOMEM; -- } -- -- /* -- * CAP_DAC_OVERRIDE for lookup -- */ -- cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); -- old_cred = override_creds(override_cred); -- -- dentry = lookup_one_len(name, rdd->dir, len); -- if (!IS_ERR(dentry)) { -- p->is_whiteout = ovl_is_whiteout(dentry); -- dput(dentry); -- } -- revert_creds(old_cred); -- put_cred(override_cred); -- } -- - list_add_tail(&p->l_node, rdd->list); - rb_link_node(&p->node, parent, newp); - rb_insert_color(&p->node, &rdd->root); -@@ -170,7 +171,7 @@ static int ovl_fill_lower(struct ovl_readdir_data *rdd, - if (p) { - list_move_tail(&p->l_node, &rdd->middle); - } else { -- p = ovl_cache_entry_new(name, namelen, ino, d_type); -+ p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type); - if (p == NULL) - rdd->err = -ENOMEM; - else -@@ -231,6 +232,7 @@ static inline int ovl_dir_read(struct path *realpath, - if (IS_ERR(realfile)) - return PTR_ERR(realfile); - -+ rdd->dir = realpath->dentry; - rdd->ctx.pos = 0; - do { - rdd->count = 0; -@@ -276,7 +278,6 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) - next = ovl_path_next(idx, dentry, &realpath); - - if (next != -1) { -- rdd.dir = realpath.dentry; - err = ovl_dir_read(&realpath, &rdd); - if (err) - break; -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index f72b82f..5dbc678 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -350,16 +350,12 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - if (IS_ERR(this)) - goto out; - -- /* -- * If this is not the lowermost layer, check whiteout and opaque -- * directory. -- */ -- if (poe->numlower && this) { -+ if (this) { - if (ovl_is_whiteout(this)) { - dput(this); - this = NULL; - upperopaque = true; -- } else if (ovl_is_opaquedir(this)) { -+ } else if (poe->numlower && ovl_is_opaquedir(this)) { - upperopaque = true; - } - } -@@ -384,19 +380,16 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - goto out_put; - if (!this) - continue; -- -+ if (ovl_is_whiteout(this)) { -+ dput(this); -+ break; -+ } - /* -- * If this is not the lowermost layer, check whiteout and opaque -- * directory. -+ * Only makes sense to check opaque dir if this is not the -+ * lowermost layer. - */ -- if (i < poe->numlower - 1) { -- if (ovl_is_whiteout(this)) { -- dput(this); -- break; -- } else if (ovl_is_opaquedir(this)) { -- opaque = true; -- } -- } -+ if (i < poe->numlower - 1 && ovl_is_opaquedir(this)) -+ opaque = true; - /* - * If this is a non-directory then stop here. - * --- -2.7.4 - - -From 5dfc4d41f976aa0916abe3298c71e585e78e5491 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:45 +0100 -Subject: [PATCH 49/73] ovl: lookup ENAMETOOLONG on lower means ENOENT - -"Suppose you have in one of the lower layers a filesystem with -->lookup()-enforced upper limit on name length. Pretty much every local fs -has one, but... they are not all equal. 255 characters is the common upper -limit, but e.g. jffs2 stops at 254, minixfs upper limit is somewhere from -14 to 60, depending upon version, etc. You are doing a lookup for -something that is present in upper layer, but happens to be too long for -one of the lower layers. Too bad - ENAMETOOLONG for you..." - -Reported-by: Al Viro -Signed-off-by: Miklos Szeredi -(cherry picked from commit 09e10322b71716adf567d453889ef0871cf226b9) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 5dbc678..110c968 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -376,8 +376,14 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - opaque = false; - this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name); - err = PTR_ERR(this); -- if (IS_ERR(this)) -+ if (IS_ERR(this)) { -+ /* -+ * If it's positive, then treat ENAMETOOLONG as ENOENT. -+ */ -+ if (err == -ENAMETOOLONG && (upperdentry || ctr)) -+ continue; - goto out_put; -+ } - if (!this) - continue; - if (ovl_is_whiteout(this)) { --- -2.7.4 - - -From f3f4d3482d4f4e8edd9ef8a862676004a07c2e43 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:46 +0100 -Subject: [PATCH 50/73] ovl: allow statfs if no upper layer - -Handle "no upper layer" case in statfs. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 4ebc581828d5d0fe189ca06cef8b7a63cb4583d5) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 110c968..cc7a0f3 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -484,7 +484,7 @@ static void ovl_put_super(struct super_block *sb) - * @buf: The struct kstatfs to fill in with stats - * - * Get the filesystem statistics. As writes always target the upper layer -- * filesystem pass the statfs to the same filesystem. -+ * filesystem pass the statfs to the upper filesystem (if it exists) - */ - static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) - { -@@ -493,7 +493,7 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) - struct path path; - int err; - -- ovl_path_upper(root_dentry, &path); -+ ovl_path_real(root_dentry, &path); - - err = vfs_statfs(&path, buf); - if (!err) { --- -2.7.4 - - -From ed3dcf4bb43b6d737b85ba134d25d1483fa12c8e Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:48 +0100 -Subject: [PATCH 51/73] ovl: mount: change order of initialization - -Move allocation of root entry above to where it's needed. - -Move initializations related to upperdir and workdir near each other. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 3b7a9a249a93e68b7bb318de40e64d3b68ba1a6d) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 70 ++++++++++++++++++++++++---------------------------- - 1 file changed, 32 insertions(+), 38 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index cc7a0f3..a177028 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -723,7 +723,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - struct path lowerpath; - struct path upperpath; - struct path workpath; -- struct inode *root_inode; - struct dentry *root_dentry; - struct ovl_entry *oe; - struct ovl_fs *ufs; -@@ -749,54 +748,49 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - goto out_free_config; - } - -- err = -ENOMEM; -- oe = ovl_alloc_entry(1); -- if (oe == NULL) -- goto out_free_config; -- - err = ovl_mount_dir(ufs->config.upperdir, &upperpath); - if (err) -- goto out_free_oe; -+ goto out_free_config; - -- err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); -+ err = ovl_mount_dir(ufs->config.workdir, &workpath); - if (err) - goto out_put_upperpath; - -- err = ovl_mount_dir(ufs->config.workdir, &workpath); -+ err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); - if (err) -- goto out_put_lowerpath; -+ goto out_put_workpath; - - err = -EINVAL; - if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || - !S_ISDIR(lowerpath.dentry->d_inode->i_mode) || - !S_ISDIR(workpath.dentry->d_inode->i_mode)) { - pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n"); -- goto out_put_workpath; -+ goto out_put_lowerpath; - } - - if (upperpath.mnt != workpath.mnt) { - pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); -- goto out_put_workpath; -+ goto out_put_lowerpath; - } - if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { - pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); -- goto out_put_workpath; -+ goto out_put_lowerpath; - } - - if (!ovl_is_allowed_fs_type(upperpath.dentry)) { - pr_err("overlayfs: filesystem of upperdir is not supported\n"); -- goto out_put_workpath; -+ goto out_put_lowerpath; - } - - if (!ovl_is_allowed_fs_type(lowerpath.dentry)) { - pr_err("overlayfs: filesystem of lowerdir is not supported\n"); -- goto out_put_workpath; -+ goto out_put_lowerpath; - } - - err = vfs_statfs(&lowerpath, &statfs); - if (err) { - pr_err("overlayfs: statfs failed on lowerpath\n"); -- goto out_put_workpath; -+ goto out_put_lowerpath; - } - ufs->lower_namelen = statfs.f_namelen; - -@@ -806,19 +800,27 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - err = -EINVAL; - if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { - pr_err("overlayfs: maximum fs stacking depth exceeded\n"); -- goto out_put_workpath; -+ goto out_put_lowerpath; - } - - ufs->upper_mnt = clone_private_mount(&upperpath); - err = PTR_ERR(ufs->upper_mnt); - if (IS_ERR(ufs->upper_mnt)) { - pr_err("overlayfs: failed to clone upperpath\n"); -- goto out_put_workpath; -+ goto out_put_lowerpath; -+ } -+ -+ ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); -+ err = PTR_ERR(ufs->workdir); -+ if (IS_ERR(ufs->workdir)) { -+ pr_err("overlayfs: failed to create directory %s/%s\n", -+ ufs->config.workdir, OVL_WORKDIR_NAME); -+ goto out_put_upper_mnt; - } - - ufs->lower_mnt = kcalloc(1, sizeof(struct vfsmount *), GFP_KERNEL); - if (ufs->lower_mnt == NULL) -- goto out_put_upper_mnt; -+ goto out_put_workdir; - - mnt = clone_private_mount(&lowerpath); - err = PTR_ERR(mnt); -@@ -835,14 +837,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - ufs->lower_mnt[0] = mnt; - ufs->numlower = 1; - -- ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); -- err = PTR_ERR(ufs->workdir); -- if (IS_ERR(ufs->workdir)) { -- pr_err("overlayfs: failed to create directory %s/%s\n", -- ufs->config.workdir, OVL_WORKDIR_NAME); -- goto out_put_lower_mnt; -- } -- - /* If the upper fs is r/o, we mark overlayfs r/o too */ - if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) - sb->s_flags |= MS_RDONLY; -@@ -850,13 +844,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - sb->s_d_op = &ovl_dentry_operations; - - err = -ENOMEM; -- root_inode = ovl_new_inode(sb, S_IFDIR, oe); -- if (!root_inode) -- goto out_put_workdir; -+ oe = ovl_alloc_entry(1); -+ if (!oe) -+ goto out_put_lower_mnt; - -- root_dentry = d_make_root(root_inode); -+ root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, oe)); - if (!root_dentry) -- goto out_put_workdir; -+ goto out_free_oe; - - mntput(upperpath.mnt); - mntput(lowerpath.mnt); -@@ -875,22 +869,22 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - - return 0; - --out_put_workdir: -- dput(ufs->workdir); -+out_free_oe: -+ kfree(oe); - out_put_lower_mnt: - for (i = 0; i < ufs->numlower; i++) - mntput(ufs->lower_mnt[i]); - kfree(ufs->lower_mnt); -+out_put_workdir: -+ dput(ufs->workdir); - out_put_upper_mnt: - mntput(ufs->upper_mnt); --out_put_workpath: -- path_put(&workpath); - out_put_lowerpath: - path_put(&lowerpath); -+out_put_workpath: -+ path_put(&workpath); - out_put_upperpath: - path_put(&upperpath); --out_free_oe: -- kfree(oe); - out_free_config: - kfree(ufs->config.lowerdir); - kfree(ufs->config.upperdir); --- -2.7.4 - - -From 4db5f1d8cd9d89c8810c8ed10a4e50d1f0debbd1 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:49 +0100 -Subject: [PATCH 52/73] ovl: improve mount helpers - -Move common checks into ovl_mount_dir() helper. - -Create helper for looking up lower directories. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit ab508822cab4c84f07373cd6ad107a1fd1362831) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 125 ++++++++++++++++++++++++++++++--------------------- - 1 file changed, 73 insertions(+), 52 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index a177028..592370f 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -669,24 +669,6 @@ static void ovl_unescape(char *s) - } - } - --static int ovl_mount_dir(const char *name, struct path *path) --{ -- int err; -- char *tmp = kstrdup(name, GFP_KERNEL); -- -- if (!tmp) -- return -ENOMEM; -- -- ovl_unescape(tmp); -- err = kern_path(tmp, LOOKUP_FOLLOW, path); -- if (err) { -- pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); -- err = -EINVAL; -- } -- kfree(tmp); -- return err; --} -- - static bool ovl_is_allowed_fs_type(struct dentry *root) - { - const struct dentry_operations *dop = root->d_op; -@@ -706,6 +688,71 @@ static bool ovl_is_allowed_fs_type(struct dentry *root) - return true; - } - -+static int ovl_mount_dir_noesc(const char *name, struct path *path) -+{ -+ int err; -+ -+ err = kern_path(name, LOOKUP_FOLLOW, path); -+ if (err) { -+ pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); -+ goto out; -+ } -+ err = -EINVAL; -+ if (!ovl_is_allowed_fs_type(path->dentry)) { -+ pr_err("overlayfs: filesystem on '%s' not supported\n", name); -+ goto out_put; -+ } -+ if (!S_ISDIR(path->dentry->d_inode->i_mode)) { -+ pr_err("overlayfs: '%s' not a directory\n", name); -+ goto out_put; -+ } -+ return 0; -+ -+out_put: -+ path_put(path); -+out: -+ return err; -+} -+ -+static int ovl_mount_dir(const char *name, struct path *path) -+{ -+ int err = -ENOMEM; -+ char *tmp = kstrdup(name, GFP_KERNEL); -+ -+ if (tmp) { -+ ovl_unescape(tmp); -+ err = ovl_mount_dir_noesc(tmp, path); -+ kfree(tmp); -+ } -+ return err; -+} -+ -+static int ovl_lower_dir(const char *name, struct path *path, long *namelen, -+ int *stack_depth) -+{ -+ int err; -+ struct kstatfs statfs; -+ -+ err = ovl_mount_dir(name, path); -+ if (err) -+ goto out; -+ -+ err = vfs_statfs(path, &statfs); -+ if (err) { -+ pr_err("overlayfs: statfs failed on '%s'\n", name); -+ goto out_put; -+ } -+ *namelen = max(*namelen, statfs.f_namelen); -+ *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); -+ -+ return 0; -+ -+out_put: -+ path_put(path); -+out: -+ return err; -+} -+ - /* Workdir should not be subdir of upperdir and vice versa */ - static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) - { -@@ -726,7 +773,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - struct dentry *root_dentry; - struct ovl_entry *oe; - struct ovl_fs *ufs; -- struct kstatfs statfs; - struct vfsmount *mnt; - unsigned int i; - int err; -@@ -756,48 +802,23 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - if (err) - goto out_put_upperpath; - -- err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath); -- if (err) -- goto out_put_workpath; -- -- err = -EINVAL; -- if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) || -- !S_ISDIR(lowerpath.dentry->d_inode->i_mode) || -- !S_ISDIR(workpath.dentry->d_inode->i_mode)) { -- pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n"); -- goto out_put_lowerpath; -- } -- - if (upperpath.mnt != workpath.mnt) { - pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); -- goto out_put_lowerpath; -+ goto out_put_workpath; - } - if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { - pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); -- goto out_put_lowerpath; -- } -- -- if (!ovl_is_allowed_fs_type(upperpath.dentry)) { -- pr_err("overlayfs: filesystem of upperdir is not supported\n"); -- goto out_put_lowerpath; -- } -- -- if (!ovl_is_allowed_fs_type(lowerpath.dentry)) { -- pr_err("overlayfs: filesystem of lowerdir is not supported\n"); -- goto out_put_lowerpath; -- } -- -- err = vfs_statfs(&lowerpath, &statfs); -- if (err) { -- pr_err("overlayfs: statfs failed on lowerpath\n"); -- goto out_put_lowerpath; -+ goto out_put_workpath; - } -- ufs->lower_namelen = statfs.f_namelen; -+ sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; - -- sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth, -- lowerpath.mnt->mnt_sb->s_stack_depth) + 1; -+ err = ovl_lower_dir(ufs->config.lowerdir, &lowerpath, -+ &ufs->lower_namelen, &sb->s_stack_depth); -+ if (err) -+ goto out_put_workpath; - - err = -EINVAL; -+ sb->s_stack_depth++; - if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { - pr_err("overlayfs: maximum fs stacking depth exceeded\n"); - goto out_put_lowerpath; --- -2.7.4 - - -From b153140225155c1de019eefe5aadde93365f7a66 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:51 +0100 -Subject: [PATCH 53/73] ovl: make upperdir optional - -Make "upperdir=" mount option optional. If "upperdir=" is not given, then -the "workdir=" option is also optional (and ignored if given). - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 53a08cb9b8bccfe58f1228c7c27baf34a83da78b) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 83 +++++++++++++++++++++++++++++----------------------- - 1 file changed, 47 insertions(+), 36 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 592370f..35bb0ad 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -516,8 +516,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) - struct ovl_fs *ufs = sb->s_fs_info; - - seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); -- seq_printf(m, ",upperdir=%s", ufs->config.upperdir); -- seq_printf(m, ",workdir=%s", ufs->config.workdir); -+ if (ufs->config.upperdir) { -+ seq_printf(m, ",upperdir=%s", ufs->config.upperdir); -+ seq_printf(m, ",workdir=%s", ufs->config.workdir); -+ } - return 0; - } - -@@ -768,8 +770,8 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) - static int ovl_fill_super(struct super_block *sb, void *data, int silent) - { - struct path lowerpath; -- struct path upperpath; -- struct path workpath; -+ struct path upperpath = { NULL, NULL }; -+ struct path workpath = { NULL, NULL }; - struct dentry *root_dentry; - struct ovl_entry *oe; - struct ovl_fs *ufs; -@@ -786,31 +788,38 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - if (err) - goto out_free_config; - -- /* FIXME: workdir is not needed for a R/O mount */ - err = -EINVAL; -- if (!ufs->config.upperdir || !ufs->config.lowerdir || -- !ufs->config.workdir) { -- pr_err("overlayfs: missing upperdir or lowerdir or workdir\n"); -+ if (!ufs->config.lowerdir) { -+ pr_err("overlayfs: missing 'lowerdir'\n"); - goto out_free_config; - } - -- err = ovl_mount_dir(ufs->config.upperdir, &upperpath); -- if (err) -- goto out_free_config; -+ sb->s_stack_depth = 0; -+ if (ufs->config.upperdir) { -+ /* FIXME: workdir is not needed for a R/O mount */ -+ if (!ufs->config.workdir) { -+ pr_err("overlayfs: missing 'workdir'\n"); -+ goto out_free_config; -+ } - -- err = ovl_mount_dir(ufs->config.workdir, &workpath); -- if (err) -- goto out_put_upperpath; -+ err = ovl_mount_dir(ufs->config.upperdir, &upperpath); -+ if (err) -+ goto out_free_config; - -- if (upperpath.mnt != workpath.mnt) { -- pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); -- goto out_put_workpath; -- } -- if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { -- pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); -- goto out_put_workpath; -+ err = ovl_mount_dir(ufs->config.workdir, &workpath); -+ if (err) -+ goto out_put_upperpath; -+ -+ if (upperpath.mnt != workpath.mnt) { -+ pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); -+ goto out_put_workpath; -+ } -+ if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { -+ pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); -+ goto out_put_workpath; -+ } -+ sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; - } -- sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; - - err = ovl_lower_dir(ufs->config.lowerdir, &lowerpath, - &ufs->lower_namelen, &sb->s_stack_depth); -@@ -824,19 +833,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - goto out_put_lowerpath; - } - -- ufs->upper_mnt = clone_private_mount(&upperpath); -- err = PTR_ERR(ufs->upper_mnt); -- if (IS_ERR(ufs->upper_mnt)) { -- pr_err("overlayfs: failed to clone upperpath\n"); -- goto out_put_lowerpath; -- } -+ if (ufs->config.upperdir) { -+ ufs->upper_mnt = clone_private_mount(&upperpath); -+ err = PTR_ERR(ufs->upper_mnt); -+ if (IS_ERR(ufs->upper_mnt)) { -+ pr_err("overlayfs: failed to clone upperpath\n"); -+ goto out_put_lowerpath; -+ } - -- ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); -- err = PTR_ERR(ufs->workdir); -- if (IS_ERR(ufs->workdir)) { -- pr_err("overlayfs: failed to create directory %s/%s\n", -- ufs->config.workdir, OVL_WORKDIR_NAME); -- goto out_put_upper_mnt; -+ ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); -+ err = PTR_ERR(ufs->workdir); -+ if (IS_ERR(ufs->workdir)) { -+ pr_err("overlayfs: failed to create directory %s/%s\n", -+ ufs->config.workdir, OVL_WORKDIR_NAME); -+ goto out_put_upper_mnt; -+ } - } - - ufs->lower_mnt = kcalloc(1, sizeof(struct vfsmount *), GFP_KERNEL); -@@ -858,8 +869,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - ufs->lower_mnt[0] = mnt; - ufs->numlower = 1; - -- /* If the upper fs is r/o, we mark overlayfs r/o too */ -- if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY) -+ /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ -+ if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) - sb->s_flags |= MS_RDONLY; - - sb->s_d_op = &ovl_dentry_operations; --- -2.7.4 - - -From cb19636311dcb8206f668af9cbb68e9bafaeea14 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:52 +0100 -Subject: [PATCH 54/73] ovl: support multiple lower layers - -Allow "lowerdir=" option to contain multiple lower directories separated by -a colon (e.g. "lowerdir=/bin:/usr/bin"). Colon characters in filenames can -be escaped with a backslash. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit a78d9f0d5d5ca9054703376c7c23c901807ddd87) -Signed-off-by: Alex Shi ---- - Documentation/filesystems/overlayfs.txt | 12 ++++ - fs/overlayfs/super.c | 110 ++++++++++++++++++++++++-------- - 2 files changed, 95 insertions(+), 27 deletions(-) - -diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt -index a27c950..b370928 100644 ---- a/Documentation/filesystems/overlayfs.txt -+++ b/Documentation/filesystems/overlayfs.txt -@@ -159,6 +159,18 @@ overlay filesystem (though an operation on the name of the file such as - rename or unlink will of course be noticed and handled). - - -+Multiple lower layers -+--------------------- -+ -+Multiple lower layers can now be given using the the colon (":") as a -+separator character between the directory names. For example: -+ -+ mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged -+ -+As the example shows, "upperdir=" and "workdir=" may be omitted. In that case -+the overlay will be read-only. -+ -+ - Non-standard behavior - --------------------- - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 35bb0ad..5c495a1 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -60,6 +60,8 @@ struct ovl_entry { - struct path lowerstack[]; - }; - -+#define OVL_MAX_STACK 500 -+ - const char *ovl_opaque_xattr = "trusted.overlay.opaque"; - - static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) -@@ -692,8 +694,12 @@ static bool ovl_is_allowed_fs_type(struct dentry *root) - - static int ovl_mount_dir_noesc(const char *name, struct path *path) - { -- int err; -+ int err = -EINVAL; - -+ if (!*name) { -+ pr_err("overlayfs: empty lowerdir\n"); -+ goto out; -+ } - err = kern_path(name, LOOKUP_FOLLOW, path); - if (err) { - pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); -@@ -735,7 +741,7 @@ static int ovl_lower_dir(const char *name, struct path *path, long *namelen, - int err; - struct kstatfs statfs; - -- err = ovl_mount_dir(name, path); -+ err = ovl_mount_dir_noesc(name, path); - if (err) - goto out; - -@@ -767,15 +773,38 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) - return ok; - } - -+static unsigned int ovl_split_lowerdirs(char *str) -+{ -+ unsigned int ctr = 1; -+ char *s, *d; -+ -+ for (s = d = str;; s++, d++) { -+ if (*s == '\\') { -+ s++; -+ } else if (*s == ':') { -+ *d = '\0'; -+ ctr++; -+ continue; -+ } -+ *d = *s; -+ if (!*s) -+ break; -+ } -+ return ctr; -+} -+ - static int ovl_fill_super(struct super_block *sb, void *data, int silent) - { -- struct path lowerpath; - struct path upperpath = { NULL, NULL }; - struct path workpath = { NULL, NULL }; - struct dentry *root_dentry; - struct ovl_entry *oe; - struct ovl_fs *ufs; -- struct vfsmount *mnt; -+ struct path *stack = NULL; -+ char *lowertmp; -+ char *lower; -+ unsigned int numlower; -+ unsigned int stacklen = 0; - unsigned int i; - int err; - -@@ -820,13 +849,31 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - } - sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; - } -- -- err = ovl_lower_dir(ufs->config.lowerdir, &lowerpath, -- &ufs->lower_namelen, &sb->s_stack_depth); -- if (err) -+ err = -ENOMEM; -+ lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); -+ if (!lowertmp) - goto out_put_workpath; - - err = -EINVAL; -+ stacklen = ovl_split_lowerdirs(lowertmp); -+ if (stacklen > OVL_MAX_STACK) -+ goto out_free_lowertmp; -+ -+ stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); -+ if (!stack) -+ goto out_free_lowertmp; -+ -+ lower = lowertmp; -+ for (numlower = 0; numlower < stacklen; numlower++) { -+ err = ovl_lower_dir(lower, &stack[numlower], -+ &ufs->lower_namelen, &sb->s_stack_depth); -+ if (err) -+ goto out_put_lowerpath; -+ -+ lower = strchr(lower, '\0') + 1; -+ } -+ -+ err = -EINVAL; - sb->s_stack_depth++; - if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { - pr_err("overlayfs: maximum fs stacking depth exceeded\n"); -@@ -850,24 +897,25 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - } - } - -- ufs->lower_mnt = kcalloc(1, sizeof(struct vfsmount *), GFP_KERNEL); -+ ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); - if (ufs->lower_mnt == NULL) - goto out_put_workdir; -+ for (i = 0; i < numlower; i++) { -+ struct vfsmount *mnt = clone_private_mount(&stack[i]); - -- mnt = clone_private_mount(&lowerpath); -- err = PTR_ERR(mnt); -- if (IS_ERR(mnt)) { -- pr_err("overlayfs: failed to clone lowerpath\n"); -- goto out_put_lower_mnt; -- } -- /* -- * Make lower_mnt R/O. That way fchmod/fchown on lower file -- * will fail instead of modifying lower fs. -- */ -- mnt->mnt_flags |= MNT_READONLY; -+ if (IS_ERR(mnt)) { -+ pr_err("overlayfs: failed to clone lowerpath\n"); -+ goto out_put_lower_mnt; -+ } -+ /* -+ * Make lower_mnt R/O. That way fchmod/fchown on lower file -+ * will fail instead of modifying lower fs. -+ */ -+ mnt->mnt_flags |= MNT_READONLY; - -- ufs->lower_mnt[0] = mnt; -- ufs->numlower = 1; -+ ufs->lower_mnt[ufs->numlower] = mnt; -+ ufs->numlower++; -+ } - - /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ - if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) -@@ -876,7 +924,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - sb->s_d_op = &ovl_dentry_operations; - - err = -ENOMEM; -- oe = ovl_alloc_entry(1); -+ oe = ovl_alloc_entry(numlower); - if (!oe) - goto out_put_lower_mnt; - -@@ -885,12 +933,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - goto out_free_oe; - - mntput(upperpath.mnt); -- mntput(lowerpath.mnt); -+ for (i = 0; i < numlower; i++) -+ mntput(stack[i].mnt); - path_put(&workpath); -+ kfree(lowertmp); - - oe->__upperdentry = upperpath.dentry; -- oe->lowerstack[0].dentry = lowerpath.dentry; -- oe->lowerstack[0].mnt = ufs->lower_mnt[0]; -+ for (i = 0; i < numlower; i++) { -+ oe->lowerstack[i].dentry = stack[i].dentry; -+ oe->lowerstack[i].mnt = ufs->lower_mnt[i]; -+ } - - root_dentry->d_fsdata = oe; - -@@ -912,7 +964,11 @@ out_put_workdir: - out_put_upper_mnt: - mntput(ufs->upper_mnt); - out_put_lowerpath: -- path_put(&lowerpath); -+ for (i = 0; i < numlower; i++) -+ path_put(&stack[i]); -+ kfree(stack); -+out_free_lowertmp: -+ kfree(lowertmp); - out_put_workpath: - path_put(&workpath); - out_put_upperpath: --- -2.7.4 - - -From 96149453dee0b67c9ecb1032c0ed0e4aa31fb914 Mon Sep 17 00:00:00 2001 -From: hujianyang -Date: Wed, 26 Nov 2014 16:16:59 +0800 -Subject: [PATCH 55/73] ovl: Cleanup redundant blank lines - -This patch removes redundant blanks lines in overlayfs. - -Signed-off-by: hujianyang -Signed-off-by: Miklos Szeredi -(cherry picked from commit 1ba38725a351f91769918b132c17fb7fcaf6c2f5) -Signed-off-by: Alex Shi ---- - fs/overlayfs/copy_up.c | 1 - - fs/overlayfs/inode.c | 1 - - fs/overlayfs/super.c | 1 - - 3 files changed, 3 deletions(-) - -diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c -index a5bfd60..24f6404 100644 ---- a/fs/overlayfs/copy_up.c -+++ b/fs/overlayfs/copy_up.c -@@ -191,7 +191,6 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) - ovl_set_timestamps(upperdentry, stat); - - return err; -- - } - - static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, -diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c -index 48492f1..5ac1236 100644 ---- a/fs/overlayfs/inode.c -+++ b/fs/overlayfs/inode.c -@@ -433,5 +433,4 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, - } - - return inode; -- - } -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 5c495a1..e9ce4a9 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -106,7 +106,6 @@ void ovl_path_upper(struct dentry *dentry, struct path *path) - - enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) - { -- - enum ovl_path_type type = ovl_path_type(dentry); - - if (!OVL_TYPE_UPPER(type)) --- -2.7.4 - - -From 4fb1e9e207167f999c57954716a51942d0d61674 Mon Sep 17 00:00:00 2001 -From: hujianyang -Date: Mon, 24 Nov 2014 18:25:21 +0800 -Subject: [PATCH 56/73] ovl: Use macros to present ovl_xattr - -This patch adds two macros: - -OVL_XATTR_PRE_NAME and OVL_XATTR_PRE_LEN - -to present ovl_xattr name prefix and its length. Also, a -new macro OVL_XATTR_OPAQUE is introduced to replace old -*ovl_opaque_xattr*. - -Fix the length of "trusted.overlay." to *16*. - -Signed-off-by: hujianyang -Signed-off-by: Miklos Szeredi -(cherry picked from commit cead89bb08c0f64e23886f1c18df9bb98e97c55c) -Signed-off-by: Alex Shi ---- - fs/overlayfs/dir.c | 4 ++-- - fs/overlayfs/inode.c | 2 +- - fs/overlayfs/overlayfs.h | 4 +++- - fs/overlayfs/super.c | 4 +--- - 4 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c -index dcae3ac..0dc4c33 100644 ---- a/fs/overlayfs/dir.c -+++ b/fs/overlayfs/dir.c -@@ -118,14 +118,14 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, - - static int ovl_set_opaque(struct dentry *upperdentry) - { -- return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0); -+ return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); - } - - static void ovl_remove_opaque(struct dentry *upperdentry) - { - int err; - -- err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr); -+ err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE); - if (err) { - pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", - upperdentry->d_name.name, err); -diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c -index 5ac1236..04f1248 100644 ---- a/fs/overlayfs/inode.c -+++ b/fs/overlayfs/inode.c -@@ -205,7 +205,7 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) - - static bool ovl_is_private_xattr(const char *name) - { -- return strncmp(name, "trusted.overlay.", 14) == 0; -+ return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0; - } - - int ovl_setxattr(struct dentry *dentry, const char *name, -diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h -index d176b67..17ac5af 100644 ---- a/fs/overlayfs/overlayfs.h -+++ b/fs/overlayfs/overlayfs.h -@@ -23,7 +23,9 @@ enum ovl_path_type { - #define OVL_TYPE_MERGE_OR_LOWER(type) \ - (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) - --extern const char *ovl_opaque_xattr; -+#define OVL_XATTR_PRE_NAME "trusted.overlay." -+#define OVL_XATTR_PRE_LEN 16 -+#define OVL_XATTR_OPAQUE OVL_XATTR_PRE_NAME"opaque" - - static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) - { -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index e9ce4a9..84f3144 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -62,8 +62,6 @@ struct ovl_entry { - - #define OVL_MAX_STACK 500 - --const char *ovl_opaque_xattr = "trusted.overlay.opaque"; -- - static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) - { - return oe->numlower ? oe->lowerstack[0].dentry : NULL; -@@ -254,7 +252,7 @@ static bool ovl_is_opaquedir(struct dentry *dentry) - if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) - return false; - -- res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1); -+ res = inode->i_op->getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); - if (res == 1 && val == 'y') - return true; - --- -2.7.4 - - -From 248653d3bb85e5b87899b47e56dc0cf86ee24f89 Mon Sep 17 00:00:00 2001 -From: hujianyang -Date: Tue, 6 Jan 2015 12:52:13 +0800 -Subject: [PATCH 57/73] ovl: Fix kernel panic while mounting overlayfs - -The function ovl_fill_super() in recently multi-layer support -version will incorrectly return 0 at error handling path and -then cause kernel panic. - -This failure can be reproduced by mounting a overlayfs with -upperdir and workdir in different mounts. - -And also, If the memory allocation of *lower_mnt* fail, this -function may return an zero either. - -This patch fix this problem by setting *err* to proper error -number before jumping to error handling path. - -Signed-off-by: hujianyang -Signed-off-by: Miklos Szeredi -(cherry picked from commit 2f83fd8c2849a388082f30d755a75c1e67c4643b) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 84f3144..6ca8ea8 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -836,6 +836,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - if (err) - goto out_put_upperpath; - -+ err = -EINVAL; - if (upperpath.mnt != workpath.mnt) { - pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); - goto out_put_workpath; -@@ -894,12 +895,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - } - } - -+ err = -ENOMEM; - ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); - if (ufs->lower_mnt == NULL) - goto out_put_workdir; - for (i = 0; i < numlower; i++) { - struct vfsmount *mnt = clone_private_mount(&stack[i]); - -+ err = PTR_ERR(mnt); - if (IS_ERR(mnt)) { - pr_err("overlayfs: failed to clone lowerpath\n"); - goto out_put_lower_mnt; --- -2.7.4 - - -From ef2578e619a9107cb007644f38701d6774a46c5f Mon Sep 17 00:00:00 2001 -From: hujianyang -Date: Tue, 6 Jan 2015 16:10:01 +0800 -Subject: [PATCH 58/73] ovl: Fix opaque regression in ovl_lookup - -Current multi-layer support overlayfs has a regression in -.lookup(). If there is a directory in upperdir and a regular -file has same name in lowerdir in a merged directory, lower -file is hidden and upper directory is set to opaque in former -case. But it is changed in present code. - -In lowerdir lookup path, if a found inode is not directory, -the type checking of previous inode is missing. This inode -will be copied to the lowerstack of ovl_entry directly. - -That will lead to several wrong conditions, for example, -the reading of the directory in upperdir may return an error -like: - - ls: reading directory .: Not a directory - -This patch makes the lowerdir lookup path check the opaque -for non-directory file too. - -Signed-off-by: hujianyang -Signed-off-by: Miklos Szeredi -(cherry picked from commit a425c037f3dd8a56469158ab5f37beb46402d958) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 23 +++++++++++++---------- - 1 file changed, 13 insertions(+), 10 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 6ca8ea8..9e94f4a 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -372,7 +372,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - bool opaque = false; - struct path lowerpath = poe->lowerstack[i]; - -- opaque = false; - this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name); - err = PTR_ERR(this); - if (IS_ERR(this)) { -@@ -395,20 +394,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, - */ - if (i < poe->numlower - 1 && ovl_is_opaquedir(this)) - opaque = true; -- /* -- * If this is a non-directory then stop here. -- * -- * FIXME: check for opaqueness maybe better done in remove code. -- */ -- if (!S_ISDIR(this->d_inode->i_mode)) { -- opaque = true; -- } else if (prev && (!S_ISDIR(prev->d_inode->i_mode) || -- !S_ISDIR(this->d_inode->i_mode))) { -+ -+ if (prev && (!S_ISDIR(prev->d_inode->i_mode) || -+ !S_ISDIR(this->d_inode->i_mode))) { -+ /* -+ * FIXME: check for upper-opaqueness maybe better done -+ * in remove code. -+ */ - if (prev == upperdentry) - upperopaque = true; - dput(this); - break; - } -+ /* -+ * If this is a non-directory then stop here. -+ */ -+ if (!S_ISDIR(this->d_inode->i_mode)) -+ opaque = true; -+ - stack[ctr].dentry = this; - stack[ctr].mnt = lowerpath.mnt; - ctr++; --- -2.7.4 - - -From 1676d3ec23db0db95de768672df3355c92589a0e Mon Sep 17 00:00:00 2001 -From: Seunghun Lee -Date: Sat, 3 Jan 2015 02:26:49 +0900 -Subject: [PATCH 59/73] ovl: Prevent rw remount when it should be ro mount - -Overlayfs should be mounted read-only when upper-fs is read-only or nonexistent. -But now it can be remounted read-write and this can cause kernel panic. -So we should prevent read-write remount when the above situation happens. - -Signed-off-by: Seunghun Lee -Signed-off-by: Miklos Szeredi -(cherry picked from commit 3cdf6fe91041b3afd6761f76254f7b6cbe8020fc) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 9e94f4a..b90952f 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -525,10 +525,22 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) - return 0; - } - -+static int ovl_remount(struct super_block *sb, int *flags, char *data) -+{ -+ struct ovl_fs *ufs = sb->s_fs_info; -+ -+ if (!(*flags & MS_RDONLY) && -+ (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))) -+ return -EROFS; -+ -+ return 0; -+} -+ - static const struct super_operations ovl_super_operations = { - .put_super = ovl_put_super, - .statfs = ovl_statfs, - .show_options = ovl_show_options, -+ .remount_fs = ovl_remount, - }; - - enum { --- -2.7.4 - - -From add5fbd4b119cd897c130cc2282c4040dc004b88 Mon Sep 17 00:00:00 2001 -From: hujianyang -Date: Thu, 11 Dec 2014 10:30:18 +0800 -Subject: [PATCH 60/73] ovl: discard independent cursor in readdir() - -Since the ovl_dir_cache is stable during a directory reading, the cursor -of struct ovl_dir_file don't need to be an independent entry in the list -of a merged directory. - -This patch changes *cursor* to a pointer which points to the entry in the -ovl_dir_cache. After this, we don't need to check *is_cursor* either. - -Signed-off-by: hujianyang -Signed-off-by: Miklos Szeredi -(cherry picked from commit 4330397e4e8a662f36d101659e2a59ce32e76ff4) -Signed-off-by: Alex Shi ---- - fs/overlayfs/readdir.c | 38 ++++++++++++++------------------------ - 1 file changed, 14 insertions(+), 24 deletions(-) - -diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c -index 2b8315b..907870e 100644 ---- a/fs/overlayfs/readdir.c -+++ b/fs/overlayfs/readdir.c -@@ -24,7 +24,6 @@ struct ovl_cache_entry { - struct list_head l_node; - struct rb_node node; - bool is_whiteout; -- bool is_cursor; - char name[]; - }; - -@@ -49,7 +48,7 @@ struct ovl_dir_file { - bool is_real; - bool is_upper; - struct ovl_dir_cache *cache; -- struct ovl_cache_entry cursor; -+ struct list_head *cursor; - struct file *realfile; - struct file *upperfile; - }; -@@ -97,7 +96,6 @@ static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, - p->type = d_type; - p->ino = ino; - p->is_whiteout = false; -- p->is_cursor = false; - - if (d_type == DT_CHR) { - struct dentry *dentry; -@@ -196,7 +194,6 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) - { - struct ovl_dir_cache *cache = od->cache; - -- list_del_init(&od->cursor.l_node); - WARN_ON(cache->refcount <= 0); - cache->refcount--; - if (!cache->refcount) { -@@ -256,6 +253,7 @@ static void ovl_dir_reset(struct file *file) - if (cache && ovl_dentry_version_get(dentry) != cache->version) { - ovl_cache_put(od, dentry); - od->cache = NULL; -+ od->cursor = NULL; - } - WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type)); - if (od->is_real && OVL_TYPE_MERGE(type)) -@@ -297,17 +295,16 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) - - static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) - { -- struct ovl_cache_entry *p; -+ struct list_head *p; - loff_t off = 0; - -- list_for_each_entry(p, &od->cache->entries, l_node) { -- if (p->is_cursor) -- continue; -+ list_for_each(p, &od->cache->entries) { - if (off >= pos) - break; - off++; - } -- list_move_tail(&od->cursor.l_node, &p->l_node); -+ /* Cursor is safe since the cache is stable */ -+ od->cursor = p; - } - - static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) -@@ -346,6 +343,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) - { - struct ovl_dir_file *od = file->private_data; - struct dentry *dentry = file->f_path.dentry; -+ struct ovl_cache_entry *p; - - if (!ctx->pos) - ovl_dir_reset(file); -@@ -364,19 +362,13 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) - ovl_seek_cursor(od, ctx->pos); - } - -- while (od->cursor.l_node.next != &od->cache->entries) { -- struct ovl_cache_entry *p; -- -- p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node); -- /* Skip cursors */ -- if (!p->is_cursor) { -- if (!p->is_whiteout) { -- if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) -- break; -- } -- ctx->pos++; -- } -- list_move(&od->cursor.l_node, &p->l_node); -+ while (od->cursor != &od->cache->entries) { -+ p = list_entry(od->cursor, struct ovl_cache_entry, l_node); -+ if (!p->is_whiteout) -+ if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) -+ break; -+ od->cursor = p->l_node.next; -+ ctx->pos++; - } - return 0; - } -@@ -495,11 +487,9 @@ static int ovl_dir_open(struct inode *inode, struct file *file) - kfree(od); - return PTR_ERR(realfile); - } -- INIT_LIST_HEAD(&od->cursor.l_node); - od->realfile = realfile; - od->is_real = !OVL_TYPE_MERGE(type); - od->is_upper = OVL_TYPE_UPPER(type); -- od->cursor.is_cursor = true; - file->private_data = od; - - return 0; --- -2.7.4 - - -From 088805e56746eafd55a13e2dc7fb37d046cfdfdb Mon Sep 17 00:00:00 2001 -From: hujianyang -Date: Thu, 15 Jan 2015 13:17:36 +0800 -Subject: [PATCH 61/73] ovl: print error message for invalid mount options - -Overlayfs should print an error message if an incorrect mount option -is caught like other filesystems. - -After this patch, improper option input could be clearly known. - -Reported-by: Fabian Sturm -Signed-off-by: hujianyang -Signed-off-by: Miklos Szeredi -(cherry picked from commit bead55ef775f6e25a8d286c0d47030580f577bec) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index b90952f..ab3c8cb 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -615,6 +615,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) - break; - - default: -+ pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); - return -EINVAL; - } - } --- -2.7.4 - - -From 0d8e803097108c61b26175178e62465a7d36c889 Mon Sep 17 00:00:00 2001 -From: hujianyang -Date: Thu, 15 Jan 2015 13:19:21 +0800 -Subject: [PATCH 62/73] ovl: check lowerdir amount for non-upper mount - -Recently multi-lower layer mount support allow upperdir and workdir -to be omitted, then cause overlayfs can be mount with only one -lowerdir directory. This action make no sense and have potential risk. - -This patch check the total number of lower directories to prevent -mounting overlayfs with only one directory. - -Also, an error message is added to indicate lower directories exceed -OVL_MAX_STACK limit. - -Signed-off-by: hujianyang -Signed-off-by: Miklos Szeredi -(cherry picked from commit 6be4506e34cf6075a1307b646e0a6c46c1c9010d) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index ab3c8cb..edbb3eb 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -870,8 +870,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - - err = -EINVAL; - stacklen = ovl_split_lowerdirs(lowertmp); -- if (stacklen > OVL_MAX_STACK) -+ if (stacklen > OVL_MAX_STACK) { -+ pr_err("overlayfs: too many lower directries, limit is %d\n", -+ OVL_MAX_STACK); - goto out_free_lowertmp; -+ } else if (!ufs->config.upperdir && stacklen == 1) { -+ pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); -+ goto out_free_lowertmp; -+ } - - stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); - if (!stack) --- -2.7.4 - - -From d7170c004481f9fbe303a391774e0fcb238f0e78 Mon Sep 17 00:00:00 2001 -From: hujianyang -Date: Thu, 15 Jan 2015 13:20:57 +0800 -Subject: [PATCH 63/73] ovl: upper fs should not be R/O - -After importing multi-lower layer support, users could mount a r/o -partition as the left most lowerdir instead of using it as upperdir. -And a r/o upperdir may cause an error like - - overlayfs: failed to create directory ./workdir/work - -during mount. - -This patch check the *s_flags* of upper fs and return an error if -it is a r/o partition. The checking of *upper_mnt->mnt_sb->s_flags* -can be removed now. - -This patch also remove - - /* FIXME: workdir is not needed for a R/O mount */ - -from ovl_fill_super() because: - -1) for upper fs r/o case -Setting a r/o partition as upper is prevented, no need to care about -workdir in this case. - -2) for "mount overlay -o ro" with a r/w upper fs case -Users could remount overlayfs to r/w in this case, so workdir should -not be omitted. - -Signed-off-by: hujianyang -Signed-off-by: Miklos Szeredi -(cherry picked from commit 71cbad7e694ee81233b3be3a38b81c3d5872cc6f) -Signed-off-by: Alex Shi ---- - fs/overlayfs/super.c | 24 +++++++++++++++++++----- - 1 file changed, 19 insertions(+), 5 deletions(-) - -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index edbb3eb..5f0d199 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) - { - struct ovl_fs *ufs = sb->s_fs_info; - -- if (!(*flags & MS_RDONLY) && -- (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))) -+ if (!(*flags & MS_RDONLY) && !ufs->upper_mnt) - return -EROFS; - - return 0; -@@ -619,6 +618,15 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) - return -EINVAL; - } - } -+ -+ /* Workdir is useless in non-upper mount */ -+ if (!config->upperdir && config->workdir) { -+ pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", -+ config->workdir); -+ kfree(config->workdir); -+ config->workdir = NULL; -+ } -+ - return 0; - } - -@@ -838,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - - sb->s_stack_depth = 0; - if (ufs->config.upperdir) { -- /* FIXME: workdir is not needed for a R/O mount */ - if (!ufs->config.workdir) { - pr_err("overlayfs: missing 'workdir'\n"); - goto out_free_config; -@@ -848,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - if (err) - goto out_free_config; - -+ /* Upper fs should not be r/o */ -+ if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { -+ pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); -+ err = -EINVAL; -+ goto out_put_upperpath; -+ } -+ - err = ovl_mount_dir(ufs->config.workdir, &workpath); - if (err) - goto out_put_upperpath; -@@ -939,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) - ufs->numlower++; - } - -- /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ -- if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) -+ /* If the upper fs is nonexistent, we mark overlayfs r/o too */ -+ if (!ufs->upper_mnt) - sb->s_flags |= MS_RDONLY; - - sb->s_d_op = &ovl_dentry_operations; --- -2.7.4 - - -From 59b077027a431f849a07bd330f0de14c31376743 Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Sat, 13 Dec 2014 00:59:53 +0100 -Subject: [PATCH 64/73] ovl: add testsuite to docs - -Reported-by: Sedat Dilek -Signed-off-by: Miklos Szeredi -(cherry picked from commit 2b7a8f36f092a7855f6438cd42d6990394f450fa) -Signed-off-by: Alex Shi ---- - Documentation/filesystems/overlayfs.txt | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt -index b370928..006ea48 100644 ---- a/Documentation/filesystems/overlayfs.txt -+++ b/Documentation/filesystems/overlayfs.txt -@@ -208,3 +208,15 @@ Changes to the underlying filesystems while part of a mounted overlay - filesystem are not allowed. If the underlying filesystem is changed, - the behavior of the overlay is undefined, though it will not result in - a crash or deadlock. -+ -+Testsuite -+--------- -+ -+There's testsuite developed by David Howells at: -+ -+ git://git.infradead.org/users/dhowells/unionmount-testsuite.git -+ -+Run as root: -+ -+ # cd unionmount-testsuite -+ # ./run --ov --- -2.7.4 - - -From 6fb17a412207343151aac1fde91935a542339deb Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Thu, 8 Jan 2015 15:09:15 +0100 -Subject: [PATCH 65/73] ovl: document lower layer ordering - -Reported-by: Fabian Sturm -Signed-off-by: Miklos Szeredi -(cherry picked from commit 6d900f5a33393067e370736d39798f814f5e25cc) -Signed-off-by: Alex Shi ---- - Documentation/filesystems/overlayfs.txt | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt -index 006ea48..6db0e5d 100644 ---- a/Documentation/filesystems/overlayfs.txt -+++ b/Documentation/filesystems/overlayfs.txt -@@ -167,8 +167,12 @@ separator character between the directory names. For example: - - mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged - --As the example shows, "upperdir=" and "workdir=" may be omitted. In that case --the overlay will be read-only. -+As the example shows, "upperdir=" and "workdir=" may be omitted. In -+that case the overlay will be read-only. -+ -+The specified lower directories will be stacked beginning from the -+rightmost one and going left. In the above example lower1 will be the -+top, lower2 the middle and lower3 the bottom layer. - - - Non-standard behavior --- -2.7.4 - - -From 77dfdd746390b9230ac5420f13dd363bcdac6e98 Mon Sep 17 00:00:00 2001 -From: Pavel Emelyanov -Date: Thu, 10 Oct 2013 17:10:30 +0400 -Subject: [PATCH 66/73] fuse: Connection bit for enabling writeback - -Off (0) by default. Will be used in the next patches and will be turned -on at the very end. - -Signed-off-by: Maxim Patlasov -Signed-off-by: Pavel Emelyanov -Signed-off-by: Miklos Szeredi -(cherry picked from commit d5cd66c58edf10a7ee786659994595fd43995aab) -Signed-off-by: Alex Shi ---- - fs/fuse/fuse_i.h | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h -index 2da5db2..374a8be 100644 ---- a/fs/fuse/fuse_i.h -+++ b/fs/fuse/fuse_i.h -@@ -480,6 +480,9 @@ struct fuse_conn { - /** Set if bdi is valid */ - unsigned bdi_initialized:1; - -+ /** write-back cache policy (default is write-through) */ -+ unsigned writeback_cache:1; -+ - /* - * The following bitfields are only for optimization purposes - * and hence races in setting them will not cause malfunction --- -2.7.4 - - -From 4a40d2a3f50fd5721e3c8896e077d3f62f5b73be Mon Sep 17 00:00:00 2001 -From: Pavel Emelyanov -Date: Thu, 10 Oct 2013 17:10:04 +0400 -Subject: [PATCH 67/73] fuse: Linking file to inode helper - -When writeback is ON every writeable file should be in per-inode write list, -not only mmap-ed ones. Thus introduce a helper for this linkage. - -Signed-off-by: Maxim Patlasov -Signed-off-by: Pavel Emelyanov -Signed-off-by: Miklos Szeredi -(cherry picked from commit 650b22b941fa03590c4a3671e79ec2c96ea59e9a) -Signed-off-by: Alex Shi ---- - fs/fuse/file.c | 33 +++++++++++++++++++-------------- - 1 file changed, 19 insertions(+), 14 deletions(-) - -diff --git a/fs/fuse/file.c b/fs/fuse/file.c -index d8a6027..f018386 100644 ---- a/fs/fuse/file.c -+++ b/fs/fuse/file.c -@@ -188,6 +188,22 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, - } - EXPORT_SYMBOL_GPL(fuse_do_open); - -+static void fuse_link_write_file(struct file *file) -+{ -+ struct inode *inode = file_inode(file); -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ struct fuse_inode *fi = get_fuse_inode(inode); -+ struct fuse_file *ff = file->private_data; -+ /* -+ * file may be written through mmap, so chain it onto the -+ * inodes's write_file list -+ */ -+ spin_lock(&fc->lock); -+ if (list_empty(&ff->write_entry)) -+ list_add(&ff->write_entry, &fi->write_files); -+ spin_unlock(&fc->lock); -+} -+ - void fuse_finish_open(struct inode *inode, struct file *file) - { - struct fuse_file *ff = file->private_data; -@@ -1942,20 +1958,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = { - - static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) - { -- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { -- struct inode *inode = file_inode(file); -- struct fuse_conn *fc = get_fuse_conn(inode); -- struct fuse_inode *fi = get_fuse_inode(inode); -- struct fuse_file *ff = file->private_data; -- /* -- * file may be written through mmap, so chain it onto the -- * inodes's write_file list -- */ -- spin_lock(&fc->lock); -- if (list_empty(&ff->write_entry)) -- list_add(&ff->write_entry, &fi->write_files); -- spin_unlock(&fc->lock); -- } -+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) -+ fuse_link_write_file(file); -+ - file_accessed(file); - vma->vm_ops = &fuse_file_vm_ops; - return 0; --- -2.7.4 - - -From fb2667e92663bb09a3d7e79de4103b7ce296b589 Mon Sep 17 00:00:00 2001 -From: Pavel Emelyanov -Date: Thu, 10 Oct 2013 17:10:16 +0400 -Subject: [PATCH 68/73] fuse: Prepare to handle short reads - -A helper which gets called when read reports less bytes than was requested. -See patch "trust kernel i_size only" for details. - -Signed-off-by: Maxim Patlasov -Signed-off-by: Pavel Emelyanov -Signed-off-by: Miklos Szeredi -(cherry picked from commit a92adc824ed5feaa2d4f7029f21170f574987aee) -Signed-off-by: Alex Shi ---- - fs/fuse/file.c | 21 +++++++++++++-------- - 1 file changed, 13 insertions(+), 8 deletions(-) - -diff --git a/fs/fuse/file.c b/fs/fuse/file.c -index f018386..4646157 100644 ---- a/fs/fuse/file.c -+++ b/fs/fuse/file.c -@@ -671,6 +671,15 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, - spin_unlock(&fc->lock); - } - -+static void fuse_short_read(struct fuse_req *req, struct inode *inode, -+ u64 attr_ver) -+{ -+ size_t num_read = req->out.args[0].size; -+ -+ loff_t pos = page_offset(req->pages[0]) + num_read; -+ fuse_read_update_size(inode, pos, attr_ver); -+} -+ - static int fuse_readpage(struct file *file, struct page *page) - { - struct fuse_io_priv io = { .async = 0, .file = file }; -@@ -708,18 +717,18 @@ static int fuse_readpage(struct file *file, struct page *page) - req->page_descs[0].length = count; - num_read = fuse_send_read(req, &io, pos, count, NULL); - err = req->out.h.error; -- fuse_put_request(fc, req); - - if (!err) { - /* - * Short read means EOF. If file size is larger, truncate it - */ - if (num_read < count) -- fuse_read_update_size(inode, pos + num_read, attr_ver); -+ fuse_short_read(req, inode, attr_ver); - - SetPageUptodate(page); - } - -+ fuse_put_request(fc, req); - fuse_invalidate_atime(inode); - out: - unlock_page(page); -@@ -742,13 +751,9 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) - /* - * Short read means EOF. If file size is larger, truncate it - */ -- if (!req->out.h.error && num_read < count) { -- loff_t pos; -+ if (!req->out.h.error && num_read < count) -+ fuse_short_read(req, inode, req->misc.read.attr_ver); - -- pos = page_offset(req->pages[0]) + num_read; -- fuse_read_update_size(inode, pos, -- req->misc.read.attr_ver); -- } - fuse_invalidate_atime(inode); - } - --- -2.7.4 - - -From 628bfe8bf63df0e83632598115cb91c7c52a115a Mon Sep 17 00:00:00 2001 -From: Pavel Emelyanov -Date: Thu, 10 Oct 2013 17:10:46 +0400 -Subject: [PATCH 69/73] fuse: Trust kernel i_size only - -Make fuse think that when writeback is on the inode's i_size is always -up-to-date and not update it with the value received from the userspace. -This is done because the page cache code may update i_size without letting -the FS know. - -This assumption implies fixing the previously introduced short-read helper -- -when a short read occurs the 'hole' is filled with zeroes. - -fuse_file_fallocate() is also fixed because now we should keep i_size up to -date, so it must be updated if FUSE_FALLOCATE request succeeded. - -Signed-off-by: Maxim V. Patlasov -Signed-off-by: Miklos Szeredi -(cherry picked from commit 8373200b124d03de7fa2e99be56de8642e604e9e) -Signed-off-by: Alex Shi ---- - fs/fuse/dir.c | 13 +++++++++++-- - fs/fuse/file.c | 21 +++++++++++++++++++-- - fs/fuse/inode.c | 11 +++++++++-- - 3 files changed, 39 insertions(+), 6 deletions(-) - -diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c -index d53a7d9..6fc8339 100644 ---- a/fs/fuse/dir.c -+++ b/fs/fuse/dir.c -@@ -870,6 +870,11 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, - struct kstat *stat) - { - unsigned int blkbits; -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ -+ /* see the comment in fuse_change_attributes() */ -+ if (fc->writeback_cache && S_ISREG(inode->i_mode)) -+ attr->size = i_size_read(inode); - - stat->dev = inode->i_sb->s_dev; - stat->ino = attr->ino; -@@ -1611,6 +1616,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, - struct fuse_setattr_in inarg; - struct fuse_attr_out outarg; - bool is_truncate = false; -+ bool is_wb = fc->writeback_cache; - loff_t oldsize; - int err; - -@@ -1682,7 +1688,9 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, - fuse_change_attributes_common(inode, &outarg.attr, - attr_timeout(&outarg)); - oldsize = inode->i_size; -- i_size_write(inode, outarg.attr.size); -+ /* see the comment in fuse_change_attributes() */ -+ if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) -+ i_size_write(inode, outarg.attr.size); - - if (is_truncate) { - /* NOTE: this may release/reacquire fc->lock */ -@@ -1694,7 +1702,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, - * Only call invalidate_inode_pages2() after removing - * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. - */ -- if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { -+ if ((is_truncate || !is_wb) && -+ S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { - truncate_pagecache(inode, outarg.attr.size); - invalidate_inode_pages2(inode->i_mapping); - } -diff --git a/fs/fuse/file.c b/fs/fuse/file.c -index 4646157..5b863c0 100644 ---- a/fs/fuse/file.c -+++ b/fs/fuse/file.c -@@ -675,9 +675,26 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode, - u64 attr_ver) - { - size_t num_read = req->out.args[0].size; -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ -+ if (fc->writeback_cache) { -+ /* -+ * A hole in a file. Some data after the hole are in page cache, -+ * but have not reached the client fs yet. So, the hole is not -+ * present there. -+ */ -+ int i; -+ int start_idx = num_read >> PAGE_CACHE_SHIFT; -+ size_t off = num_read & (PAGE_CACHE_SIZE - 1); - -- loff_t pos = page_offset(req->pages[0]) + num_read; -- fuse_read_update_size(inode, pos, attr_ver); -+ for (i = start_idx; i < req->num_pages; i++) { -+ zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE); -+ off = 0; -+ } -+ } else { -+ loff_t pos = page_offset(req->pages[0]) + num_read; -+ fuse_read_update_size(inode, pos, attr_ver); -+ } - } - - static int fuse_readpage(struct file *file, struct page *page) -diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c -index 73f6bcb..22a78a6 100644 ---- a/fs/fuse/inode.c -+++ b/fs/fuse/inode.c -@@ -197,6 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - { - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); -+ bool is_wb = fc->writeback_cache; - loff_t oldsize; - struct timespec old_mtime; - -@@ -211,10 +212,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, - fuse_change_attributes_common(inode, attr, attr_valid); - - oldsize = inode->i_size; -- i_size_write(inode, attr->size); -+ /* -+ * In case of writeback_cache enabled, the cached writes beyond EOF -+ * extend local i_size without keeping userspace server in sync. So, -+ * attr->size coming from server can be stale. We cannot trust it. -+ */ -+ if (!is_wb || !S_ISREG(inode->i_mode)) -+ i_size_write(inode, attr->size); - spin_unlock(&fc->lock); - -- if (S_ISREG(inode->i_mode)) { -+ if (!is_wb && S_ISREG(inode->i_mode)) { - bool inval = false; - - if (oldsize != attr->size) { --- -2.7.4 - - -From 9d48c62fbcbc8ce7c9cad37119d12fe0be61300c Mon Sep 17 00:00:00 2001 -From: Maxim Patlasov -Date: Thu, 26 Dec 2013 19:51:11 +0400 -Subject: [PATCH 70/73] fuse: Trust kernel i_mtime only - -Let the kernel maintain i_mtime locally: - - clear S_NOCMTIME - - implement i_op->update_time() - - flush mtime on fsync and last close - - update i_mtime explicitly on truncate and fallocate - -Fuse inode flag FUSE_I_MTIME_DIRTY serves as indication that local i_mtime -should be flushed to the server eventually. - -Signed-off-by: Maxim Patlasov -Signed-off-by: Miklos Szeredi -(cherry picked from commit b0aa760652179072119582375f8dc896ed5b5dfd) -Signed-off-by: Alex Shi ---- - fs/fuse/dir.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++--------- - fs/fuse/file.c | 30 +++++++++++++--- - fs/fuse/fuse_i.h | 6 +++- - fs/fuse/inode.c | 13 +++++-- - 4 files changed, 132 insertions(+), 25 deletions(-) - -diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c -index 6fc8339..b4adb3d9 100644 ---- a/fs/fuse/dir.c -+++ b/fs/fuse/dir.c -@@ -873,8 +873,11 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, - struct fuse_conn *fc = get_fuse_conn(inode); - - /* see the comment in fuse_change_attributes() */ -- if (fc->writeback_cache && S_ISREG(inode->i_mode)) -+ if (fc->writeback_cache && S_ISREG(inode->i_mode)) { - attr->size = i_size_read(inode); -+ attr->mtime = inode->i_mtime.tv_sec; -+ attr->mtimensec = inode->i_mtime.tv_nsec; -+ } - - stat->dev = inode->i_sb->s_dev; - stat->ino = attr->ino; -@@ -1513,12 +1516,16 @@ static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, - FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); - } - --static bool update_mtime(unsigned ivalid) -+static bool update_mtime(unsigned ivalid, bool trust_local_mtime) - { - /* Always update if mtime is explicitly set */ - if (ivalid & ATTR_MTIME_SET) - return true; - -+ /* Or if kernel i_mtime is the official one */ -+ if (trust_local_mtime) -+ return true; -+ - /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ - if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) - return false; -@@ -1527,7 +1534,8 @@ static bool update_mtime(unsigned ivalid) - return true; - } - --static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) -+static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, -+ bool trust_local_mtime) - { - unsigned ivalid = iattr->ia_valid; - -@@ -1546,11 +1554,11 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) - if (!(ivalid & ATTR_ATIME_SET)) - arg->valid |= FATTR_ATIME_NOW; - } -- if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) { -+ if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) { - arg->valid |= FATTR_MTIME; - arg->mtime = iattr->ia_mtime.tv_sec; - arg->mtimensec = iattr->ia_mtime.tv_nsec; -- if (!(ivalid & ATTR_MTIME_SET)) -+ if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime) - arg->valid |= FATTR_MTIME_NOW; - } - } -@@ -1599,6 +1607,63 @@ void fuse_release_nowrite(struct inode *inode) - spin_unlock(&fc->lock); - } - -+static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, -+ struct inode *inode, -+ struct fuse_setattr_in *inarg_p, -+ struct fuse_attr_out *outarg_p) -+{ -+ req->in.h.opcode = FUSE_SETATTR; -+ req->in.h.nodeid = get_node_id(inode); -+ req->in.numargs = 1; -+ req->in.args[0].size = sizeof(*inarg_p); -+ req->in.args[0].value = inarg_p; -+ req->out.numargs = 1; -+ if (fc->minor < 9) -+ req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; -+ else -+ req->out.args[0].size = sizeof(*outarg_p); -+ req->out.args[0].value = outarg_p; -+} -+ -+/* -+ * Flush inode->i_mtime to the server -+ */ -+int fuse_flush_mtime(struct file *file, bool nofail) -+{ -+ struct inode *inode = file->f_mapping->host; -+ struct fuse_inode *fi = get_fuse_inode(inode); -+ struct fuse_conn *fc = get_fuse_conn(inode); -+ struct fuse_req *req = NULL; -+ struct fuse_setattr_in inarg; -+ struct fuse_attr_out outarg; -+ int err; -+ -+ if (nofail) { -+ req = fuse_get_req_nofail_nopages(fc, file); -+ } else { -+ req = fuse_get_req_nopages(fc); -+ if (IS_ERR(req)) -+ return PTR_ERR(req); -+ } -+ -+ memset(&inarg, 0, sizeof(inarg)); -+ memset(&outarg, 0, sizeof(outarg)); -+ -+ inarg.valid |= FATTR_MTIME; -+ inarg.mtime = inode->i_mtime.tv_sec; -+ inarg.mtimensec = inode->i_mtime.tv_nsec; -+ -+ fuse_setattr_fill(fc, req, inode, &inarg, &outarg); -+ fuse_request_send(fc, req); -+ err = req->out.h.error; -+ fuse_put_request(fc, req); -+ -+ if (!err) -+ clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); -+ -+ return err; -+} -+ - /* - * Set attributes, and at the same time refresh them. - * -@@ -1619,6 +1684,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, - bool is_wb = fc->writeback_cache; - loff_t oldsize; - int err; -+ bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode); - - if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) - attr->ia_valid |= ATTR_FORCE; -@@ -1647,7 +1713,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, - - memset(&inarg, 0, sizeof(inarg)); - memset(&outarg, 0, sizeof(outarg)); -- iattr_to_fattr(attr, &inarg); -+ iattr_to_fattr(attr, &inarg, trust_local_mtime); - if (file) { - struct fuse_file *ff = file->private_data; - inarg.valid |= FATTR_FH; -@@ -1658,17 +1724,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, - inarg.valid |= FATTR_LOCKOWNER; - inarg.lock_owner = fuse_lock_owner_id(fc, current->files); - } -- req->in.h.opcode = FUSE_SETATTR; -- req->in.h.nodeid = get_node_id(inode); -- req->in.numargs = 1; -- req->in.args[0].size = sizeof(inarg); -- req->in.args[0].value = &inarg; -- req->out.numargs = 1; -- if (fc->minor < 9) -- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; -- else -- req->out.args[0].size = sizeof(outarg); -- req->out.args[0].value = &outarg; -+ fuse_setattr_fill(fc, req, inode, &inarg, &outarg); - fuse_request_send(fc, req); - err = req->out.h.error; - fuse_put_request(fc, req); -@@ -1685,6 +1741,12 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, - } - - spin_lock(&fc->lock); -+ /* the kernel maintains i_mtime locally */ -+ if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) { -+ inode->i_mtime = attr->ia_mtime; -+ clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); -+ } -+ - fuse_change_attributes_common(inode, &outarg.attr, - attr_timeout(&outarg)); - oldsize = inode->i_size; -@@ -1915,6 +1977,17 @@ static int fuse_removexattr(struct dentry *entry, const char *name) - return err; - } - -+static int fuse_update_time(struct inode *inode, struct timespec *now, -+ int flags) -+{ -+ if (flags & S_MTIME) { -+ inode->i_mtime = *now; -+ set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state); -+ BUG_ON(!S_ISREG(inode->i_mode)); -+ } -+ return 0; -+} -+ - static const struct inode_operations fuse_dir_inode_operations = { - .lookup = fuse_lookup, - .mkdir = fuse_mkdir, -@@ -1954,6 +2027,7 @@ static const struct inode_operations fuse_common_inode_operations = { - .getxattr = fuse_getxattr, - .listxattr = fuse_listxattr, - .removexattr = fuse_removexattr, -+ .update_time = fuse_update_time, - }; - - static const struct inode_operations fuse_symlink_inode_operations = { -diff --git a/fs/fuse/file.c b/fs/fuse/file.c -index 5b863c0..5916dc5 100644 ---- a/fs/fuse/file.c -+++ b/fs/fuse/file.c -@@ -308,6 +308,9 @@ static int fuse_open(struct inode *inode, struct file *file) - - static int fuse_release(struct inode *inode, struct file *file) - { -+ if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) -+ fuse_flush_mtime(file, true); -+ - fuse_release_common(file, FUSE_RELEASE); - - /* return value is ignored by VFS */ -@@ -475,6 +478,12 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, - - fuse_sync_writes(inode); - -+ if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) { -+ int err = fuse_flush_mtime(file, false); -+ if (err) -+ goto out; -+ } -+ - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) { - err = PTR_ERR(req); -@@ -960,16 +969,21 @@ static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, - return req->misc.write.out.size; - } - --void fuse_write_update_size(struct inode *inode, loff_t pos) -+bool fuse_write_update_size(struct inode *inode, loff_t pos) - { - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); -+ bool ret = false; - - spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; -- if (pos > inode->i_size) -+ if (pos > inode->i_size) { - i_size_write(inode, pos); -+ ret = true; -+ } - spin_unlock(&fc->lock); -+ -+ return ret; - } - - static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, -@@ -2873,8 +2887,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, - goto out; - - /* we could have extended the file */ -- if (!(mode & FALLOC_FL_KEEP_SIZE)) -- fuse_write_update_size(inode, offset + length); -+ if (!(mode & FALLOC_FL_KEEP_SIZE)) { -+ bool changed = fuse_write_update_size(inode, offset + length); -+ -+ if (changed && fc->writeback_cache) { -+ struct fuse_inode *fi = get_fuse_inode(inode); -+ -+ inode->i_mtime = current_fs_time(inode->i_sb); -+ set_bit(FUSE_I_MTIME_DIRTY, &fi->state); -+ } -+ } - - if (mode & FALLOC_FL_PUNCH_HOLE) - truncate_pagecache_range(inode, offset, offset + length - 1); -diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h -index 374a8be..1e6ad6d 100644 ---- a/fs/fuse/fuse_i.h -+++ b/fs/fuse/fuse_i.h -@@ -119,6 +119,8 @@ enum { - FUSE_I_INIT_RDPLUS, - /** An operation changing file size is in progress */ - FUSE_I_SIZE_UNSTABLE, -+ /** i_mtime has been updated locally; a flush to userspace needed */ -+ FUSE_I_MTIME_DIRTY, - }; - - struct fuse_conn; -@@ -876,7 +878,9 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, - unsigned fuse_file_poll(struct file *file, poll_table *wait); - int fuse_dev_release(struct inode *inode, struct file *file); - --void fuse_write_update_size(struct inode *inode, loff_t pos); -+bool fuse_write_update_size(struct inode *inode, loff_t pos); -+ -+int fuse_flush_mtime(struct file *file, bool nofail); - - int fuse_do_setattr(struct inode *inode, struct iattr *attr, - struct file *file); -diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c -index 22a78a6..338da52 100644 ---- a/fs/fuse/inode.c -+++ b/fs/fuse/inode.c -@@ -170,8 +170,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - inode->i_blocks = attr->blocks; - inode->i_atime.tv_sec = attr->atime; - inode->i_atime.tv_nsec = attr->atimensec; -- inode->i_mtime.tv_sec = attr->mtime; -- inode->i_mtime.tv_nsec = attr->mtimensec; -+ /* mtime from server may be stale due to local buffered write */ -+ if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { -+ inode->i_mtime.tv_sec = attr->mtime; -+ inode->i_mtime.tv_nsec = attr->mtimensec; -+ } - inode->i_ctime.tv_sec = attr->ctime; - inode->i_ctime.tv_nsec = attr->ctimensec; - -@@ -250,6 +253,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) - { - inode->i_mode = attr->mode & S_IFMT; - inode->i_size = attr->size; -+ inode->i_mtime.tv_sec = attr->mtime; -+ inode->i_mtime.tv_nsec = attr->mtimensec; - if (S_ISREG(inode->i_mode)) { - fuse_init_common(inode); - fuse_init_file_inode(inode); -@@ -296,7 +301,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, - return NULL; - - if ((inode->i_state & I_NEW)) { -- inode->i_flags |= S_NOATIME|S_NOCMTIME; -+ inode->i_flags |= S_NOATIME; -+ if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) -+ inode->i_flags |= S_NOCMTIME; - inode->i_generation = generation; - inode->i_data.backing_dev_info = &fc->bdi; - fuse_init_inode(inode, attr); --- -2.7.4 - - -From 6b1c8b7974158eabc822d1a01f528f95793219ae Mon Sep 17 00:00:00 2001 -From: Maxim Patlasov -Date: Mon, 28 Apr 2014 14:19:24 +0200 -Subject: [PATCH 71/73] fuse: trust kernel i_ctime only - -Let the kernel maintain i_ctime locally: update i_ctime explicitly on -truncate, fallocate, open(O_TRUNC), setxattr, removexattr, link, rename, -unlink. - -The inode flag I_DIRTY_SYNC serves as indication that local i_ctime should -be flushed to the server eventually. The patch sets the flag and updates -i_ctime in course of operations listed above. - -Signed-off-by: Maxim Patlasov -Signed-off-by: Miklos Szeredi -(cherry picked from commit 31f3267b4ba16b12fb9dd3b1953ea0f221cc2ab4) -Signed-off-by: Alex Shi ---- - fs/fuse/dir.c | 22 ++++++++++++++++++++-- - fs/fuse/inode.c | 6 ++++-- - 2 files changed, 24 insertions(+), 4 deletions(-) - -diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c -index b4adb3d9..6d5e45c 100644 ---- a/fs/fuse/dir.c -+++ b/fs/fuse/dir.c -@@ -680,6 +680,14 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, - return create_new_entry(fc, req, dir, entry, S_IFLNK); - } - -+static inline void fuse_update_ctime(struct inode *inode) -+{ -+ if (!IS_NOCMTIME(inode)) { -+ inode->i_ctime = current_fs_time(inode->i_sb); -+ mark_inode_dirty_sync(inode); -+ } -+} -+ - static int fuse_unlink(struct inode *dir, struct dentry *entry) - { - int err; -@@ -714,6 +722,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) - fuse_invalidate_attr(inode); - fuse_invalidate_attr(dir); - fuse_invalidate_entry_cache(entry); -+ fuse_update_ctime(inode); - } else if (err == -EINTR) - fuse_invalidate_entry(entry); - return err; -@@ -772,6 +781,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, - if (!err) { - /* ctime changes */ - fuse_invalidate_attr(oldent->d_inode); -+ fuse_update_ctime(oldent->d_inode); - - fuse_invalidate_attr(olddir); - if (olddir != newdir) -@@ -781,6 +791,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, - if (newent->d_inode) { - fuse_invalidate_attr(newent->d_inode); - fuse_invalidate_entry_cache(newent); -+ fuse_update_ctime(newent->d_inode); - } - } else if (err == -EINTR) { - /* If request was interrupted, DEITY only knows if the -@@ -860,6 +871,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, - inc_nlink(inode); - spin_unlock(&fc->lock); - fuse_invalidate_attr(inode); -+ fuse_update_ctime(inode); - } else if (err == -EINTR) { - fuse_invalidate_attr(inode); - } -@@ -877,6 +889,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, - attr->size = i_size_read(inode); - attr->mtime = inode->i_mtime.tv_sec; - attr->mtimensec = inode->i_mtime.tv_nsec; -+ attr->ctime = inode->i_ctime.tv_sec; -+ attr->ctimensec = inode->i_ctime.tv_nsec; - } - - stat->dev = inode->i_sb->s_dev; -@@ -1841,8 +1855,10 @@ static int fuse_setxattr(struct dentry *entry, const char *name, - fc->no_setxattr = 1; - err = -EOPNOTSUPP; - } -- if (!err) -+ if (!err) { - fuse_invalidate_attr(inode); -+ fuse_update_ctime(inode); -+ } - return err; - } - -@@ -1972,8 +1988,10 @@ static int fuse_removexattr(struct dentry *entry, const char *name) - fc->no_removexattr = 1; - err = -EOPNOTSUPP; - } -- if (!err) -+ if (!err) { - fuse_invalidate_attr(inode); -+ fuse_update_ctime(inode); -+ } - return err; - } - -diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c -index 338da52..8fe0b48 100644 ---- a/fs/fuse/inode.c -+++ b/fs/fuse/inode.c -@@ -174,9 +174,9 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { - inode->i_mtime.tv_sec = attr->mtime; - inode->i_mtime.tv_nsec = attr->mtimensec; -+ inode->i_ctime.tv_sec = attr->ctime; -+ inode->i_ctime.tv_nsec = attr->ctimensec; - } -- inode->i_ctime.tv_sec = attr->ctime; -- inode->i_ctime.tv_nsec = attr->ctimensec; - - if (attr->blksize != 0) - inode->i_blkbits = ilog2(attr->blksize); -@@ -255,6 +255,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) - inode->i_size = attr->size; - inode->i_mtime.tv_sec = attr->mtime; - inode->i_mtime.tv_nsec = attr->mtimensec; -+ inode->i_ctime.tv_sec = attr->ctime; -+ inode->i_ctime.tv_nsec = attr->ctimensec; - if (S_ISREG(inode->i_mode)) { - fuse_init_common(inode); - fuse_init_file_inode(inode); --- -2.7.4 - - -From e03ae8b69bb899c5cdd0c594fa58a85cd180df1b Mon Sep 17 00:00:00 2001 -From: Miklos Szeredi -Date: Mon, 28 Apr 2014 16:43:44 +0200 -Subject: [PATCH 72/73] fuse: add renameat2 support - -Support RENAME_EXCHANGE and RENAME_NOREPLACE flags on the userspace ABI. - -Signed-off-by: Miklos Szeredi -(cherry picked from commit 1560c974dcd40a8d3f193283acd7cc6aee13dc13) -Signed-off-by: Alex Shi - - Conflicts: - fs/fuse/dir.c - include/uapi/linux/fuse.h ---- - fs/fuse/dir.c | 24 ++++++++++++++++-------- - fs/fuse/fuse_i.h | 3 +++ - include/uapi/linux/fuse.h | 10 ++++++++++ - 3 files changed, 29 insertions(+), 8 deletions(-) - -diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c -index 6d5e45c..33dec83 100644 ---- a/fs/fuse/dir.c -+++ b/fs/fuse/dir.c -@@ -753,23 +753,26 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) - return err; - } - --static int fuse_rename(struct inode *olddir, struct dentry *oldent, -- struct inode *newdir, struct dentry *newent) -+static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, -+ struct inode *newdir, struct dentry *newent, -+ unsigned int flags, int opcode, size_t argsize) - { - int err; -- struct fuse_rename_in inarg; -+ struct fuse_rename2_in inarg; - struct fuse_conn *fc = get_fuse_conn(olddir); -- struct fuse_req *req = fuse_get_req_nopages(fc); -+ struct fuse_req *req; - -+ req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - -- memset(&inarg, 0, sizeof(inarg)); -+ memset(&inarg, 0, argsize); - inarg.newdir = get_node_id(newdir); -- req->in.h.opcode = FUSE_RENAME; -+ inarg.flags = flags; -+ req->in.h.opcode = opcode; - req->in.h.nodeid = get_node_id(olddir); - req->in.numargs = 3; -- req->in.args[0].size = sizeof(inarg); -+ req->in.args[0].size = argsize; - req->in.args[0].value = &inarg; - req->in.args[1].size = oldent->d_name.len + 1; - req->in.args[1].value = oldent->d_name.name; -@@ -783,12 +786,17 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, - fuse_invalidate_attr(oldent->d_inode); - fuse_update_ctime(oldent->d_inode); - -+ if (flags & RENAME_EXCHANGE) { -+ fuse_invalidate_attr(newent->d_inode); -+ fuse_update_ctime(newent->d_inode); -+ } -+ - fuse_invalidate_attr(olddir); - if (olddir != newdir) - fuse_invalidate_attr(newdir); - - /* newent will end up negative */ -- if (newent->d_inode) { -+ if (!(flags & RENAME_EXCHANGE) && newent->d_inode) { - fuse_invalidate_attr(newent->d_inode); - fuse_invalidate_entry_cache(newent); - fuse_update_ctime(newent->d_inode); -diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h -index 1e6ad6d..7cc58c9 100644 ---- a/fs/fuse/fuse_i.h -+++ b/fs/fuse/fuse_i.h -@@ -544,6 +544,9 @@ struct fuse_conn { - /** Is fallocate not implemented by fs? */ - unsigned no_fallocate:1; - -+ /** Is rename with flags implemented by fs? */ -+ unsigned no_rename2:1; -+ - /** Use enhanced/automatic page cache invalidation. */ - unsigned auto_inval_data:1; - -diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h -index 60bb2f9..b77cf38 100644 ---- a/include/uapi/linux/fuse.h -+++ b/include/uapi/linux/fuse.h -@@ -93,6 +93,9 @@ - * - * 7.22 - * - add FUSE_ASYNC_DIO -+ * -+ * 7.23 -+ * - add FUSE_RENAME2 request - */ - - #ifndef _LINUX_FUSE_H -@@ -343,6 +346,7 @@ enum fuse_opcode { - FUSE_BATCH_FORGET = 42, - FUSE_FALLOCATE = 43, - FUSE_READDIRPLUS = 44, -+ FUSE_RENAME2 = 45, - - /* CUSE specific operations */ - CUSE_INIT = 4096, -@@ -421,6 +425,12 @@ struct fuse_rename_in { - uint64_t newdir; - }; - -+struct fuse_rename2_in { -+ uint64_t newdir; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ - struct fuse_link_in { - uint64_t oldnodeid; - }; --- -2.7.4 - - -From ab0f88ef018ee96ff56dc39a032c9882a07037d0 Mon Sep 17 00:00:00 2001 -From: Alex Shi -Date: Thu, 20 Aug 2015 14:56:22 +0800 -Subject: [PATCH 73/73] fs/xfs: add missed symbol readlink_copy - -When compile davinci_all_defconfig in Kevin's kernelci.org, -The missed symbol experting cause following error: - - ERROR: "readlink_copy" [fs/xfs/xfs.ko] undefined! - -This patch fixed the problem. - -Signed-off-by: Alex Shi ---- - fs/namei.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/namei.c b/fs/namei.c -index 0fbf150..e3b23a0 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -4340,6 +4340,7 @@ int readlink_copy(char __user *buffer, int buflen, const char *link) - out: - return len; - } -+EXPORT_SYMBOL(readlink_copy); - - /* - * A helper for ->readlink(). This should be used *ONLY* for symlinks that --- -2.7.4 - diff --git a/packages/linux/patches/amlogic-3.14/linux-007-sound_soc_aml_m8-Report-only-working-frequecies-and-bit-depths.patch b/packages/linux/patches/amlogic-3.14/linux-007-sound_soc_aml_m8-Report-only-working-frequecies-and-bit-depths.patch deleted file mode 100644 index 72c12ac072..0000000000 --- a/packages/linux/patches/amlogic-3.14/linux-007-sound_soc_aml_m8-Report-only-working-frequecies-and-bit-depths.patch +++ /dev/null @@ -1,58 +0,0 @@ -From edab2a489829689fdaadb90f1897c948ea3c9020 Mon Sep 17 00:00:00 2001 -From: kszaq -Date: Wed, 3 Jun 2015 10:20:04 +0200 -Subject: [PATCH] sound/soc/aml/m8: Report only working frequecies and bit - depths - ---- - sound/soc/aml/m8/aml_spdif_codec.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/sound/soc/aml/m8/aml_spdif_codec.c b/sound/soc/aml/m8/aml_spdif_codec.c -index ac76ef6..cf0e929 100644 ---- a/sound/soc/aml/m8/aml_spdif_codec.c -+++ b/sound/soc/aml/m8/aml_spdif_codec.c -@@ -26,9 +26,8 @@ - - #define DRV_NAME "spdif-dit" - --#define STUB_RATES SNDRV_PCM_RATE_8000_192000 --#define STUB_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ -- SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE) -+#define STUB_RATES SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 | SNDRV_PCM_RATE_192000 -+#define STUB_FORMATS SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE - - struct pinctrl *pin_spdif_ctl; - struct device *spdif_dev; --- -1.8.3.1 - -From 1e6294498438359c4ac39b2ab563487d750fbaaf Mon Sep 17 00:00:00 2001 -From: kszaq -Date: Tue, 30 Aug 2016 23:31:29 +0200 -Subject: [PATCH] sound/soc/aml/m8: report only working frequencies and bit - depths for I2S - ---- - sound/soc/aml/m8/aml_i2s_dai.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/sound/soc/aml/m8/aml_i2s_dai.c b/sound/soc/aml/m8/aml_i2s_dai.c -index 3626676..d2ebad1 100644 ---- a/sound/soc/aml/m8/aml_i2s_dai.c -+++ b/sound/soc/aml/m8/aml_i2s_dai.c -@@ -290,9 +290,8 @@ static int aml_dai_i2s_resume(struct snd_soc_dai *dai) - return 0; - } - --#define AML_DAI_I2S_RATES (SNDRV_PCM_RATE_8000_192000) --#define AML_DAI_I2S_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ -- SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE) -+#define AML_DAI_I2S_RATES SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 | SNDRV_PCM_RATE_192000 -+#define AML_DAI_I2S_FORMATS SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE - - static struct snd_soc_dai_ops aml_dai_i2s_ops = { - .startup = aml_dai_i2s_startup, --- -1.8.3.1 - diff --git a/projects/Odroid_C2/patches/linux/linux-008-max_freq_dvfs_table.patch b/projects/Odroid_C2/patches/linux/linux-008-max_freq_dvfs_table.patch deleted file mode 100644 index 3b24ae7b52..0000000000 --- a/projects/Odroid_C2/patches/linux/linux-008-max_freq_dvfs_table.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 01dc1e32edb25c62f4f5d25161de0d109da860df Mon Sep 17 00:00:00 2001 -From: Joy Cho -Date: Wed, 31 Aug 2016 16:27:48 +0900 -Subject: [PATCH 1/1] ODROID-C2: Add setup routine to set max. cpu frequency of - dvfs table - -- set "max_freq" in boot.ini -- in MHz unit -ex) setenv max_freq "1656" - -Change-Id: I352c9540d0c34d3ec0ba0f470dae9d4e0786c001 ---- - drivers/amlogic/mailbox/scpi_protocol.c | 52 +++++++++++++++++++++++++++++++++ - 1 file changed, 52 insertions(+) - -diff --git a/drivers/amlogic/mailbox/scpi_protocol.c b/drivers/amlogic/mailbox/scpi_protocol.c -index 37fd00a48c9d..ee8052aafd0d 100644 ---- a/drivers/amlogic/mailbox/scpi_protocol.c -+++ b/drivers/amlogic/mailbox/scpi_protocol.c -@@ -85,6 +85,12 @@ static int high_priority_cmds[] = { - SCPI_CMD_SENSOR_CFG_BOUNDS, - }; - -+#if defined(CONFIG_ARCH_MESON64_ODROIDC2) -+#define DVFS_COUNT_MAX 13 -+#define DVFS_COUNT_1536 6 -+static unsigned long max_freq_dvfs; -+#endif -+ - static struct scpi_opp *scpi_opps[MAX_DVFS_DOMAINS]; - - static int scpi_linux_errmap[SCPI_ERR_MAX] = { -@@ -236,6 +242,9 @@ struct scpi_opp *scpi_dvfs_get_opps(u8 domain) - struct scpi_opp *opps; - size_t opps_sz; - int count, ret; -+#if defined(CONFIG_ARCH_MESON64_ODROIDC2) -+ int i, max_index; -+#endif - - if (domain >= MAX_DVFS_DOMAINS) - return ERR_PTR(-EINVAL); -@@ -254,6 +263,27 @@ struct scpi_opp *scpi_dvfs_get_opps(u8 domain) - return ERR_PTR(-ENOMEM); - - count = DVFS_OPP_COUNT(buf.header); -+ -+#if defined(CONFIG_ARCH_MESON64_ODROIDC2) -+ max_index = 0; -+ if (max_freq_dvfs) { -+ for (i = 0; i < count; i++) { -+ if (buf.opp[i].freq_hz == max_freq_dvfs) -+ break; -+ else -+ max_index++; -+ } -+ count = max_index + 1; -+ } -+ /* if no param "max_freq_dvfs or wrong "max_freq_dvfs" -+ * from boot.ini, consider stable max value */ -+ if ((max_freq_dvfs == 0) || (count > DVFS_COUNT_MAX)) -+ count = DVFS_COUNT_1536; /* default max : 1.536GHz */ -+ -+ pr_info("dvfs [%s] - new count %d, max_freq %ld\n", __func__, -+ count, max_freq_dvfs); -+#endif -+ - opps_sz = count * sizeof(*(opps->opp)); - - opps->count = count; -@@ -453,6 +453,29 @@ int scpi_send_usr_data(u32 client_id, u32 *val, u32 size) - } - EXPORT_SYMBOL_GPL(scpi_send_usr_data); - -+#if defined(CONFIG_ARCH_MESON64_ODROIDC2) -+static int __init get_max_freq(char *str) -+{ -+ int ret; -+ -+ if (NULL == str) { -+ /* consider default set */ -+ max_freq_dvfs = 1536000000; -+ return -EINVAL; -+ } -+ -+ ret = kstrtoul(str, 0, &max_freq_dvfs); -+ -+ /* in unit Hz */ -+ max_freq_dvfs *= 1000000; -+ -+ pr_info("dvfs [%s] - max_freq : %ld\n", __func__, max_freq_dvfs); -+ -+ return 0; -+} -+__setup("max_freq=", get_max_freq); -+#endif -+ - int scpi_get_usr_data(u32 client_id, u32 *val, u32 size) - { - struct scpi_data_buf sdata; --- -2.11.0 -