From 5b22fa44580ad8dab25c4a910fc9295bae0ddf73 Mon Sep 17 00:00:00 2001 From: John Eckersberg Date: Thu, 4 Dec 2025 12:39:07 -0500 Subject: [PATCH] ovl: add directory metadata passthrough feature Container runtimes like Podman unpack container image layers as separate directories that are then stacked using overlayfs. When a layer's tarball contains files within a directory but not the directory entry itself, the runtime must create a "structural" directory to hold the files. This structural directory gets the current timestamp as its mtime, which shadows the meaningful timestamp from a base image layer. Add a new "passthrough" feature that allows directories to delegate their metadata to a lower layer. When a directory has the "trusted.overlay.passthrough" xattr (or "user.overlay.passthrough" with userxattr mount option), overlayfs will walk down through the layer stack until it finds a directory without the passthrough xattr, and return metadata from that layer. Multiple layers can have passthrough set, allowing the chain to extend through several layers. When any modification is made to a directory with passthrough xattr (chmod, chown, touch, or creating/deleting files within the directory), the metadata is first copied from the resolved lower layer, the passthrough xattr is removed, and then the modification is applied. 
This feature is controlled by: - Kernel config: CONFIG_OVERLAY_FS_PASSTHROUGH (default off) - Module parameter: "passthrough=on/off" - Mount option: "passthrough=on/off" Signed-off-by: John Eckersberg --- Documentation/filesystems/overlayfs.rst | 44 ++++++++++++++ fs/overlayfs/Kconfig | 21 +++++++ fs/overlayfs/copy_up.c | 81 +++++++++++++++++++++++++ fs/overlayfs/inode.c | 73 ++++++++++++++++++++++ fs/overlayfs/overlayfs.h | 3 + fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/params.c | 14 +++++ fs/overlayfs/params.h | 1 + fs/overlayfs/util.c | 25 ++++++++ 9 files changed, 263 insertions(+) diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index ab989807a2cb..913a06cbee52 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -525,6 +525,50 @@ supports these values: will only be used if the data file has fs-verity enabled, otherwise a full copy-up is used. + +Directory metadata passthrough +------------------------------ + +When the "passthrough" feature is enabled, directories marked with the +"trusted.overlay.passthrough" xattr (or "user.overlay.passthrough" with +userxattr) will report their metadata (mtime, atime, ctime, mode, uid, gid) +from a lower layer instead of the layer containing the directory. + +This feature is primarily useful for container runtimes that unpack container +image layers. When a layer's tarball contains files within a directory but +not the directory entry itself, the runtime must create a "structural" +directory to hold the files. This structural directory gets the current +timestamp as its mtime, which would otherwise shadow the meaningful timestamp +from a base image layer. + +By marking structural directories with the passthrough xattr, overlayfs will +walk down through the layer stack until it finds a directory without the +passthrough xattr, and return metadata from that layer. 
Multiple layers can +have passthrough set, allowing the chain to extend through several layers:: + + Upper: (empty, for runtime modifications) + Lower 1: /foo/ [passthrough] → keep looking + Lower 2: /foo/ [passthrough] → keep looking + Lower 3: /foo/ [no passthrough] → use this layer's metadata + +When any modification is made to a directory with passthrough xattr (such as +chmod, chown, touch, or creating/deleting files within the directory), the +metadata is first copied from the resolved lower layer to the layer containing +the passthrough directory, the passthrough xattr is removed, and then the +modification is applied. + +This feature is controlled by: + +- Kernel config: CONFIG_OVERLAY_FS_PASSTHROUGH (default off) +- Module parameter: "passthrough=on/off" +- Mount option: "passthrough=on/off" + +Note, that this feature is not backward compatible. That is, mounting an +overlay which has passthrough directories on a kernel that doesn't support +this feature will result in incorrect metadata being reported for those +directories. + + Sharing and copying layers -------------------------- diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 2ac67e04a6fb..71ec9457adf5 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -126,6 +126,27 @@ config OVERLAY_FS_METACOPY If unsure, say N. +config OVERLAY_FS_PASSTHROUGH + bool "Overlayfs: turn on directory metadata passthrough feature by default" + depends on OVERLAY_FS + help + If this config option is enabled then overlay filesystems will + pass through directory metadata requests to lower layers when a + directory is marked with the "trusted.overlay.passthrough" xattr. + This is useful for container runtimes that need to preserve + directory timestamps from base image layers when unpacking image + layers that contain files but not their parent directory entries. 
+ + It is still possible to turn off this feature globally with the + "passthrough=off" module option or on a filesystem instance basis + with the "passthrough=off" mount option. + + Note, that this feature is not backward compatible. That is, + mounting an overlay which has passthrough directories on a kernel + that doesn't support this feature will have unexpected results. + + If unsure, say N. + config OVERLAY_FS_DEBUG bool "Overlayfs: turn on extra debugging checks" default n diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 758611ee4475..5425bc136b2c 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -415,6 +415,87 @@ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry, return err; } +/* + * For directories with passthrough xattr, copy metadata from the passthrough + * source (a lower layer) to the upper directory and remove the passthrough + * xattr. This should be called before modifying a directory's metadata. + * + * Returns 0 on success or if no passthrough handling was needed. + * Returns negative error on failure. 
+ */
+int ovl_copy_up_dir_passthrough(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	struct dentry *upper;
+	struct path upperpath;
+	struct ovl_entry *oe;
+	struct ovl_path *lower;
+	unsigned int nr;
+	struct kstat stat;
+	int err;
+
+	/* Nothing to do unless the feature is on and this is a directory */
+	if (!ofs->config.passthrough || !d_is_dir(dentry))
+		return 0;
+
+	/* Only a directory that exists in upper can be rewritten */
+	upper = ovl_dentry_upper(dentry);
+	if (!upper)
+		return 0;
+
+	/* Upper is not marked: it already carries its own metadata */
+	upperpath.mnt = ovl_upper_mnt(ofs);
+	upperpath.dentry = upper;
+	if (!ovl_path_check_passthrough_xattr(ofs, &upperpath))
+		return 0;
+
+	/*
+	 * Walk the lower stack from the top. The first lower directory
+	 * that is not marked passthrough is the metadata source; its
+	 * attributes are copied to upper.
+	 */
+	oe = OVL_E(dentry);
+	lower = ovl_lowerstack(oe);
+	for (nr = ovl_numlower(oe); nr; nr--, lower++) {
+		struct path lowerpath = {
+			.mnt = lower->layer->mnt,
+			.dentry = lower->dentry,
+		};
+
+		if (ovl_path_check_passthrough_xattr(ofs, &lowerpath))
+			continue;
+
+		/* Found the metadata source */
+		err = vfs_getattr(&lowerpath, &stat, STATX_BASIC_STATS,
+				  AT_STATX_SYNC_AS_STAT);
+		if (err)
+			return err;
+
+		/*
+		 * NOTE(review): this modifies the upper inode, but no
+		 * inode lock, credential override or write access is
+		 * taken in this function - confirm that every caller
+		 * provides what ovl_set_attr() needs.
+		 */
+		err = ovl_set_attr(ofs, upper, &stat);
+		if (err)
+			return err;
+		break;
+	}
+
+	/*
+	 * Drop the marker. This is also reached when every lower directory
+	 * is marked (not expected): nothing is copied then, upper simply
+	 * keeps its own metadata. An already missing xattr (-ENODATA) is
+	 * not an error.
+	 */
+	err = ovl_removexattr(ofs, upper, OVL_XATTR_PASSTHROUGH);
+	if (err == -ENODATA)
+		err = 0;
+
+	return err;
+}
+
 struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode,
 				  bool is_upper)
 {
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index bdbf86b56a9b..58a4aab03608 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -39,6 +39,13 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 		err = ovl_copy_up(dentry);
 	else
 		err = ovl_copy_up_with_data(dentry);
+	if (!err) {
+		/*
+		 * For directories with passthrough xattr, copy metadata from
+		 * the passthrough source before modifying attributes.
+		 */
+		err = ovl_copy_up_dir_passthrough(dentry);
+	}
 	if (!err) {
 		struct inode *winode = NULL;
 
@@ -160,11 +167,56 @@ static inline int ovl_real_getattr_nosec(struct super_block *sb,
 	return vfs_getattr_nosec(path, stat, request_mask, flags);
 }
 
+/*
+ * For directories with passthrough xattr, find the lower layer that should
+ * provide metadata. Walks down the layer stack until finding a directory
+ * without the passthrough xattr.
+ *
+ * Returns true if a passthrough metadata source was found, with metapath set.
+ * Caller must path_put(metapath) when done.
+ * Returns false if the topmost path doesn't have passthrough xattr.
+ */
+static bool ovl_get_passthrough_metapath(struct ovl_fs *ofs,
+					 struct dentry *dentry,
+					 const struct path *toppath,
+					 struct path *metapath)
+{
+	struct ovl_entry *oe = OVL_E(dentry);
+	struct ovl_path *lower = ovl_lowerstack(oe);
+	unsigned int nr = ovl_numlower(oe);
+
+	if (!ovl_path_check_passthrough_xattr(ofs, toppath))
+		return false;
+
+	for (; nr; nr--, lower++) {
+		struct path curpath = {
+			.mnt = lower->layer->mnt,
+			.dentry = lower->dentry,
+		};
+
+		/*
+		 * If @toppath is itself an entry of the lower stack it is
+		 * already known to be marked: skip the repeated lookup.
+		 */
+		if (curpath.dentry == toppath->dentry ||
+		    ovl_path_check_passthrough_xattr(ofs, &curpath))
+			continue;
+
+		path_get(&curpath);
+		*metapath = curpath;
+		return true;
+	}
+
+	/* Every layer is marked (not expected): no separate source exists */
+	return false;
+}
+
 int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
 		struct kstat *stat, u32 request_mask, unsigned int flags)
 {
 	struct dentry *dentry = path->dentry;
 	struct super_block *sb = dentry->d_sb;
+	struct ovl_fs *ofs = OVL_FS(sb);
 	enum ovl_path_type type;
 	struct path realpath;
 	struct inode *inode = d_inode(dentry);
@@ -176,10 +228,31 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
 	metacopy_blocks = ovl_is_metacopy_dentry(dentry);
 
 	type = ovl_path_real(dentry, &realpath);
 	err = ovl_real_getattr_nosec(sb, &realpath, stat, request_mask, flags);
 	if (err)
 		return err;
 
+	/* Passthrough dirs report mode, owner and times of the metadata source */
+	if (is_dir && ofs->config.passthrough) {
+		struct path metapath;
+		struct kstat metastat;
+
+		if (ovl_get_passthrough_metapath(ofs, dentry, &realpath, &metapath)) {
+			err = ovl_real_getattr_nosec(sb, &metapath, &metastat,
+						     request_mask, flags);
+			path_put(&metapath);
+			if (err)
+				return err;
+			/* st_ino, st_dev, st_nlink etc. keep describing realpath */
+			stat->mode = metastat.mode;
+			stat->uid = metastat.uid;
+			stat->gid = metastat.gid;
+			stat->atime = metastat.atime;
+			stat->mtime = metastat.mtime;
+			stat->ctime = metastat.ctime;
+		}
+	}
+
 	/* Report the effective immutable/append-only STATX flags */
 	generic_fill_statx_attr(inode, stat);
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index f9ac9bdde830..43e34c1da1ef 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -50,6 +50,7 @@ enum ovl_xattr {
 	OVL_XATTR_METACOPY,
 	OVL_XATTR_PROTATTR,
 	OVL_XATTR_XWHITEOUT,
+	OVL_XATTR_PASSTHROUGH,
 };
 
 enum ovl_inode_flag {
@@ -543,6 +544,7 @@ char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
 			   enum ovl_xattr ox);
 bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
 bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
+bool ovl_path_check_passthrough_xattr(struct ovl_fs *ofs, const struct path *path);
 bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
 			 const struct path *upperpath);
 
@@ -909,6 +911,7 @@ int ovl_copy_up_with_data(struct dentry *dentry);
 int ovl_maybe_copy_up(struct dentry *dentry, int flags);
 int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new);
 int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
+int ovl_copy_up_dir_passthrough(struct dentry *dentry);
 struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode,
 				  bool is_upper);
 struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 1d4828dbcf7a..3f5ab9b054ec 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,6 +17,7 @@
struct ovl_config { bool nfs_export; int xino; bool metacopy; + bool passthrough; bool userxattr; bool ovl_volatile; }; diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 63b7346c5ee1..143b39b57fd3 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -43,6 +43,11 @@ module_param_named(metacopy, ovl_metacopy_def, bool, 0644); MODULE_PARM_DESC(metacopy, "Default to on or off for the metadata only copy up feature"); +static bool ovl_passthrough_def = IS_ENABLED(CONFIG_OVERLAY_FS_PASSTHROUGH); +module_param_named(passthrough, ovl_passthrough_def, bool, 0644); +MODULE_PARM_DESC(passthrough, + "Default to on or off for the directory metadata passthrough feature"); + enum ovl_opt { Opt_lowerdir, Opt_lowerdir_add, @@ -57,6 +62,7 @@ enum ovl_opt { Opt_userxattr, Opt_xino, Opt_metacopy, + Opt_passthrough, Opt_verity, Opt_volatile, Opt_override_creds, @@ -154,6 +160,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_flag("userxattr", Opt_userxattr), fsparam_enum("xino", Opt_xino, ovl_parameter_xino), fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool), + fsparam_enum("passthrough", Opt_passthrough, ovl_parameter_bool), fsparam_enum("verity", Opt_verity, ovl_parameter_verity), fsparam_flag("volatile", Opt_volatile), fsparam_flag_no("override_creds", Opt_override_creds), @@ -662,6 +669,10 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param) config->metacopy = result.uint_32; ctx->set.metacopy = true; break; + case Opt_passthrough: + config->passthrough = result.uint_32; + ctx->set.passthrough = true; + break; case Opt_verity: config->verity_mode = result.uint_32; break; @@ -800,6 +811,7 @@ int ovl_init_fs_context(struct fs_context *fc) ofs->config.nfs_export = ovl_nfs_export_def; ofs->config.xino = ovl_xino_def(); ofs->config.metacopy = ovl_metacopy_def; + ofs->config.passthrough = ovl_passthrough_def; fc->s_fs_info = ofs; fc->fs_private = ctx; @@ -1070,6 +1082,8 @@ int ovl_show_options(struct 
seq_file *m, struct dentry *dentry)
 		seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
 	if (ofs->config.metacopy != ovl_metacopy_def)
 		seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy));
+	if (ofs->config.passthrough != ovl_passthrough_def)
+		seq_printf(m, ",passthrough=%s", str_on_off(ofs->config.passthrough));
 	if (ofs->config.ovl_volatile)
 		seq_puts(m, ",volatile");
 	if (ofs->config.userxattr)
diff --git a/fs/overlayfs/params.h b/fs/overlayfs/params.h
index ffd53cdd8482..260dc721786b 100644
--- a/fs/overlayfs/params.h
+++ b/fs/overlayfs/params.h
@@ -12,6 +12,7 @@ extern const struct constant_table ovl_parameter_redirect_dir[];
 /* The set of options that user requested explicitly via mount options */
 struct ovl_opt_set {
 	bool metacopy;
+	bool passthrough;
 	bool redirect;
 	bool nfs_export;
 	bool index;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 94986d11a166..0c0e293cd5d9 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -622,6 +622,13 @@ static void ovl_dir_version_inc(struct dentry *dentry, bool impurity)
 
 void ovl_dir_modified(struct dentry *dentry, bool impurity)
 {
+	int err = ovl_copy_up_dir_passthrough(dentry);
+
+	/* Materializing passthrough metadata failed; nowhere to return it */
+	if (err)
+		pr_warn_ratelimited("passthrough copy up failed (%pd2, err=%i)\n",
+				    dentry, err);
+
 	/* Copy mtime/ctime */
 	ovl_copyattr(d_inode(dentry));
 
@@ -779,6 +786,22 @@ bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
 	return res >= 0;
 }
 
+/*
+ * Check if a directory has the passthrough xattr set.
+ * Passthrough directories delegate their metadata to a lower layer.
+ */
+bool ovl_path_check_passthrough_xattr(struct ovl_fs *ofs, const struct path *path)
+{
+	/* The marker is only honoured on directories */
+	if (!d_is_dir(path->dentry))
+		return false;
+
+	/*
+	 * Zero-sized query: only presence matters, the value is never read.
+	 */
+	return ovl_path_getxattr(ofs, path, OVL_XATTR_PASSTHROUGH, NULL, 0) >= 0;
+}
+
 /*
  * Load persistent uuid from xattr into s_uuid if found, or store a new
  * random generated value in s_uuid and in xattr.
@@ -865,6 +888,7 @@ char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
 #define OVL_XATTR_METACOPY_POSTFIX	"metacopy"
 #define OVL_XATTR_PROTATTR_POSTFIX	"protattr"
 #define OVL_XATTR_XWHITEOUT_POSTFIX	"whiteout"
+#define OVL_XATTR_PASSTHROUGH_POSTFIX	"passthrough"
 
 #define OVL_XATTR_TAB_ENTRY(x) \
 	[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
@@ -881,6 +905,7 @@ const char *const ovl_xattr_table[][2] = {
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT),
+	OVL_XATTR_TAB_ENTRY(OVL_XATTR_PASSTHROUGH),
 };
 
 int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
-- 
2.51.1