Commit 9e90d216 by btimby

Overhauled most of the ArchiveMountFS file system method implementations so that
they behave properly when dealing with archive files (instead of archive contents).

A couple of these changes were also driven by the need to mix ArchiveFS mount points
with mount points hosting other file systems (like remote file systems).
parent 4de31c2d
...@@ -178,25 +178,38 @@ class ArchiveMountFS(mountfs.MountFS): ...@@ -178,25 +178,38 @@ class ArchiveMountFS(mountfs.MountFS):
'''A subclass of MountFS that automatically identifies archives. Once identified '''A subclass of MountFS that automatically identifies archives. Once identified
archives are mounted in place of the archive file.''' archives are mounted in place of the archive file.'''
def __init__(self, rootfs, auto_mount=True, max_size=None): def __init__(self, rootfs, auto_close=True, auto_mount=True, max_size=None):
self.auto_mount = auto_mount self.auto_mount = auto_mount
self.max_size = max_size self.max_size = max_size
super(ArchiveMountFS, self).__init__(auto_close=True) super(ArchiveMountFS, self).__init__(auto_close=auto_close)
self.rootfs = rootfs self.rootfs = rootfs
self.mountdir('/', rootfs) self.mountdir('/', rootfs)
def __del__(self): def __del__(self):
# Close automatically. # Close (if requested by auto_close, which by default is True) when
# de-referenced.
self.close() self.close()
def ismount(self, path): def ismount(self, path):
"Checks if the given path has a file system mounted on it."
try: try:
object = self.mount_tree[path] object = self.mount_tree[path]
except KeyError: except KeyError:
return False return False
return type(object) is mountfs.MountFS.DirMount return isinstance(object, mountfs.MountFS.DirMount)
def _delegate(self, path, auto_mount=True): def _delegate(self, path, auto_mount=True):
"""A _delegate() override that will automatically mount archives that are
encountered in the path. For example, the path /foo/bar.zip/baz.txt contains
the archive path /foo/bar.zip. If this archive can be mounted by ArchiveFS,
it will be. Then the file system call will be delegated to that mounted file
system, which will act upon /baz.txt within the archive. This is lazy
initialization which means users of this class need not crawl the file system
for archives and mount them all up-front.
This behavior can be overridden by self.auto_mount=False or by passing the
auto_mount=False keyword argument.
"""
if self.auto_mount and auto_mount: if self.auto_mount and auto_mount:
for ppath in recursepath(path)[1:]: for ppath in recursepath(path)[1:]:
if self.ismount(ppath): if self.ismount(ppath):
...@@ -227,94 +240,121 @@ class ArchiveMountFS(mountfs.MountFS): ...@@ -227,94 +240,121 @@ class ArchiveMountFS(mountfs.MountFS):
continue continue
return super(ArchiveMountFS, self)._delegate(path) return super(ArchiveMountFS, self)._delegate(path)
def getsyspath(self, path): def getsyspath(self, path, allow_none=False):
"""Optimized getsyspath() that avoids calling _delegate() and thus """A getsyspath() override that returns paths relative to the root fs."""
mounting an archive.""" root = self.rootfs.getsyspath('/', allow_none=allow_none)
return self.rootfs.getsyspath(path) if root:
return join(root, path.lstrip('/'))
def open(self, path, *args, **kwargs):
"""An open() override that opens an archive. It is not fooled by mounted
archives. If the path is a mounted archive, it is unmounted and the archive
file is opened and returned."""
if libarchive.is_archive_name(path) and self.ismount(path):
self.unmount(path)
fs, _mount_path, delegate_path = self._delegate(path, auto_mount=False)
return fs.open(delegate_path, *args, **kwargs)
def getinfo(self, path): def getinfo(self, path):
"Optimized getinfo() that skips mounting an archive to get it's info." """A getinfo() override that allows archives to masquerade as directories.
path = normpath(path).lstrip('/') If the path is not an archive, the call is delegated. In the event that the
if libarchive.is_archive_name(path): path is an archive, that archive is mounted to ensure it can actually be
# Skip trying to mount the archive and just get it's info. treated like a directory."""
fs, _mount_path, delegate_path = self._delegate(path)
if isinstance(fs, ArchiveFS) and path == _mount_path:
info = self.rootfs.getinfo(path) info = self.rootfs.getinfo(path)
# Masquerade as a directory.
info['st_mode'] = info.get('st_mode', 0) | stat.S_IFDIR info['st_mode'] = info.get('st_mode', 0) | stat.S_IFDIR
return info return info
return super(ArchiveMountFS, self).getinfo(path) return super(ArchiveMountFS, self).getinfo(path)
def isdir(self, path):
"""An isdir() override that allows archives to masquerade as directories. If
the path is not an archive, the call is delegated. In the event that the path
is an archive, that archive is mounted to ensure it can actually be treated
like a directory."""
fs, _mount_path, delegate_path = self._delegate(path)
if isinstance(fs, ArchiveFS) and path == _mount_path:
# If the path is an archive mount point, it is a directory.
return True
return super(ArchiveMountFS, self).isdir(path)
def isfile(self, path):
"""An isfile() override that checks if the given path is a file or not. It is
not fooled by a mounted archive. If the path is not an archive, the call is
delegated."""
fs, _mount_path, delegate_path = self._delegate(path, auto_mount=False)
if isinstance(fs, ArchiveFS) and path == _mount_path:
# If the path is an archive mount point, it is a file.
return True
else:
return fs.isfile(delegate_path)
def getsize(self, path): def getsize(self, path):
"Optimized getsize() that skips mounting an archive to get is' size." """A getsize() override that returns the size of an archive. It is not fooled by
path = normpath(path).lstrip('/') a mounted archive. If the path is not an archive, the call is delegated."""
if libarchive.is_archive_name(path): fs, _mount_path, delegate_path = self._delegate(path, auto_mount=False)
if isinstance(fs, ArchiveFS) and path == _mount_path:
return self.rootfs.getsize(path) return self.rootfs.getsize(path)
return super(ArchiveMountFS, self).getsize(path) else:
return fs.getsize(delegate_path)
def remove(self, path): def remove(self, path):
"Optimized remove() that deletes an archive directly." """A remove() override that deletes an archive directly. It is not fooled
path = normpath(path).lstrip('/') by a mounted archive. If the path is not an archive, the call is delegated."""
if self.ismount(path) and libarchive.is_archive_name(path): if libarchive.is_archive_name(path) and self.ismount(path):
# Ensure a mount archive is unmounted before it is deleted.
self.unmount(path) self.unmount(path)
if libarchive.is_archive_name(path): fs, _mount_path, delegate_path = self._delegate(path, auto_mount=False)
# Send the delete directoy to the root filesystem. This avoids return fs.remove(delegate_path)
# being delegated, and the fs we just unmounted being remounted.
return self.rootfs.remove(path)
# Otherwise, just delegate to the responsible fs.
return super(ArchiveMountFS, self).remove(path)
def makedir(self, path, *args, **kwargs): def makedir(self, path, *args, **kwargs):
"""A makedir() override that handles creation of a directory at an archive
location properly. If the path is not an archive, the call is delegated."""
# If the caller is trying to create a directory where an archive lives # If the caller is trying to create a directory where an archive lives
# we should raise an error. In the case when allow_recreate=True, this # we should raise an error. In the case when allow_recreate=True, this
# call would succeed without the check below. # call would succeed without the check below.
if self.rootfs.isfile(path): fs, _mount_path, delegate_path = self._delegate(path, auto_mount=False)
if isinstance(fs, ArchiveFS) and path == _mount_path:
raise ResourceInvalidError(path, msg="Cannot create directory, there's " raise ResourceInvalidError(path, msg="Cannot create directory, there's "
"already a file of that name: %(path)s") "already a file of that name: %(path)s")
return super(ArchiveMountFS, self).makedir(path, *args, **kwargs) return fs.makedir(delegate_path, *args, **kwargs)
def copy(self, src, dst, **kwargs): def copy(self, src, dst, overwrite=False, chunk_size=1024*64):
"""An optimized copy() that will skip mounting an archive if one is involved """An optimized copy() that will skip mounting an archive if one is involved
as either the src or dst. It tries to be smart and delegate as much work as as either the src or dst. This allows the file containing the archive to be
possible.""" copied."""
src = normpath(src).lstrip('/')
dst = normpath(dst).lstrip('/')
# If src or dst are an archive unmount them. Then delegate their path and allow mounting
# only if the path itself does not point at an archive.
src_is_archive = libarchive.is_archive_name(src)
if src_is_archive and self.ismount(src):
self.unmount(src)
fs1, _mount_path1, delegate_path1 = self._delegate(src, auto_mount=(not src_is_archive))
dst_is_archive = libarchive.is_archive_name(dst)
if dst_is_archive and self.ismount(dst):
self.unmount(dst)
fs2, _mount_path2, delegate_path2 = self._delegate(dst, auto_mount=(not dst_is_archive))
# Use the same logic that appears in MountFS:
if fs1 is fs2 and fs1 is not self:
fs1.copy(delegate_path1, delegate_path2, **kwargs)
else:
super(ArchiveMountFS, self).copy(src, dst, **kwargs)
def move(self, src, dst, **kwargs):
"""An optimized move() that does not bother mounting an archive to perform a move.
It actually uses copy() then remove() to do it's work, since both of those are
already "safe"."""
src = normpath(src).lstrip('/')
dst = normpath(dst).lstrip('/')
# If src or dst are an archive unmount them. Then delegate their path and allow mounting
# only if the path itself does not point at an archive.
src_is_archive = libarchive.is_archive_name(src) src_is_archive = libarchive.is_archive_name(src)
# If src path is a mounted archive, unmount it.
if src_is_archive and self.ismount(src): if src_is_archive and self.ismount(src):
self.unmount(src) self.unmount(src)
fs1, _mount_path1, delegate_path1 = self._delegate(src, auto_mount=(not src_is_archive)) # Now delegate the path, if the path is an archive, don't remount it.
srcfs, _ignored, src = self._delegate(src, auto_mount=(not src_is_archive))
# Follow the same steps for dst.
dst_is_archive = libarchive.is_archive_name(dst) dst_is_archive = libarchive.is_archive_name(dst)
if dst_is_archive and self.ismount(dst): if dst_is_archive and self.ismount(dst):
self.unmount(dst) self.unmount(dst)
fs2, _mount_path2, delegate_path2 = self._delegate(dst, auto_mount=(not dst_is_archive)) dstfs, _ignored, dst = self._delegate(dst, auto_mount=(not dst_is_archive))
# Use the same logic that appears in MountFS: # srcfs, src and dstfs, dst are now the file system and path for our src and dst.
if fs1 is fs2 and fs1 is not self: if srcfs is dstfs and srcfs is not self:
fs1.move(delegate_path1, delegate_path2, **kwargs) # Both src and dst are on the same fs, let it do the copy.
srcfs.copy(src, dst, **kwargs)
else: else:
super(ArchiveMountFS, self).move(src, dst, **kwargs) # Src and dst are on different file systems. Just do the copy...
srcfd = None
try:
srcfd = srcfs.open(src, 'rb')
dstfs.setcontents(dst, srcfd, chunk_size=chunk_size)
except ResourceNotFoundError:
if srcfs.exists(src) and not dstfs.exists(dirname(dst)):
raise ParentDirectoryMissingError(dst)
finally:
if srcfd:
srcfd.close()
def move(self, src, dst, overwrite=False, chunk_size=1024*64):
"""An optimized move() that delegates the work to the overridden copy() and
remove() methods."""
self.copy(src, dst, overwrite=overwrite, chunk_size=chunk_size)
self.remove(src)
def main(): def main():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment