Commit 6ea42bbc by rfkelly0

S3FS: more robust detection of keys that represent subdirs

parent e9c3bf44
...@@ -15,7 +15,7 @@ implementations of this interface such as: ...@@ -15,7 +15,7 @@ implementations of this interface such as:
""" """
__version__ = "0.4.0b1" __version__ = "0.4.0b2"
__author__ = "Will McGugan (will@willmcgugan.com)" __author__ = "Will McGugan (will@willmcgugan.com)"
# 'base' imports * from 'path' and 'errors', so their # 'base' imports * from 'path' and 'errors', so their
......
...@@ -395,6 +395,13 @@ class S3FS(FS): ...@@ -395,6 +395,13 @@ class S3FS(FS):
raise ResourceInvalidError(path,msg=msg) raise ResourceInvalidError(path,msg=msg)
raise ResourceNotFoundError(path) raise ResourceNotFoundError(path)
def _key_is_dir(self, k):
    """Tell whether the listing entry `k` stands for a directory.

    An entry is treated as a directory when it is a Prefix instance, or
    when its key name ends with this filesystem's separator string.
    """
    return isinstance(k, Prefix) or k.name.endswith(self._separator)
def _filter_keys(self,path,keys,wildcard,full,absolute, def _filter_keys(self,path,keys,wildcard,full,absolute,
dirs_only,files_only): dirs_only,files_only):
"""Filter out keys not matching the given criteria. """Filter out keys not matching the given criteria.
...@@ -406,9 +413,9 @@ class S3FS(FS): ...@@ -406,9 +413,9 @@ class S3FS(FS):
if dirs_only and files_only: if dirs_only and files_only:
raise ValueError("dirs_only and files_only can not both be True") raise ValueError("dirs_only and files_only can not both be True")
if dirs_only: if dirs_only:
keys = ((nm,k) for (nm,k) in keys if k.name.endswith(sep)) keys = ((nm,k) for (nm,k) in keys if self._key_is_dir(k))
elif files_only: elif files_only:
keys = ((nm,k) for (nm,k) in keys if not k.name.endswith(sep)) keys = ((nm,k) for (nm,k) in keys if not self._key_is_dir(k))
if wildcard is not None: if wildcard is not None:
if callable(wildcard): if callable(wildcard):
keys = ((nm,k) for (nm,k) in keys if wildcard(nm)) keys = ((nm,k) for (nm,k) in keys if wildcard(nm))
...@@ -546,7 +553,7 @@ class S3FS(FS): ...@@ -546,7 +553,7 @@ class S3FS(FS):
info["name"] = basename(name) info["name"] = basename(name)
else: else:
info["name"] = basename(self._uns3key(k.name)) info["name"] = basename(self._uns3key(k.name))
if isinstance(key,Prefix): if self._key_is_dir(key):
info["st_mode"] = 0700 | statinfo.S_IFDIR info["st_mode"] = 0700 | statinfo.S_IFDIR
else: else:
info["st_mode"] = 0700 | statinfo.S_IFREG info["st_mode"] = 0700 | statinfo.S_IFREG
...@@ -554,6 +561,8 @@ class S3FS(FS): ...@@ -554,6 +561,8 @@ class S3FS(FS):
info['size'] = int(key.size) info['size'] = int(key.size)
etag = getattr(key,"etag",None) etag = getattr(key,"etag",None)
if etag is not None: if etag is not None:
if isinstance(etag,unicode):
etag = etag.encode("utf8")
info['etag'] = etag.strip('"').strip("'") info['etag'] = etag.strip('"').strip("'")
if hasattr(key,"last_modified"): if hasattr(key,"last_modified"):
# TODO: does S3 use any other formats? # TODO: does S3 use any other formats?
...@@ -633,7 +642,7 @@ class S3FS(FS): ...@@ -633,7 +642,7 @@ class S3FS(FS):
yield item yield item
else: else:
prefix = self._s3path(path) prefix = self._s3path(path)
for k in self._s3bukt.list(prefix=prefix): for k in self._s3bukt.list(prefix=prefix):
name = relpath(self._uns3path(k.name,prefix)) name = relpath(self._uns3path(k.name,prefix))
if name != "": if name != "":
if not isinstance(name,unicode): if not isinstance(name,unicode):
...@@ -648,6 +657,34 @@ class S3FS(FS): ...@@ -648,6 +657,34 @@ class S3FS(FS):
continue continue
yield pathjoin(path,name) yield pathjoin(path,name)
def walkinfo(self,
             path="/",
             wildcard=None,
             dir_wildcard=None,
             search="breadth",
             ignore_errors=False ):
    """Yield (path, info) pairs for entries found beneath `path`.

    For a plain breadth-first walk with no `dir_wildcard`, the bucket is
    listed once by key prefix and the info dict is built from each listed
    key.  Any other combination of arguments is handed to the parent
    class's walkfiles(), with getinfo() called for every path it yields.

    `wildcard` may be None (no filtering), a callable taking the entry's
    base name, or an fnmatch-style pattern matched against the base name.
    """
    if search == "breadth" and dir_wildcard is None:
        prefix = self._s3path(path)
        for key in self._s3bukt.list(prefix=prefix):
            name = relpath(self._uns3path(key.name,prefix))
            # An empty relative name is skipped (presumably the key for
            # `path` itself - confirm).
            if name == "":
                continue
            if not isinstance(name,unicode):
                name = name.decode("utf8")
            if wildcard is not None:
                # Filtering always looks at the final path component only.
                if callable(wildcard):
                    matched = wildcard(basename(name))
                else:
                    matched = fnmatch(basename(name),wildcard)
                if not matched:
                    continue
            yield (pathjoin(path,name),self._get_key_info(key,name))
    else:
        # NOTE(review): this branch delegates to walkfiles(), so it
        # presumably reports files only, whereas the listing branch above
        # applies no file/directory filter - confirm this is intended.
        walk_args = (wildcard,dir_wildcard,search,ignore_errors)
        for found in super(S3FS,self).walkfiles(path,*walk_args):
            yield (found,self.getinfo(found))
def walkfilesinfo(self, def walkfilesinfo(self,
path="/", path="/",
wildcard=None, wildcard=None,
...@@ -660,7 +697,7 @@ class S3FS(FS): ...@@ -660,7 +697,7 @@ class S3FS(FS):
yield (item,self.getinfo(item)) yield (item,self.getinfo(item))
else: else:
prefix = self._s3path(path) prefix = self._s3path(path)
for k in self._s3bukt.list(prefix=prefix): for k in self._s3bukt.list(prefix=prefix):
name = relpath(self._uns3path(k.name,prefix)) name = relpath(self._uns3path(k.name,prefix))
if name != "": if name != "":
if not isinstance(name,unicode): if not isinstance(name,unicode):
......
...@@ -54,7 +54,7 @@ class LazyFS(WrapFS): ...@@ -54,7 +54,7 @@ class LazyFS(WrapFS):
return state return state
def __setstate__(self, state): def __setstate__(self, state):
self.__dict__.update(state) super(LazyFS,self).__setstate__(state)
self._lazy_creation_lock = Lock() self._lazy_creation_lock = Lock()
def _get_wrapped_fs(self): def _get_wrapped_fs(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment