Commit 62e77b6a by willmcgugan

Optimized FTP fs by caching directory structure

parent 72f3d49f
......@@ -26,4 +26,4 @@
* New FS implementation:
* FTPFS: access a plain old FTP server
* ReadOnlyFS: a WrapFS that makes an fs read-only
* Added cache_hint method to base.py
......@@ -144,6 +144,16 @@ class FS(object):
if not getattr(self, 'closed', True):
self.close()
def cache_hint(self, enabled):
    """Advise the filesystem on whether caching is worthwhile.

    This base implementation does nothing; implementations are free to
    use or ignore the hint.

    :param enabled: If True the implementation is permitted to cache
        directory structure / file info.
    """
    return None
def close(self):
self.closed = True
......@@ -289,6 +299,39 @@ class FS(object):
"""
raise UnsupportedError("list directory")
def listdirinfo(self, path="./",
                wildcard=None,
                full=False,
                absolute=False,
                dirs_only=False,
                files_only=False):
    """Retrieves a list of (path, info) pairs for the entries under a given
    path, where info is whatever getinfo returns for that entry.

    :param path: Root of the path to list
    :param wildcard: Filter paths that match this wildcard
    :param full: Passed through to listdir; when set, the listed paths are
        handed to getinfo unchanged
    :param absolute: Passed through to listdir; when set, the listed paths
        are handed to getinfo unchanged
    :param dirs_only: Return only directory paths
    :param files_only: Return only files
    :raises ResourceNotFoundError: If the path is not found
    :raises ResourceInvalidError: If the path exists, but is not a directory
    """
    def get_path(p):
        # With full/absolute the listing already carries the directory
        # part, so only bare names need to be joined onto `path`.
        if full or absolute:
            return p
        return pathjoin(path, p)

    return [(p, self.getinfo(get_path(p)))
            for p in self.listdir(path,
                                  wildcard=wildcard,
                                  full=full,
                                  absolute=absolute,
                                  dirs_only=dirs_only,
                                  files_only=files_only)]
def _listdir_helper(self, path, entries,
wildcard=None,
......
......@@ -35,7 +35,7 @@ class InfoFrame(wx.Frame):
self.list_ctrl.SetColumnWidth(1, 300)
for key in keys:
self.list_ctrl.Append((key, repr(info.get(key))))
self.list_ctrl.Append((key, str(info.get(key))))
......
......@@ -672,6 +672,8 @@ class _FTPFile(object):
if self.ftp is not None:
self.ftp.close()
self.closed = True
if 'w' in self.mode or 'a' in self.mode:
self.ftpfs._on_file_written(self.path)
def __iter__(self):
return self.next()
......@@ -709,12 +711,20 @@ class _FTPFile(object):
def ftperrors(f):
    """Decorator that runs a method under the instance lock, inside a
    dircache scope, with exceptions passed to the error translator.

    The wrapper acquires ``self._lock``, calls ``self._enter_dircache()``,
    invokes the wrapped method and always calls ``self._leave_dircache()``
    and releases the lock afterwards. Any exception raised by the method is
    handed to ``self._translate_exception`` together with the first
    positional argument (or '' when there is none).

    :param f: the method to wrap; it is called as f(self, *args, **kwargs)
    :returns: the wrapping function
    """
    @wraps(f)
    def deco(self, *args, **kwargs):
        self._lock.acquire()
        try:
            self._enter_dircache()
            try:
                try:
                    ret = f(self, *args, **kwargs)
                except Exception as e:
                    self._translate_exception(args[0] if args else '', e)
                    # If the translator returns instead of raising, re-raise
                    # the original error; otherwise `return ret` below would
                    # fail on an unbound name and hide the real problem.
                    raise
            finally:
                self._leave_dircache()
        finally:
            self._lock.release()
        # With caching switched off, drop whatever this call cached.
        if not self.use_dircache:
            self.clear_dircache()
        return ret
    return deco
......@@ -731,7 +741,7 @@ class FTPFS(FS):
def __init__(self, host='', user='', passwd='', acct='', timeout=_GLOBAL_DEFAULT_TIMEOUT,
port=21,
dircache=False,
dircache=True,
max_buffer_size=128*1024*1024):
"""
:param host:
......@@ -739,8 +749,8 @@ class FTPFS(FS):
:param passwd:
:param timeout:
:param dircache: If True then directory information will be cached,
which will speed up operations such as isdir and isfile, but changes
to the ftp file structure will not be visible (till clear_dircache) is
which will speed up operations such as getinfo, isdir, isfile, but changes
to the ftp file structure will not be visible until clear_dircache is
called
:param max_buffer_size: Number of bytes to hold before blocking write operations.
......@@ -755,14 +765,108 @@ class FTPFS(FS):
self.acct = acct
self.timeout = timeout
self._dircache = {}
self.use_dircache = dircache
self.get_dircache()
self.max_buffer_size = max_buffer_size
self._cache_hint = False
self._locals._ftp = None
self._thread_ftps = set()
self.ftp
@synchronize
def cache_hint(self, enabled):
    """Store the caller's caching recommendation on the instance.

    While the stored hint is true, _leave_dircache does not clear the
    directory cache when the outermost call finishes.

    :param enabled: the hint value, kept as ``self._cache_hint``
    """
    self._cache_hint = enabled
@synchronize
def _enter_dircache(self):
    """Open a dircache scope by bumping the nesting counter kept on
    ``self._locals``.

    get_dircache() is called first so the cache dict exists before the
    counter is incremented.
    """
    self.get_dircache()
    self._locals._dircache_count = getattr(self._locals, '_dircache_count', 0) + 1
@synchronize
def _leave_dircache(self):
    """Close a dircache scope by decrementing the nesting counter.

    When the counter reaches zero and no cache hint is set, the directory
    cache is cleared.
    """
    local = self._locals
    local._dircache_count -= 1
    if local._dircache_count == 0 and not self._cache_hint:
        self.clear_dircache()
    assert local._dircache_count >= 0, "dircache count should never be negative"
@synchronize
def get_dircache(self):
    """Return the directory cache dict stored on ``self._locals``.

    If no cache exists yet, an empty dict is created and the nesting
    counter is reset to zero.

    :returns: the cache dict
    """
    local = self._locals
    cache = getattr(local, '_dircache', None)
    if cache is not None:
        return cache
    cache = {}
    local._dircache = cache
    local._dircache_count = 0
    return cache
@synchronize
def _on_file_written(self, path):
    """Drop the cached listing of the directory that contains `path`.

    :param path: path of the file that was written
    """
    parent = dirname(path)
    self.clear_dircache(parent)
@synchronize
def _readdir(self, path):
    """Return the listing of `path` as a dict of entry name -> info dict.

    A cached listing is returned only while the dircache nesting counter is
    non-zero. Otherwise the directory is listed over FTP, each line is
    parsed with FTPListDataParser, and the result is stored in the cache.
    An error_reply from the server is ignored, leaving whatever entries
    were parsed so far.

    :param path: directory to list
    :returns: dict mapping each entry's name to the parsed info's __dict__
    """
    cache = self.get_dircache()
    if self._locals._dircache_count:
        hit = cache.get(path)
        if hit is not None:
            return hit

    entries = {}
    parser = FTPListDataParser()

    def on_line(raw):
        # Listing lines may arrive as byte strings; decode before parsing.
        text = raw if isinstance(raw, unicode) else raw.decode('utf-8')
        parsed = parser.parse_line(text)
        if parsed:
            fields = parsed.__dict__
            entries[fields['name']] = fields

    try:
        self.ftp.dir(_encode(path), on_line)
    except error_reply:
        pass
    cache[path] = entries
    return entries
@synchronize
def clear_dircache(self, *paths):
    """
    Clear cached directory information.

    :param paths: Paths of directories to clear the cache for; when no
        paths are given, the whole cache is emptied. Paths that are not
        cached are ignored.

    """
    cache = self.get_dircache()
    if paths:
        for stale in paths:
            cache.pop(stale, None)
        return
    cache.clear()
@synchronize
def _check_path(self, path):
    """Return the parent directory listing and final component of `path`.

    :param path: path to look up
    :returns: (listing of the parent directory, final path component)
    :raises ResourceNotFoundError: if the final component is non-empty and
        is missing from the parent's listing
    """
    parent, name = pathsplit(abspath(path))
    listing = self._readdir(parent)
    if not name or name in listing:
        return listing, name
    raise ResourceNotFoundError(path)
def _get_dirlist(self, path):
    """Return the parent directory listing and final component of `path`,
    without checking that the component exists.

    :param path: path to look up
    :returns: (listing of the parent directory, final path component)
    """
    parent, name = pathsplit(abspath(path))
    return self._readdir(parent), name
@synchronize
def get_ftp(self):
......@@ -771,6 +875,7 @@ class FTPFS(FS):
ftp = self._locals._ftp
self._thread_ftps.add(ftp)
return self._locals._ftp
@synchronize
def set_ftp(self, ftp):
self._locals._ftp = ftp
ftp = property(get_ftp, set_ftp)
......@@ -826,83 +931,28 @@ class FTPFS(FS):
raise ResourceNotFoundError(path)
raise PermissionDeniedError(str(exception), path=path, msg="FTP error: %s (see details)" % str(exception), details=exception)
raise exception
@ftperrors
@synchronize
def close(self):
for ftp in self._thread_ftps:
ftp.close()
self._thread_ftps.clear()
self.closed = True
@ftperrors
@synchronize
def open(self, path, mode='r'):
mode = mode.lower()
if 'r' in mode:
if not self.isfile(path):
raise ResourceNotFoundError(path)
if 'w' in mode or 'a' in mode:
self.clear_dircache(dirname(path))
ftp = self._open_ftp()
f = _FTPFile(self, ftp, path, mode)
return f
@synchronize
def _readdir(self, path):
if self.use_dircache:
cached_dirlist = self._dircache.get(path)
if cached_dirlist is not None:
return cached_dirlist
dirlist = {}
parser = FTPListDataParser()
def on_line(line):
#print repr(line)
if not isinstance(line, unicode):
line = line.decode('utf-8')
info = parser.parse_line(line)
if info:
info = info.__dict__
dirlist[info['name']] = info
try:
self.ftp.dir(_encode(path), on_line)
except error_reply:
pass
self._dircache[path] = dirlist
return dirlist
@synchronize
def clear_dircache(self, path=None):
"""
Clear cached directory information.
:path: Path of directory to clear cache for, or all directories if
None (the default)
"""
if path is None:
self._dircache.clear()
if path in self._dircache:
del self._dircache[path]
@synchronize
@ftperrors
def _check_path(self, path, ignore_missing=False):
base, fname = pathsplit(abspath(path))
dirlist = self._readdir(base)
if fname and fname not in dirlist:
raise ResourceNotFoundError(path)
return dirlist, fname
def _get_dirlist(self, path):
base, fname = pathsplit(abspath(path))
dirlist = self._readdir(base)
return dirlist, fname
@synchronize
@ftperrors
def exists(self, path):
if path in ('', '/'):
......@@ -910,7 +960,6 @@ class FTPFS(FS):
dirlist, fname = self._get_dirlist(path)
return fname in dirlist
@synchronize
@ftperrors
def isdir(self, path):
if path in ('', '/'):
......@@ -921,7 +970,6 @@ class FTPFS(FS):
return False
return info['try_cwd']
@synchronize
@ftperrors
def isfile(self, path):
if path in ('', '/'):
......@@ -933,7 +981,6 @@ class FTPFS(FS):
return not info['try_cwd']
@ftperrors
@synchronize
def listdir(self, path="./", wildcard=None, full=False, absolute=False, dirs_only=False, files_only=False):
path = normpath(path)
if not self.exists(path):
......@@ -946,11 +993,11 @@ class FTPFS(FS):
@ftperrors
@synchronize
def makedir(self, path, recursive=False, allow_recreate=False):
if path in ('', '/'):
return
def checkdir(path):
self.clear_dircache(dirname(path), path)
try:
self.ftp.mkd(_encode(path))
except error_reply:
......@@ -966,7 +1013,7 @@ class FTPFS(FS):
for p in recursepath(path):
checkdir(p)
else:
base, dirname = pathsplit(path)
base = dirname(path)
if not self.exists(base):
raise ParentDirectoryMissingError(path)
......@@ -977,19 +1024,18 @@ class FTPFS(FS):
raise DestinationExistsError(path)
checkdir(path)
@ftperrors
@synchronize
def remove(self, path):
if not self.exists(path):
raise ResourceNotFoundError(path)
if not self.isfile(path):
raise ResourceInvalidError(path)
self.clear_dircache(dirname(path))
self.ftp.delete(_encode(path))
@ftperrors
@synchronize
def removedir(self, path, recursive=False, force=False):
if not self.exists(path):
raise ResourceNotFoundError(path)
if self.isfile(path):
......@@ -1008,6 +1054,7 @@ class FTPFS(FS):
self.removedir(rpath, force=force)
except FSError:
pass
self.clear_dircache(dirname(path), path)
self.ftp.rmd(_encode(path))
except error_reply:
pass
......@@ -1018,15 +1065,14 @@ class FTPFS(FS):
pass
@ftperrors
@synchronize
def rename(self, src, dst):
self.clear_dircache(dirname(src), dirname(dst), src, dst)
try:
self.ftp.rename(_encode(src), _encode(dst))
except error_reply:
pass
@ftperrors
@synchronize
def getinfo(self, path):
dirlist, fname = self._check_path(path)
if not fname:
......@@ -1037,8 +1083,16 @@ class FTPFS(FS):
return info
@ftperrors
@synchronize
def getsize(self, path):
size = None
if self._locals._dircache_count:
dirlist, fname = self._check_path(path)
size = dirlist[fname].get('size')
if size is not None:
return size
self.ftp.sendcmd('TYPE I')
size = self.ftp.size(_encode(path))
if size is None:
......@@ -1049,7 +1103,6 @@ class FTPFS(FS):
return size
@ftperrors
@synchronize
def desc(self, path):
dirlist, fname = self._check_path(path)
if fname not in dirlist:
......@@ -1057,11 +1110,11 @@ class FTPFS(FS):
return dirlist[fname].get('raw_line', 'No description available')
@ftperrors
@synchronize
def move(self, src, dst, overwrite=False, chunk_size=16384):
if not overwrite and self.exists(dst):
raise DestinationExistsError(dst)
self.clear_dircache(dirname(src), dirname(dst))
try:
self.rename(src, dst)
except error_reply:
......@@ -1070,14 +1123,25 @@ class FTPFS(FS):
self.copy(src, dst)
self.remove(src)
@ftperrors
def movedir(self, src, dst, overwrite=False, ignore_errors=False, chunk_size=16384):
    """Move a directory via the base class implementation, first dropping
    the cached listings of `src`, `dst` and both of their parents.
    """
    stale = (src, dst, dirname(src), dirname(dst))
    self.clear_dircache(*stale)
    super(FTPFS, self).movedir(src, dst, overwrite, ignore_errors, chunk_size)
@ftperrors
def copydir(self, src, dst, overwrite=False, ignore_errors=False, chunk_size=16384):
    """Copy a directory via the base class implementation, first dropping
    the cached listings of `src`, `dst` and both of their parents.
    """
    stale = (src, dst, dirname(src), dirname(dst))
    self.clear_dircache(*stale)
    super(FTPFS, self).copydir(src, dst, overwrite, ignore_errors, chunk_size)
if __name__ == "__main__":
ftp_fs = FTPFS('ftp.ncsa.uiuc.edu')
#from fs.browsewin import browse
#browse(ftp_fs)
ftp_fs.cache_hint(True)
from fs.browsewin import browse
browse(ftp_fs)
ftp_fs = FTPFS('127.0.0.1', 'user', '12345', dircache=True)
#ftp_fs = FTPFS('127.0.0.1', 'user', '12345', dircache=True)
#f = ftp_fs.open('testout.txt', 'w')
#f.write("Testing writing to an ftp file!")
#f.write("\nHai!")
......@@ -1105,6 +1169,5 @@ if __name__ == "__main__":
#print ftp_fs.getsize('test.txt')
from fs.browsewin import browse
browse(ftp_fs)
\ No newline at end of file
#from fs.browsewin import browse
#browse(ftp_fs)
......@@ -72,7 +72,3 @@ if __name__ == "__main__":
ftpd = ftpserver.FTPServer(address, handler)
ftpd.serve_forever()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment