Commit 62e77b6a by willmcgugan

Optimized FTP fs by caching directory structure

parent 72f3d49f
...@@ -26,4 +26,4 @@ ...@@ -26,4 +26,4 @@
* New FS implementation: * New FS implementation:
* FTPFS: access a plain old FTP server * FTPFS: access a plain old FTP server
* ReadOnlyFS: a WrapFS that makes an fs read-only * ReadOnlyFS: a WrapFS that makes an fs read-only
* Added cache_hint method to base.py
...@@ -144,6 +144,16 @@ class FS(object): ...@@ -144,6 +144,16 @@ class FS(object):
if not getattr(self, 'closed', True): if not getattr(self, 'closed', True):
self.close() self.close()
def cache_hint(self, enabled):
    """Recommend the use of caching.

    The hint is advisory only: implementations are free to use or ignore
    this value. This base implementation does nothing.

    :param enabled: If True the implementation is permitted to cache directory
        structure / file info.

    """
    pass
def close(self): def close(self):
self.closed = True self.closed = True
...@@ -289,6 +299,39 @@ class FS(object): ...@@ -289,6 +299,39 @@ class FS(object):
""" """
raise UnsupportedError("list directory") raise UnsupportedError("list directory")
def listdirinfo(self, path="./",
                wildcard=None,
                full=False,
                absolute=False,
                dirs_only=False,
                files_only=False):
    """Retrieve a list of paths and path info (as returned by getinfo) under
    a given path.

    Each element of the result is a ``(path, info)`` tuple, where ``path`` is
    exactly what listdir returned for the entry and ``info`` is the result of
    calling getinfo on that entry.

    :param path: Root of the path to list
    :param wildcard: Filter paths that match this wildcard
    :param full: Passed through to listdir
    :param absolute: Passed through to listdir
    :param dirs_only: Return only directory paths
    :param files_only: Return only files
    :raises ResourceNotFoundError: If the path is not found
    :raises ResourceInvalidError: If the path exists, but is not a directory

    """
    def get_path(p):
        # With full/absolute set, listdir already yields a path that can be
        # handed to getinfo as-is; otherwise the entry is a bare name that
        # has to be joined onto the directory being listed.
        if full or absolute:
            return p
        return pathjoin(path, p)

    # Use the public listdir with the correctly spelled ``wildcard`` keyword.
    return [(p, self.getinfo(get_path(p)))
            for p in self.listdir(path,
                                  wildcard=wildcard,
                                  full=full,
                                  absolute=absolute,
                                  dirs_only=dirs_only,
                                  files_only=files_only)]
def _listdir_helper(self, path, entries, def _listdir_helper(self, path, entries,
wildcard=None, wildcard=None,
......
...@@ -35,7 +35,7 @@ class InfoFrame(wx.Frame): ...@@ -35,7 +35,7 @@ class InfoFrame(wx.Frame):
self.list_ctrl.SetColumnWidth(1, 300) self.list_ctrl.SetColumnWidth(1, 300)
for key in keys: for key in keys:
self.list_ctrl.Append((key, repr(info.get(key)))) self.list_ctrl.Append((key, str(info.get(key))))
......
...@@ -672,6 +672,8 @@ class _FTPFile(object): ...@@ -672,6 +672,8 @@ class _FTPFile(object):
if self.ftp is not None: if self.ftp is not None:
self.ftp.close() self.ftp.close()
self.closed = True self.closed = True
if 'w' in self.mode or 'a' in self.mode:
self.ftpfs._on_file_written(self.path)
def __iter__(self): def __iter__(self):
return self.next() return self.next()
...@@ -709,12 +711,20 @@ class _FTPFile(object): ...@@ -709,12 +711,20 @@ class _FTPFile(object):
def ftperrors(f): def ftperrors(f):
@wraps(f) @wraps(f)
def deco(self, *args, **kwargs): def deco(self, *args, **kwargs):
self._lock.acquire()
try:
self._enter_dircache()
try:
try: try:
ret = f(self, *args, **kwargs) ret = f(self, *args, **kwargs)
except Exception, e: except Exception, e:
#import traceback
#traceback.print_exc()
self._translate_exception(args[0] if args else '', e) self._translate_exception(args[0] if args else '', e)
finally:
self._leave_dircache()
finally:
self._lock.release()
if not self.use_dircache:
self.clear_dircache()
return ret return ret
return deco return deco
...@@ -731,7 +741,7 @@ class FTPFS(FS): ...@@ -731,7 +741,7 @@ class FTPFS(FS):
def __init__(self, host='', user='', passwd='', acct='', timeout=_GLOBAL_DEFAULT_TIMEOUT, def __init__(self, host='', user='', passwd='', acct='', timeout=_GLOBAL_DEFAULT_TIMEOUT,
port=21, port=21,
dircache=False, dircache=True,
max_buffer_size=128*1024*1024): max_buffer_size=128*1024*1024):
""" """
:param host: :param host:
...@@ -739,8 +749,8 @@ class FTPFS(FS): ...@@ -739,8 +749,8 @@ class FTPFS(FS):
:param passwd: :param passwd:
:param timeout: :param timeout:
:param dircache: If True then directory information will be cached, :param dircache: If True then directory information will be cached,
which will speed up operations such as isdir and isfile, but changes which will speed up operations such as getinfo, isdir, isfile, but changes
to the ftp file structure will not be visible (till clear_dircache) is to the ftp file structure will not be visible until clear_dircache is
called called
:param max_buffer_size: Number of bytes to hold before blocking write operations. :param max_buffer_size: Number of bytes to hold before blocking write operations.
...@@ -755,14 +765,108 @@ class FTPFS(FS): ...@@ -755,14 +765,108 @@ class FTPFS(FS):
self.acct = acct self.acct = acct
self.timeout = timeout self.timeout = timeout
self._dircache = {}
self.use_dircache = dircache self.use_dircache = dircache
self.get_dircache()
self.max_buffer_size = max_buffer_size self.max_buffer_size = max_buffer_size
self._cache_hint = False
self._locals._ftp = None self._locals._ftp = None
self._thread_ftps = set() self._thread_ftps = set()
self.ftp self.ftp
@synchronize
def cache_hint(self, enabled):
    """Record the caller's caching recommendation.

    The value is stored in ``self._cache_hint``; _leave_dircache reads it to
    decide whether the directory cache is cleared.

    :param enabled: If True, cached directory information may be kept

    """
    self._cache_hint = enabled
@synchronize
def _enter_dircache(self):
    """Note that one more operation is using the directory cache.

    Makes sure the cache held on ``self._locals`` exists, then bumps the
    usage counter stored alongside it (treating a missing counter as 0).

    """
    self.get_dircache()
    self._locals._dircache_count = getattr(self._locals, '_dircache_count', 0) + 1
@synchronize
def _leave_dircache(self):
    """Note that an operation has finished with the directory cache.

    Decrements the usage counter on ``self._locals``. When the counter
    reaches zero and no cache hint is set, the directory cache is cleared.

    """
    remaining = self._locals._dircache_count - 1
    self._locals._dircache_count = remaining
    if remaining == 0 and not self._cache_hint:
        self.clear_dircache()
    # Every leave must be paired with an earlier enter.
    assert remaining >= 0, "dircache count should never be negative"
@synchronize
def get_dircache(self):
    """Return the directory cache stored on ``self._locals``.

    If no cache has been stored there yet, an empty dict is created and the
    usage counter next to it is reset to 0.

    """
    local_state = self._locals
    cache = getattr(local_state, '_dircache', None)
    if cache is not None:
        return cache
    cache = {}
    local_state._dircache = cache
    local_state._dircache_count = 0
    return cache
@synchronize
def _on_file_written(self, path):
    """Drop the cached listing of the directory containing ``path``.

    :param path: Path of the file that was written

    """
    self.clear_dircache(dirname(path))
@synchronize
def _readdir(self, path):
    """Return a dict mapping entry names to info dicts for a directory.

    A cached listing is returned only while the usage counter on
    ``self._locals`` is non-zero. Otherwise the listing is fetched with
    ``self.ftp.dir`` and stored in the cache before being returned.

    :param path: Path of the directory to list

    """
    dircache = self.get_dircache()
    dircache_count = self._locals._dircache_count
    # The cache is only consulted while at least one operation has entered
    # it; the fresh result below is stored regardless.
    if dircache_count:
        cached_dirlist = dircache.get(path)
        if cached_dirlist is not None:
            return cached_dirlist
    dirlist = {}
    parser = FTPListDataParser()

    def on_line(line):
        # Callback invoked by ftp.dir for each line of the listing.
        if not isinstance(line, unicode):
            line = line.decode('utf-8')
        info = parser.parse_line(line)
        # Lines the parser returns nothing for are skipped.
        if info:
            info = info.__dict__
            dirlist[info['name']] = info

    try:
        self.ftp.dir(_encode(path), on_line)
    except error_reply:
        # NOTE(review): the error is swallowed, so whatever was collected
        # so far (possibly nothing) is cached and returned - confirm this
        # best-effort behaviour is intended.
        pass
    dircache[path] = dirlist
    return dirlist
@synchronize
def clear_dircache(self, *paths):
    """
    Discard cached directory information.

    :param paths: Paths of the directories to forget. When no paths are
        given, the whole cache is emptied. Paths that are not cached are
        ignored.

    """
    cache = self.get_dircache()
    if paths:
        for stale_path in paths:
            cache.pop(stale_path, None)
        return
    cache.clear()
@synchronize
def _check_path(self, path):
    """Return ``(dirlist, fname)`` for ``path``, insisting that it exists.

    ``dirlist`` is the listing of the parent directory and ``fname`` the
    final path component.

    :param path: Path to check
    :raises ResourceNotFoundError: If fname is non-empty and missing from
        the parent directory's listing

    """
    parent, name = pathsplit(abspath(path))
    listing = self._readdir(parent)
    missing = bool(name) and name not in listing
    if missing:
        raise ResourceNotFoundError(path)
    return listing, name
def _get_dirlist(self, path):
    """Return ``(dirlist, fname)`` for ``path`` without an existence check.

    ``dirlist`` is the listing of the parent directory of the absolute form
    of ``path``; ``fname`` is the final path component.

    """
    parent, name = pathsplit(abspath(path))
    return self._readdir(parent), name
@synchronize @synchronize
def get_ftp(self): def get_ftp(self):
...@@ -771,6 +875,7 @@ class FTPFS(FS): ...@@ -771,6 +875,7 @@ class FTPFS(FS):
ftp = self._locals._ftp ftp = self._locals._ftp
self._thread_ftps.add(ftp) self._thread_ftps.add(ftp)
return self._locals._ftp return self._locals._ftp
@synchronize
def set_ftp(self, ftp): def set_ftp(self, ftp):
self._locals._ftp = ftp self._locals._ftp = ftp
ftp = property(get_ftp, set_ftp) ftp = property(get_ftp, set_ftp)
...@@ -826,83 +931,28 @@ class FTPFS(FS): ...@@ -826,83 +931,28 @@ class FTPFS(FS):
raise ResourceNotFoundError(path) raise ResourceNotFoundError(path)
raise PermissionDeniedError(str(exception), path=path, msg="FTP error: %s (see details)" % str(exception), details=exception) raise PermissionDeniedError(str(exception), path=path, msg="FTP error: %s (see details)" % str(exception), details=exception)
raise exception raise exception
@ftperrors @ftperrors
@synchronize
def close(self): def close(self):
for ftp in self._thread_ftps: for ftp in self._thread_ftps:
ftp.close() ftp.close()
self._thread_ftps.clear()
self.closed = True self.closed = True
@ftperrors @ftperrors
@synchronize
def open(self, path, mode='r'): def open(self, path, mode='r'):
mode = mode.lower()
if 'r' in mode: if 'r' in mode:
if not self.isfile(path): if not self.isfile(path):
raise ResourceNotFoundError(path) raise ResourceNotFoundError(path)
if 'w' in mode or 'a' in mode:
self.clear_dircache(dirname(path))
ftp = self._open_ftp() ftp = self._open_ftp()
f = _FTPFile(self, ftp, path, mode) f = _FTPFile(self, ftp, path, mode)
return f return f
@synchronize
def _readdir(self, path):
if self.use_dircache:
cached_dirlist = self._dircache.get(path)
if cached_dirlist is not None:
return cached_dirlist
dirlist = {}
parser = FTPListDataParser()
def on_line(line):
#print repr(line)
if not isinstance(line, unicode):
line = line.decode('utf-8')
info = parser.parse_line(line)
if info:
info = info.__dict__
dirlist[info['name']] = info
try:
self.ftp.dir(_encode(path), on_line)
except error_reply:
pass
self._dircache[path] = dirlist
return dirlist
@synchronize
def clear_dircache(self, path=None):
"""
Clear cached directory information.
:path: Path of directory to clear cache for, or all directories if
None (the default)
"""
if path is None:
self._dircache.clear()
if path in self._dircache:
del self._dircache[path]
@synchronize
@ftperrors
def _check_path(self, path, ignore_missing=False):
base, fname = pathsplit(abspath(path))
dirlist = self._readdir(base)
if fname and fname not in dirlist:
raise ResourceNotFoundError(path)
return dirlist, fname
def _get_dirlist(self, path):
base, fname = pathsplit(abspath(path))
dirlist = self._readdir(base)
return dirlist, fname
@synchronize
@ftperrors @ftperrors
def exists(self, path): def exists(self, path):
if path in ('', '/'): if path in ('', '/'):
...@@ -910,7 +960,6 @@ class FTPFS(FS): ...@@ -910,7 +960,6 @@ class FTPFS(FS):
dirlist, fname = self._get_dirlist(path) dirlist, fname = self._get_dirlist(path)
return fname in dirlist return fname in dirlist
@synchronize
@ftperrors @ftperrors
def isdir(self, path): def isdir(self, path):
if path in ('', '/'): if path in ('', '/'):
...@@ -921,7 +970,6 @@ class FTPFS(FS): ...@@ -921,7 +970,6 @@ class FTPFS(FS):
return False return False
return info['try_cwd'] return info['try_cwd']
@synchronize
@ftperrors @ftperrors
def isfile(self, path): def isfile(self, path):
if path in ('', '/'): if path in ('', '/'):
...@@ -933,7 +981,6 @@ class FTPFS(FS): ...@@ -933,7 +981,6 @@ class FTPFS(FS):
return not info['try_cwd'] return not info['try_cwd']
@ftperrors @ftperrors
@synchronize
def listdir(self, path="./", wildcard=None, full=False, absolute=False, dirs_only=False, files_only=False): def listdir(self, path="./", wildcard=None, full=False, absolute=False, dirs_only=False, files_only=False):
path = normpath(path) path = normpath(path)
if not self.exists(path): if not self.exists(path):
...@@ -946,11 +993,11 @@ class FTPFS(FS): ...@@ -946,11 +993,11 @@ class FTPFS(FS):
@ftperrors @ftperrors
@synchronize
def makedir(self, path, recursive=False, allow_recreate=False): def makedir(self, path, recursive=False, allow_recreate=False):
if path in ('', '/'): if path in ('', '/'):
return return
def checkdir(path): def checkdir(path):
self.clear_dircache(dirname(path), path)
try: try:
self.ftp.mkd(_encode(path)) self.ftp.mkd(_encode(path))
except error_reply: except error_reply:
...@@ -966,7 +1013,7 @@ class FTPFS(FS): ...@@ -966,7 +1013,7 @@ class FTPFS(FS):
for p in recursepath(path): for p in recursepath(path):
checkdir(p) checkdir(p)
else: else:
base, dirname = pathsplit(path) base = dirname(path)
if not self.exists(base): if not self.exists(base):
raise ParentDirectoryMissingError(path) raise ParentDirectoryMissingError(path)
...@@ -977,19 +1024,18 @@ class FTPFS(FS): ...@@ -977,19 +1024,18 @@ class FTPFS(FS):
raise DestinationExistsError(path) raise DestinationExistsError(path)
checkdir(path) checkdir(path)
@ftperrors @ftperrors
@synchronize
def remove(self, path): def remove(self, path):
if not self.exists(path): if not self.exists(path):
raise ResourceNotFoundError(path) raise ResourceNotFoundError(path)
if not self.isfile(path): if not self.isfile(path):
raise ResourceInvalidError(path) raise ResourceInvalidError(path)
self.clear_dircache(dirname(path))
self.ftp.delete(_encode(path)) self.ftp.delete(_encode(path))
@ftperrors @ftperrors
@synchronize
def removedir(self, path, recursive=False, force=False): def removedir(self, path, recursive=False, force=False):
if not self.exists(path): if not self.exists(path):
raise ResourceNotFoundError(path) raise ResourceNotFoundError(path)
if self.isfile(path): if self.isfile(path):
...@@ -1008,6 +1054,7 @@ class FTPFS(FS): ...@@ -1008,6 +1054,7 @@ class FTPFS(FS):
self.removedir(rpath, force=force) self.removedir(rpath, force=force)
except FSError: except FSError:
pass pass
self.clear_dircache(dirname(path), path)
self.ftp.rmd(_encode(path)) self.ftp.rmd(_encode(path))
except error_reply: except error_reply:
pass pass
...@@ -1018,15 +1065,14 @@ class FTPFS(FS): ...@@ -1018,15 +1065,14 @@ class FTPFS(FS):
pass pass
@ftperrors @ftperrors
@synchronize
def rename(self, src, dst): def rename(self, src, dst):
self.clear_dircache(dirname(src), dirname(dst), src, dst)
try: try:
self.ftp.rename(_encode(src), _encode(dst)) self.ftp.rename(_encode(src), _encode(dst))
except error_reply: except error_reply:
pass pass
@ftperrors @ftperrors
@synchronize
def getinfo(self, path): def getinfo(self, path):
dirlist, fname = self._check_path(path) dirlist, fname = self._check_path(path)
if not fname: if not fname:
...@@ -1037,8 +1083,16 @@ class FTPFS(FS): ...@@ -1037,8 +1083,16 @@ class FTPFS(FS):
return info return info
@ftperrors @ftperrors
@synchronize
def getsize(self, path): def getsize(self, path):
size = None
if self._locals._dircache_count:
dirlist, fname = self._check_path(path)
size = dirlist[fname].get('size')
if size is not None:
return size
self.ftp.sendcmd('TYPE I') self.ftp.sendcmd('TYPE I')
size = self.ftp.size(_encode(path)) size = self.ftp.size(_encode(path))
if size is None: if size is None:
...@@ -1049,7 +1103,6 @@ class FTPFS(FS): ...@@ -1049,7 +1103,6 @@ class FTPFS(FS):
return size return size
@ftperrors @ftperrors
@synchronize
def desc(self, path): def desc(self, path):
dirlist, fname = self._check_path(path) dirlist, fname = self._check_path(path)
if fname not in dirlist: if fname not in dirlist:
...@@ -1057,11 +1110,11 @@ class FTPFS(FS): ...@@ -1057,11 +1110,11 @@ class FTPFS(FS):
return dirlist[fname].get('raw_line', 'No description available') return dirlist[fname].get('raw_line', 'No description available')
@ftperrors @ftperrors
@synchronize
def move(self, src, dst, overwrite=False, chunk_size=16384): def move(self, src, dst, overwrite=False, chunk_size=16384):
if not overwrite and self.exists(dst): if not overwrite and self.exists(dst):
raise DestinationExistsError(dst) raise DestinationExistsError(dst)
self.clear_dircache(dirname(src), dirname(dst))
try: try:
self.rename(src, dst) self.rename(src, dst)
except error_reply: except error_reply:
...@@ -1070,14 +1123,25 @@ class FTPFS(FS): ...@@ -1070,14 +1123,25 @@ class FTPFS(FS):
self.copy(src, dst) self.copy(src, dst)
self.remove(src) self.remove(src)
@ftperrors
def movedir(self, src, dst, overwrite=False, ignore_errors=False, chunk_size=16384):
    """Move a directory, discarding cached listings it may invalidate.

    Cached listings for src, dst and both of their parent directories are
    dropped, then the move itself is delegated to the parent class.

    """
    self.clear_dircache(src, dst, dirname(src), dirname(dst))
    super(FTPFS, self).movedir(src, dst, overwrite, ignore_errors, chunk_size)
@ftperrors
def copydir(self, src, dst, overwrite=False, ignore_errors=False, chunk_size=16384):
    """Copy a directory, discarding cached listings it may invalidate.

    Cached listings for src, dst and both of their parent directories are
    dropped, then the copy itself is delegated to the parent class.

    """
    self.clear_dircache(src, dst, dirname(src), dirname(dst))
    super(FTPFS, self).copydir(src, dst, overwrite, ignore_errors, chunk_size)
if __name__ == "__main__": if __name__ == "__main__":
ftp_fs = FTPFS('ftp.ncsa.uiuc.edu') ftp_fs = FTPFS('ftp.ncsa.uiuc.edu')
#from fs.browsewin import browse ftp_fs.cache_hint(True)
#browse(ftp_fs) from fs.browsewin import browse
browse(ftp_fs)
ftp_fs = FTPFS('127.0.0.1', 'user', '12345', dircache=True) #ftp_fs = FTPFS('127.0.0.1', 'user', '12345', dircache=True)
#f = ftp_fs.open('testout.txt', 'w') #f = ftp_fs.open('testout.txt', 'w')
#f.write("Testing writing to an ftp file!") #f.write("Testing writing to an ftp file!")
#f.write("\nHai!") #f.write("\nHai!")
...@@ -1105,6 +1169,5 @@ if __name__ == "__main__": ...@@ -1105,6 +1169,5 @@ if __name__ == "__main__":
#print ftp_fs.getsize('test.txt') #print ftp_fs.getsize('test.txt')
from fs.browsewin import browse #from fs.browsewin import browse
browse(ftp_fs) #browse(ftp_fs)
\ No newline at end of file
...@@ -72,7 +72,3 @@ if __name__ == "__main__": ...@@ -72,7 +72,3 @@ if __name__ == "__main__":
ftpd = ftpserver.FTPServer(address, handler) ftpd = ftpserver.FTPServer(address, handler)
ftpd.serve_forever() ftpd.serve_forever()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment