Commit 4d59bcde by willmcgugan

Modified signature generation to only read a portion of the beginning of the…

Modified signature generation to only read a portion of the beginning of the file, which should be enough to indicate a potential duplicate.
parent 11b9f16a
...@@ -172,7 +172,7 @@ def find_duplicates(fs, compare_paths=None, quick=False, signature_size=16384): ...@@ -172,7 +172,7 @@ def find_duplicates(fs, compare_paths=None, quick=False, signature_size=16384):
signatures = defaultdict(list) signatures = defaultdict(list)
# A signature is a tuple of CRC32s for each 16K of the file # A signature is a tuple of CRC32s for each 4x16K of the file
# This allows us to find potential duplicates with a dictionary lookup # This allows us to find potential duplicates with a dictionary lookup
for paths in size_duplicates: for paths in size_duplicates:
for path in paths: for path in paths:
...@@ -180,7 +180,7 @@ def find_duplicates(fs, compare_paths=None, quick=False, signature_size=16384): ...@@ -180,7 +180,7 @@ def find_duplicates(fs, compare_paths=None, quick=False, signature_size=16384):
fread = None fread = None
try: try:
fread = fs.open(path, 'rb') fread = fs.open(path, 'rb')
while True: while len(signature) < 4:
data = fread.read(signature_size) data = fread.read(signature_size)
if not data: if not data:
break break
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment