Commit d4db4c6d by Ned Batchelder

Fix encoding errors when hashing XML

We occasionally get UnicodeEncodeErrors from these two places because
the XML is a Unicode string and is implicitly encoded to ASCII when it is hashed.
parent a3a7d1d5
...@@ -87,7 +87,7 @@ class ErrorDescriptor(ErrorFields, JSONEditingDescriptor): ...@@ -87,7 +87,7 @@ class ErrorDescriptor(ErrorFields, JSONEditingDescriptor):
# but url_names aren't guaranteed to be unique between descriptor types, # but url_names aren't guaranteed to be unique between descriptor types,
# and ErrorDescriptor can wrap any type. When the wrapped module is fixed, # and ErrorDescriptor can wrap any type. When the wrapped module is fixed,
# it will be written out with the original url_name. # it will be written out with the original url_name.
name=hashlib.sha1(contents).hexdigest() name=hashlib.sha1(contents.encode('utf8')).hexdigest()
) )
# real metadata stays in the content, but add a display name # real metadata stays in the content, but add a display name
......
...@@ -108,7 +108,8 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem): ...@@ -108,7 +108,8 @@ class ImportSystem(XMLParsingSystem, MakoDescriptorSystem):
orig_name = orig_name[len(tag) + 1:-12] orig_name = orig_name[len(tag) + 1:-12]
# append the hash of the content--the first 12 bytes should be plenty. # append the hash of the content--the first 12 bytes should be plenty.
orig_name = "_" + orig_name if orig_name not in (None, "") else "" orig_name = "_" + orig_name if orig_name not in (None, "") else ""
return tag + orig_name + "_" + hashlib.sha1(xml).hexdigest()[:12] xml_bytes = xml.encode('utf8')
return tag + orig_name + "_" + hashlib.sha1(xml_bytes).hexdigest()[:12]
# Fallback if there was nothing we could use: # Fallback if there was nothing we could use:
if url_name is None or url_name == "": if url_name is None or url_name == "":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment