# converter.py
import re
import itertools
# Committed by Steve Strassmann

class Converter:
    """Converter is an abstract class that transforms strings.

    It hides embedded tags (HTML or Python format sequences) from the
    transformation, so only the text around the tags is converted.

    To implement Converter, provide an implementation of
    inner_convert_string().

    Strategy:
      1. extract tags embedded in the string
        a. use the index of each extracted tag to re-insert it later
        b. replace tags in string with numbers (<0>, <1>, etc.)
        c. save extracted tags in a separate list
      2. convert string
      3. re-insert the extracted tags
    """

    # matches tags like these:
    #   HTML:   <B>, </B>, <BR/>, <textformat leading="10">
    #   Braces: {0}, {name}
    #   Python: %(date)s, %(name)s
    tag_pattern = re.compile(r'(<[-\w" .:?=/]*>)|({[^}]*})|(%\([^)]*\)\w)', re.I)

    # matches the numeric placeholders (<0>, <1>, ...) written by detag_string()
    _placeholder_pattern = re.compile(r'<\d+>')

    def convert(self, string):
        """Returns: a converted tagged string
           param: string (may contain embedded tags)

           Characters inside tags are not converted; only the text
           between tags is passed through inner_convert_string().
        """
        (detagged, tags) = self.detag_string(string)
        converted = self.inner_convert_string(detagged)
        return self.retag_string(converted, tags)

    def detag_string(self, string):
        """Extracts tags from string.

           returns (string, list) where
           string: string has tags replaced by indices (<BR>... => <0>, <1>, <2>, etc.)
           list: list of the removed tags ('<BR>', '<I>', '</I>')
        """
        tags = []

        def stash(match):
            # Record the tag and leave its list index behind as a placeholder.
            # A single pass keeps the placeholder numbers and the list in sync.
            tags.append(match.group(0))
            return '<%s>' % (len(tags) - 1)

        detagged = self.tag_pattern.sub(stash, string)
        return (detagged, tags)

    def retag_string(self, string, tags):
        """Substitutes each tag back into string, in place of <0>, <1> etc.

           param: string (contains placeholders written by detag_string)
           param: tags (list of tags, indexed by placeholder number)
           returns: string with the first occurrence of each placeholder
                    replaced by its tag; any other text is left untouched
        """
        pending = {'<%s>' % i: tag for (i, tag) in enumerate(tags)}

        def restore(match):
            # pop() restores each placeholder only once (its first occurrence).
            # Unknown or already-restored placeholders are returned unchanged.
            placeholder = match.group(0)
            return pending.pop(placeholder, placeholder)

        # One pass with a callback: re-inserted tags are never rescanned
        # (a tag such as '<1>' is not mistaken for a placeholder), and tag
        # text is inserted literally, so backslashes are not treated as
        # regex replacement escapes.
        return self._placeholder_pattern.sub(restore, string)

    # ------------------------------
    # Customize this in subclasses of Converter

    def inner_convert_string(self, string):
        """Converts the detagged string; the default returns it unchanged."""
        return string  # do nothing by default