Extract tags for each scenario by the Feature.

The previous approach (running a regex search over the whole file to look for the tags of each new scenario) was quite slow. This commit is especially useful for projects with feature files larger than 20K. It closes issue #369.

Extract tags for each scenario by the Feature.
The previous approach (running a regex search over the whole file to look for the tags of each new scenario) was quite slow. This commit is especially useful for projects with feature files larger than 20K. It closes issue #369.
ab61ec4c · hectord · 3f25debd · ab61ec4c · ab61ec4c · ab61ec4c
Commit ab61ec4c authored Aug 11, 2013 by hectord
Hide whitespace changes
Inline Side-by-side

Showing with 67 additions and 89 deletions

lettuce/core.py
+22 -55

tests/unit/test_feature_parser.py
+37 -4

tests/unit/test_scenario_parsing.py
+8 -30

No files found.
--- a/lettuce/core.py
+++ b/lettuce/core.py
@@ -45,7 +45,8 @@ class REP(object):
    within_double_quotes = re.compile(r'("[^"]+")')
    within_single_quotes = re.compile(r"('[^']+')")
    only_whitespace = re.compile('^\s*$')
-    tag_extraction_regex = re.compile(r'(?:(?:^|\s+)[@]([^@\s]+))')
+    last_tag_extraction_regex = re.compile(ur'(?:\s|^)[@](\S+)\s*$')
+    first_tag_extraction_regex = re.compile(ur'^\s*[@](\S+)(?:\s|$)')
    tag_strip_regex = re.compile(ur'(?:(?:^\s*|\s+)[@]\S+\s*)+$', re.DOTALL)
    comment_strip1 = re.compile(ur'(^[^\'"]*)[#]([^\'"]*)$')
    comment_strip2 = re.compile(ur'(^[^\'"]+)[#](.*)$')
@@ -542,7 +543,7 @@ class Scenario(object):
                 with_file=None,
                 original_string=None,
                 language=None,
-                 previous_scenario=None):
+                 tags=None):
        self.feature = None
        if not language:
@@ -550,6 +551,7 @@ class Scenario(object):
        self.name = name
        self.language = language
+        self.tags = tags
        self.remaining_lines = remaining_lines
        self.steps = self._parse_remaining_lines(remaining_lines,
                                                 with_file,
@@ -559,8 +561,6 @@ class Scenario(object):
        self.with_file = with_file
        self.original_string = original_string
-        self.previous_scenario = previous_scenario
        if with_file and original_string:
            scenario_definition = ScenarioDescription(self, with_file,
                                                      original_string,
@@ -571,11 +571,6 @@ class Scenario(object):
            self.steps, self.outlines, with_file, original_string))
        self._add_myself_to_steps()
-        if original_string and '@' in self.original_string:
-            self.tags = self._find_tags_in(original_string)
-        else:
-            self.tags = []
    @property
    def max_length(self):
        if self.outlines:
@@ -740,40 +735,6 @@ class Scenario(object):
        for step in self.solved_steps:
            step.scenario = self
-    def _find_tags_in(self, original_string):
-        broad_regex = re.compile(ur"([@].*)%s: (%s)" % (
-            self.language.scenario_separator,
-            re.escape(self.name)), re.DOTALL)
-        regexes = []
-        if not self.previous_scenario:
-            regexes.append(broad_regex)
-        else:
-            regexes.append(re.compile(ur"(?:%s: %s.*)([@]?.*)%s: (%s)\s*\n" % (
-                self.language.non_capturable_scenario_separator,
-                re.escape(self.previous_scenario.name),
-                self.language.scenario_separator,
-                re.escape(self.name)), re.DOTALL))
-        def try_finding_with(regex):
-            found = regex.search(original_string)
-            if found:
-                tag_lines = found.group().splitlines()
-                tags = list(chain(*map(self._extract_tag, tag_lines)))
-                return tags
-        for regex in regexes:
-            found = try_finding_with(regex)
-            if found:
-                return found
-        return []
-    def _extract_tag(self, item):
-        return REP.tag_extraction_regex.findall(item)
    def _resolve_steps(self, steps, outlines, with_file, original_string):
        for outline in outlines:
            for step in steps:
@@ -831,7 +792,7 @@ class Scenario(object):
                    with_file=None,
                    original_string=None,
                    language=None,
-                    previous_scenario=None):
+                    tags=None):
        """ Creates a new scenario from string"""
        # ignoring comments
        string = "\n".join(strings.get_stripped_lines(string, ignore_lines_starting_with='#'))
@@ -865,7 +826,7 @@ class Scenario(object):
            with_file=with_file,
            original_string=original_string,
            language=language,
-            previous_scenario=previous_scenario,
+            tags=tags,
        )
        return scenario
@@ -1011,7 +972,7 @@ class Feature(object):
            if found:
                tag_lines = found.group().splitlines()
-                tags = set(chain(*map(self._extract_tag, tag_lines)))
+                tags = list(chain(*map(self._extract_tag, tag_lines)))
                return tags
        for regex in regexes:
@@ -1094,6 +1055,15 @@ class Feature(object):
    def _set_definition(self, definition):
        self.described_at = definition
+    def _extract_tags(self, string, extract_regex=REP.last_tag_extraction_regex):
+        tags = []
+        while True:
+            m = extract_regex.search(string)
+            if not m:
+                return tags, string
+            tags.insert(0, m.groups()[0])
+            string = extract_regex.sub('', string)
    def _strip_next_scenario_tags(self, string):
        stripped = REP.tag_strip_regex.sub('', string)
@@ -1143,6 +1113,7 @@ class Feature(object):
        description = u""
        background = None
+        tags_scenario = []
        if not re.search("^" + scenario_prefix, joined):
            if not parts:
@@ -1151,8 +1122,8 @@ class Feature(object):
                    (u"Features must have scenarios.\n"
                     "Please refer to the documentation available at http://lettuce.it for more information.")
                )
+            tags_scenario, description_and_background = self._extract_tags(parts[0])
-            description, background_lines = self._extract_desc_and_bg(parts[0])
+            description, background_lines = self._extract_desc_and_bg(description_and_background)
            background = background_lines and Background.from_string(
                background_lines,
@@ -1176,22 +1147,18 @@ class Feature(object):
        scenarios = []
        while upcoming_scenarios:
+            tags_next_scenario, current = self._extract_tags(upcoming_scenarios[0])
            current = self._strip_next_scenario_tags(upcoming_scenarios.pop(0))
-            previous_scenario = None
-            has_previous = len(scenarios) > 0
-            if has_previous:
-                previous_scenario = scenarios[-1]
            params = dict(
-                previous_scenario=previous_scenario,
+                tags=tags_scenario,
            )
            params.update(kw)
            current_scenario = Scenario.from_string(current, **params)
            current_scenario.background = background
            scenarios.append(current_scenario)
+            tags_scenario = tags_next_scenario
        return background, scenarios, description

--- a/tests/unit/test_feature_parser.py
+++ b/tests/unit/test_feature_parser.py
@@ -432,6 +432,24 @@ Feature: Taming the tag parser
    Then this scenario has only one tag
 """
+FEATURE22 = """
+Feature: one tag in the first scenario
+  @onetag
+  Scenario: This is the first scenario
+    Given I am parsed
+    Then this scenario has one tag
+"""
+FEATURE23 = """
+Feature: three tags in the first scenario
+  @onetag @another @$%^&even-weird_chars
+  Scenario: This is the first scenario
+    Given I am parsed
+    Then this scenario has three tags
+"""
 def test_feature_has_repr():
    "Feature implements __repr__ nicely"
@@ -618,11 +636,8 @@ def test_single_feature_single_tag():
    "All scenarios within a feature inherit the feature's tags"
    feature = Feature.from_string(FEATURE18)
-    # FIXME (mitgr81):  It seems worth the efficiency to not loop through the feature tags and
-    # check to see if every tag exists in the child.  The "right" fix might just be to not
-    # add the tag from the feature in the first scenario directly.
    assert that(feature.scenarios[0].tags).deep_equals([
-        'feature_runme', 'runme1', 'feature_runme'])
+        'runme1', 'feature_runme'])
    assert that(feature.scenarios[1].tags).deep_equals([
        'runme2', 'feature_runme'])
@@ -820,3 +835,21 @@ def test_scenario_post_email():
    scenario1.tags.should.be.empty
    scenario2.tags.should.be.empty
+def test_feature_first_scenario_tag_extraction():
+    ("A feature object should be able to find the single tag "
+     "belonging to the first scenario")
+    feature = Feature.from_string(FEATURE22)
+    assert that(feature.scenarios[0].tags).deep_equals([
+        'onetag'])
+def test_feature_first_scenario_tags_extraction():
+    ("A feature object should be able to find the tags "
+     "belonging to the first scenario")
+    feature = Feature.from_string(FEATURE23)
+    assert that(feature.scenarios[0].tags).deep_equals([
+        'onetag', 'another', '$%^&even-weird_chars'])
--- a/tests/unit/test_scenario_parsing.py
+++ b/tests/unit/test_scenario_parsing.py
@@ -454,32 +454,6 @@ def test_commented_scenarios():
    assert_equals(len(scenario.steps), 4)
-def test_scenario_has_tag():
-    ("A scenario object should be able to find at least one tag "
-     "on the first line")
-    scenario = Scenario.from_string(
-        SCENARIO1,
-        original_string=('@onetag\n' + SCENARIO1.strip()))
-    expect(scenario.tags).to.equal(['onetag'])
-def test_scenario_has_tags_singleline():
-    ("A scenario object should be able to find many tags "
-     "on the first line")
-    scenario = Scenario.from_string(
-        SCENARIO1,
-        original_string=(
-            '@onetag @another @$%^&even-weird_chars \n' + SCENARIO1.strip()))
-    expect(scenario.tags).to.equal([
-        'onetag',
-        'another',
-        '$%^&even-weird_chars',
-    ])
 def test_scenario_matches_tags():
    ("A scenario with tags should respond with True when "
@@ -487,7 +461,8 @@ def test_scenario_matches_tags():
    scenario = Scenario.from_string(
        SCENARIO1,
-        original_string=('@onetag\n@another-one\n' + SCENARIO1.strip()))
+        original_string=SCENARIO1.strip(),
+        tags=['onetag', 'another-one'])
    expect(scenario.tags).to.equal(['onetag', 'another-one'])
    assert scenario.matches_tags(['onetag'])
@@ -500,7 +475,8 @@ def test_scenario_matches_tags_fuzzywuzzy():
    scenario = Scenario.from_string(
        SCENARIO1,
-        original_string=('@anothertag\n@another-tag\n' + SCENARIO1.strip()))
+        original_string=SCENARIO1.strip(),
+        tags=['anothertag', 'another-tag'])
    assert scenario.matches_tags(['~another'])
@@ -511,7 +487,8 @@ def test_scenario_matches_tags_excluding():
    scenario = Scenario.from_string(
        SCENARIO1,
-        original_string=('@anothertag\n@another-tag\n' + SCENARIO1.strip()))
+        original_string=SCENARIO1.strip(),
+        tags=['anothertag', 'another-tag'])
    assert not scenario.matches_tags(['-anothertag'])
    assert scenario.matches_tags(['-foobar'])
@@ -544,7 +521,8 @@ def test_scenario_show_tags_in_its_representation():
    scenario = Scenario.from_string(
        SCENARIO1,
-        original_string=('@slow\n@firefox\n@chrome\n' + SCENARIO1.strip()))
+        original_string=SCENARIO1.strip(),
+        tags=['slow', 'firefox', 'chrome'])
    expect(scenario.represented()).to.equal(
        u'  @slow @firefox @chrome\n  '