Commit ab61ec4c by hectord

Let the Feature extract the tags of each scenario.

The previous approach (running a regex search over the whole
 file to look for the tags of each new scenario) was quite
 slow.

This commit is especially beneficial for projects with
 feature files larger than 20K.

This commit closes issue #369.
parent 3f25debd
...@@ -45,7 +45,8 @@ class REP(object): ...@@ -45,7 +45,8 @@ class REP(object):
within_double_quotes = re.compile(r'("[^"]+")') within_double_quotes = re.compile(r'("[^"]+")')
within_single_quotes = re.compile(r"('[^']+')") within_single_quotes = re.compile(r"('[^']+')")
only_whitespace = re.compile('^\s*$') only_whitespace = re.compile('^\s*$')
tag_extraction_regex = re.compile(r'(?:(?:^|\s+)[@]([^@\s]+))') last_tag_extraction_regex = re.compile(ur'(?:\s|^)[@](\S+)\s*$')
first_tag_extraction_regex = re.compile(ur'^\s*[@](\S+)(?:\s|$)')
tag_strip_regex = re.compile(ur'(?:(?:^\s*|\s+)[@]\S+\s*)+$', re.DOTALL) tag_strip_regex = re.compile(ur'(?:(?:^\s*|\s+)[@]\S+\s*)+$', re.DOTALL)
comment_strip1 = re.compile(ur'(^[^\'"]*)[#]([^\'"]*)$') comment_strip1 = re.compile(ur'(^[^\'"]*)[#]([^\'"]*)$')
comment_strip2 = re.compile(ur'(^[^\'"]+)[#](.*)$') comment_strip2 = re.compile(ur'(^[^\'"]+)[#](.*)$')
...@@ -542,7 +543,7 @@ class Scenario(object): ...@@ -542,7 +543,7 @@ class Scenario(object):
with_file=None, with_file=None,
original_string=None, original_string=None,
language=None, language=None,
previous_scenario=None): tags=None):
self.feature = None self.feature = None
if not language: if not language:
...@@ -550,6 +551,7 @@ class Scenario(object): ...@@ -550,6 +551,7 @@ class Scenario(object):
self.name = name self.name = name
self.language = language self.language = language
self.tags = tags
self.remaining_lines = remaining_lines self.remaining_lines = remaining_lines
self.steps = self._parse_remaining_lines(remaining_lines, self.steps = self._parse_remaining_lines(remaining_lines,
with_file, with_file,
...@@ -559,8 +561,6 @@ class Scenario(object): ...@@ -559,8 +561,6 @@ class Scenario(object):
self.with_file = with_file self.with_file = with_file
self.original_string = original_string self.original_string = original_string
self.previous_scenario = previous_scenario
if with_file and original_string: if with_file and original_string:
scenario_definition = ScenarioDescription(self, with_file, scenario_definition = ScenarioDescription(self, with_file,
original_string, original_string,
...@@ -571,11 +571,6 @@ class Scenario(object): ...@@ -571,11 +571,6 @@ class Scenario(object):
self.steps, self.outlines, with_file, original_string)) self.steps, self.outlines, with_file, original_string))
self._add_myself_to_steps() self._add_myself_to_steps()
if original_string and '@' in self.original_string:
self.tags = self._find_tags_in(original_string)
else:
self.tags = []
@property @property
def max_length(self): def max_length(self):
if self.outlines: if self.outlines:
...@@ -740,40 +735,6 @@ class Scenario(object): ...@@ -740,40 +735,6 @@ class Scenario(object):
for step in self.solved_steps: for step in self.solved_steps:
step.scenario = self step.scenario = self
def _find_tags_in(self, original_string):
broad_regex = re.compile(ur"([@].*)%s: (%s)" % (
self.language.scenario_separator,
re.escape(self.name)), re.DOTALL)
regexes = []
if not self.previous_scenario:
regexes.append(broad_regex)
else:
regexes.append(re.compile(ur"(?:%s: %s.*)([@]?.*)%s: (%s)\s*\n" % (
self.language.non_capturable_scenario_separator,
re.escape(self.previous_scenario.name),
self.language.scenario_separator,
re.escape(self.name)), re.DOTALL))
def try_finding_with(regex):
found = regex.search(original_string)
if found:
tag_lines = found.group().splitlines()
tags = list(chain(*map(self._extract_tag, tag_lines)))
return tags
for regex in regexes:
found = try_finding_with(regex)
if found:
return found
return []
def _extract_tag(self, item):
return REP.tag_extraction_regex.findall(item)
def _resolve_steps(self, steps, outlines, with_file, original_string): def _resolve_steps(self, steps, outlines, with_file, original_string):
for outline in outlines: for outline in outlines:
for step in steps: for step in steps:
...@@ -831,7 +792,7 @@ class Scenario(object): ...@@ -831,7 +792,7 @@ class Scenario(object):
with_file=None, with_file=None,
original_string=None, original_string=None,
language=None, language=None,
previous_scenario=None): tags=None):
""" Creates a new scenario from string""" """ Creates a new scenario from string"""
# ignoring comments # ignoring comments
string = "\n".join(strings.get_stripped_lines(string, ignore_lines_starting_with='#')) string = "\n".join(strings.get_stripped_lines(string, ignore_lines_starting_with='#'))
...@@ -865,7 +826,7 @@ class Scenario(object): ...@@ -865,7 +826,7 @@ class Scenario(object):
with_file=with_file, with_file=with_file,
original_string=original_string, original_string=original_string,
language=language, language=language,
previous_scenario=previous_scenario, tags=tags,
) )
return scenario return scenario
...@@ -1011,7 +972,7 @@ class Feature(object): ...@@ -1011,7 +972,7 @@ class Feature(object):
if found: if found:
tag_lines = found.group().splitlines() tag_lines = found.group().splitlines()
tags = set(chain(*map(self._extract_tag, tag_lines))) tags = list(chain(*map(self._extract_tag, tag_lines)))
return tags return tags
for regex in regexes: for regex in regexes:
...@@ -1094,6 +1055,15 @@ class Feature(object): ...@@ -1094,6 +1055,15 @@ class Feature(object):
def _set_definition(self, definition): def _set_definition(self, definition):
self.described_at = definition self.described_at = definition
def _extract_tags(self, string, extract_regex=REP.last_tag_extraction_regex):
    """Peel tags off ``string`` one match at a time.

    ``extract_regex`` is searched repeatedly; on every hit the first
    capture group is recorded as a tag and the matched span is cut out
    of the string. The loop ends as soon as the pattern no longer
    matches.

    :param string: text to extract the tags from.
    :param extract_regex: compiled pattern whose first group captures
        the tag name; defaults to ``REP.last_tag_extraction_regex``.
    :returns: a ``(tags, remaining_string)`` tuple. ``tags`` is ordered
        so that the tag extracted last comes first, i.e. with a pattern
        that matches the trailing tag the list follows the order in
        which the tags are written in ``string``.
    """
    extracted = []
    while True:
        match = extract_regex.search(string)
        if match is None:
            break
        extracted.append(match.group(1))
        # Remove exactly the span that was just recorded. Slicing avoids
        # a second regex pass over the string and, unlike ``sub``, can
        # never discard additional matches whose tags were not recorded.
        string = string[:match.start()] + string[match.end():]
    # Tags were collected in extraction order; callers expect the
    # most recently extracted tag first.
    extracted.reverse()
    return extracted, string
def _strip_next_scenario_tags(self, string): def _strip_next_scenario_tags(self, string):
stripped = REP.tag_strip_regex.sub('', string) stripped = REP.tag_strip_regex.sub('', string)
...@@ -1143,6 +1113,7 @@ class Feature(object): ...@@ -1143,6 +1113,7 @@ class Feature(object):
description = u"" description = u""
background = None background = None
tags_scenario = []
if not re.search("^" + scenario_prefix, joined): if not re.search("^" + scenario_prefix, joined):
if not parts: if not parts:
...@@ -1151,8 +1122,8 @@ class Feature(object): ...@@ -1151,8 +1122,8 @@ class Feature(object):
(u"Features must have scenarios.\n" (u"Features must have scenarios.\n"
"Please refer to the documentation available at http://lettuce.it for more information.") "Please refer to the documentation available at http://lettuce.it for more information.")
) )
tags_scenario, description_and_background = self._extract_tags(parts[0])
description, background_lines = self._extract_desc_and_bg(parts[0]) description, background_lines = self._extract_desc_and_bg(description_and_background)
background = background_lines and Background.from_string( background = background_lines and Background.from_string(
background_lines, background_lines,
...@@ -1176,22 +1147,18 @@ class Feature(object): ...@@ -1176,22 +1147,18 @@ class Feature(object):
scenarios = [] scenarios = []
while upcoming_scenarios: while upcoming_scenarios:
tags_next_scenario, current = self._extract_tags(upcoming_scenarios[0])
current = self._strip_next_scenario_tags(upcoming_scenarios.pop(0)) current = self._strip_next_scenario_tags(upcoming_scenarios.pop(0))
previous_scenario = None
has_previous = len(scenarios) > 0
if has_previous:
previous_scenario = scenarios[-1]
params = dict( params = dict(
previous_scenario=previous_scenario, tags=tags_scenario,
) )
params.update(kw) params.update(kw)
current_scenario = Scenario.from_string(current, **params) current_scenario = Scenario.from_string(current, **params)
current_scenario.background = background current_scenario.background = background
scenarios.append(current_scenario) scenarios.append(current_scenario)
tags_scenario = tags_next_scenario
return background, scenarios, description return background, scenarios, description
......
...@@ -432,6 +432,24 @@ Feature: Taming the tag parser ...@@ -432,6 +432,24 @@ Feature: Taming the tag parser
Then this scenario has only one tag Then this scenario has only one tag
""" """
FEATURE22 = """
Feature: one tag in the first scenario
@onetag
Scenario: This is the first scenario
Given I am parsed
Then this scenario has one tag
"""
FEATURE23 = """
Feature: three tags in the first scenario
@onetag @another @$%^&even-weird_chars
Scenario: This is the first scenario
Given I am parsed
Then this scenario has three tags
"""
def test_feature_has_repr(): def test_feature_has_repr():
"Feature implements __repr__ nicely" "Feature implements __repr__ nicely"
...@@ -618,11 +636,8 @@ def test_single_feature_single_tag(): ...@@ -618,11 +636,8 @@ def test_single_feature_single_tag():
"All scenarios within a feature inherit the feature's tags" "All scenarios within a feature inherit the feature's tags"
feature = Feature.from_string(FEATURE18) feature = Feature.from_string(FEATURE18)
# FIXME (mitgr81): It seems worth the efficiency to not loop through the feature tags and
# check to see if every tag exists in the child. The "right" fix might just be to not
# add the tag from the feature in the first scenario directly.
assert that(feature.scenarios[0].tags).deep_equals([ assert that(feature.scenarios[0].tags).deep_equals([
'feature_runme', 'runme1', 'feature_runme']) 'runme1', 'feature_runme'])
assert that(feature.scenarios[1].tags).deep_equals([ assert that(feature.scenarios[1].tags).deep_equals([
'runme2', 'feature_runme']) 'runme2', 'feature_runme'])
...@@ -820,3 +835,21 @@ def test_scenario_post_email(): ...@@ -820,3 +835,21 @@ def test_scenario_post_email():
scenario1.tags.should.be.empty scenario1.tags.should.be.empty
scenario2.tags.should.be.empty scenario2.tags.should.be.empty
def test_feature_first_scenario_tag_extraction():
("A feature object should be able to find the single tag "
"belonging to the first scenario")
feature = Feature.from_string(FEATURE22)
assert that(feature.scenarios[0].tags).deep_equals([
'onetag'])
def test_feature_first_scenario_tags_extraction():
("A feature object should be able to find the tags "
"belonging to the first scenario")
feature = Feature.from_string(FEATURE23)
assert that(feature.scenarios[0].tags).deep_equals([
'onetag', 'another', '$%^&even-weird_chars'])
...@@ -454,32 +454,6 @@ def test_commented_scenarios(): ...@@ -454,32 +454,6 @@ def test_commented_scenarios():
assert_equals(len(scenario.steps), 4) assert_equals(len(scenario.steps), 4)
def test_scenario_has_tag():
("A scenario object should be able to find at least one tag "
"on the first line")
scenario = Scenario.from_string(
SCENARIO1,
original_string=('@onetag\n' + SCENARIO1.strip()))
expect(scenario.tags).to.equal(['onetag'])
def test_scenario_has_tags_singleline():
("A scenario object should be able to find many tags "
"on the first line")
scenario = Scenario.from_string(
SCENARIO1,
original_string=(
'@onetag @another @$%^&even-weird_chars \n' + SCENARIO1.strip()))
expect(scenario.tags).to.equal([
'onetag',
'another',
'$%^&even-weird_chars',
])
def test_scenario_matches_tags(): def test_scenario_matches_tags():
("A scenario with tags should respond with True when " ("A scenario with tags should respond with True when "
...@@ -487,7 +461,8 @@ def test_scenario_matches_tags(): ...@@ -487,7 +461,8 @@ def test_scenario_matches_tags():
scenario = Scenario.from_string( scenario = Scenario.from_string(
SCENARIO1, SCENARIO1,
original_string=('@onetag\n@another-one\n' + SCENARIO1.strip())) original_string=SCENARIO1.strip(),
tags=['onetag', 'another-one'])
expect(scenario.tags).to.equal(['onetag', 'another-one']) expect(scenario.tags).to.equal(['onetag', 'another-one'])
assert scenario.matches_tags(['onetag']) assert scenario.matches_tags(['onetag'])
...@@ -500,7 +475,8 @@ def test_scenario_matches_tags_fuzzywuzzy(): ...@@ -500,7 +475,8 @@ def test_scenario_matches_tags_fuzzywuzzy():
scenario = Scenario.from_string( scenario = Scenario.from_string(
SCENARIO1, SCENARIO1,
original_string=('@anothertag\n@another-tag\n' + SCENARIO1.strip())) original_string=SCENARIO1.strip(),
tags=['anothertag', 'another-tag'])
assert scenario.matches_tags(['~another']) assert scenario.matches_tags(['~another'])
...@@ -511,7 +487,8 @@ def test_scenario_matches_tags_excluding(): ...@@ -511,7 +487,8 @@ def test_scenario_matches_tags_excluding():
scenario = Scenario.from_string( scenario = Scenario.from_string(
SCENARIO1, SCENARIO1,
original_string=('@anothertag\n@another-tag\n' + SCENARIO1.strip())) original_string=SCENARIO1.strip(),
tags=['anothertag', 'another-tag'])
assert not scenario.matches_tags(['-anothertag']) assert not scenario.matches_tags(['-anothertag'])
assert scenario.matches_tags(['-foobar']) assert scenario.matches_tags(['-foobar'])
...@@ -544,7 +521,8 @@ def test_scenario_show_tags_in_its_representation(): ...@@ -544,7 +521,8 @@ def test_scenario_show_tags_in_its_representation():
scenario = Scenario.from_string( scenario = Scenario.from_string(
SCENARIO1, SCENARIO1,
original_string=('@slow\n@firefox\n@chrome\n' + SCENARIO1.strip())) original_string=SCENARIO1.strip(),
tags=['slow', 'firefox', 'chrome'])
expect(scenario.represented()).to.equal( expect(scenario.represented()).to.equal(
u' @slow @firefox @chrome\n ' u' @slow @firefox @chrome\n '
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment