Commit 3854108e by Michael Roytman

Address comments from code review

parent 63f04864
...@@ -5,38 +5,49 @@ import yaml ...@@ -5,38 +5,49 @@ import yaml
import sys import sys
import networkx as nx import networkx as nx
from collections import namedtuple from collections import namedtuple
import argparse
class FileParser: TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR")
DOCKER_PATH_ROOT = pathlib2.Path(TRAVIS_BUILD_DIR, "docker", "build")
def __init__(self): CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml")
self._load_repo_path() LOGGER = logging.getLogger(__name__)
def _load_repo_path(self):
"""Loads the path for the configuration repository from TRAVIS_BUILD_DIR environment variable."""
if os.environ.get("TRAVIS_BUILD_DIR"):
self.repo_path = os.environ.get("TRAVIS_BUILD_DIR")
else:
raise EnvironmentError("TRAVIS_BUILD_DIR environment variable is not set.")
def build_graph(self, git_dir, roles_dirs, aws_play_dirs, docker_play_dirs):
def build_graph(git_dir, roles_dirs, aws_play_dirs, docker_play_dirs):
""" """
Builds a dependency graph that shows relationships between roles and playbooks. Builds a dependency graph that shows relationships between roles and playbooks.
An edge [A, B], where A and B are roles, signifies that A depends on B. An edge An edge [A, B], where A and B are roles, signifies that A depends on B. An edge
[C, D], where C is a playbook and D is a role, signifies that C uses D. [C, D], where C is a playbook and D is a role, signifies that C uses D.
Input:
git_dir: A path to the top-most directory in the local git repository tool is to be run in.
roles_dirs: A list of relative paths to directories in which Ansible roles reside.
aws_play_dirs: A list of relative paths to directories in which AWS Ansible playbooks reside.
docker_play_dirs: A list of relative paths to directories in which Docker Ansible playbooks reside.
""" """
graph = nx.DiGraph() graph = nx.DiGraph()
self._map_roles_to_roles(graph, roles_dirs, git_dir, "dependencies", "role", "role") _map_roles_to_roles(graph, roles_dirs, git_dir, "dependencies", "role", "role")
self._map_plays_to_roles(graph, aws_play_dirs, git_dir, "roles", "aws_playbook", "role") _map_plays_to_roles(graph, aws_play_dirs, git_dir, "roles", "aws_playbook", "role")
self._map_plays_to_roles(graph, docker_play_dirs, git_dir, "roles", "docker_playbook", "role") _map_plays_to_roles(graph, docker_play_dirs, git_dir, "roles", "docker_playbook", "role")
return graph return graph
def _map_roles_to_roles(self, graph, dirs, git_dir, key, type_1, type_2): def _map_roles_to_roles(graph, dirs, git_dir, key, type_1, type_2):
"""Maps roles to the roles that they depend on.""" """
Maps roles to the roles that they depend on.
Input:
graph: A networkx digraph that is used to map Ansible dependencies.
dirs: A list of relative paths to directories in which Ansible roles reside.
git_dir: A path to the top-most directory in the local git repository tool is to be run in.
key: The key in a role yaml file in dirs that maps to relevant role data. In this case, key is
"dependencies", because a role's dependent roles is of interest.
type_1: Given edges A-B, the type of node A.
type_2: Given edges A-B, the type of node B.
Since this function maps roles to their dependent roles, both type_1 and type_2 are "role".
"""
Node = namedtuple('Node', ['name', 'type']) Node = namedtuple('Node', ['name', 'type'])
...@@ -44,34 +55,49 @@ class FileParser: ...@@ -44,34 +55,49 @@ class FileParser:
for d in dirs: for d in dirs:
d = pathlib2.Path(git_dir, d) d = pathlib2.Path(git_dir, d)
if d.is_dir():
# for all files/sub-directories in directory # for all files/sub-directories in directory
for directory in d.iterdir(): for item in d.iterdir():
# attempts to find meta/*.yml file in directory # attempts to find meta/*.yml file in item directory tree
role = [file for file in directory.glob("meta/*.yml")] roles = [f for f in item.glob("meta/*.yml")]
# if role exists # if a meta/*.yml file(s) exists for a role
if role: if roles:
with (open(str(role[0]), "r")) as file: # for each role
yaml_file = yaml.load(file) for role in roles:
yaml_file = _open_yaml_file(role)
# if a yaml file and key in file # if not an empty yaml file and key in file
if yaml_file is not None and key in yaml_file: if yaml_file is not None and key in yaml_file:
# for each dependent role # for each dependent role; yaml_file["dependencies"] returns list of
# dependent roles
for dependent in yaml_file[key]: for dependent in yaml_file[key]:
# get role name # get role name of each dependent role
name = self._get_role_name(dependent) name = _get_role_name(dependent)
# add node for role # add node for type_1, typically role
node_1 = Node(directory.name, type_1) node_1 = Node(item.name, type_1)
# add node for dependent role
# add node for type_2, typically dependent role
node_2 = Node(name, type_2) node_2 = Node(name, type_2)
# add edge role - dependent role
# add edge, typically role - dependent role
graph.add_edge(node_1, node_2) graph.add_edge(node_1, node_2)
def _map_plays_to_roles(self, graph, dirs, git_dir, key, type_1, type_2): def _map_plays_to_roles(graph, dirs, git_dir, key, type_1, type_2):
"""Maps plays to the roles they use.""" """
Maps plays to the roles they use.
Input:
graph: A networkx digraph that is used to map Ansible dependencies.
dirs: A list of relative paths to directories in which Ansible playbooks reside.
git_dir: A path to the top-most directory in the local git repository tool is to be run in.
key: The key in a playbook yaml file in dirs that maps to relevant playbook data. In this case, key is
"roles", because the roles used by a playbook is of interest.
type_1: Given edges A-B, the type of node A.
type_2: Given edges A-B, the type of node B.
Since this function maps plays to the roles they use, both type_1 is a type of playbook and type_2 is "role".
"""
Node = namedtuple('Node', ['name', 'type']) Node = namedtuple('Node', ['name', 'type'])
...@@ -79,14 +105,15 @@ class FileParser: ...@@ -79,14 +105,15 @@ class FileParser:
for d in dirs: for d in dirs:
d = pathlib2.Path(git_dir, d) d = pathlib2.Path(git_dir, d)
if d.is_dir():
# for all files/sub-directories in directory # for all files/sub-directories in directory
for directory in d.iterdir(): for item in d.iterdir():
# if a yaml file
if directory.is_file() and directory.suffix == ".yml": # if item is a file ending in .yml
with (open(str(directory), "r")) as file: if item.match("*.yml"):
yaml_file = yaml.load(file) # open .yml file for playbook
yaml_file = _open_yaml_file(item)
# if not an empty yaml file
if yaml_file is not None: if yaml_file is not None:
# for each play in yaml file # for each play in yaml file
for play in yaml_file: for play in yaml_file:
...@@ -95,17 +122,44 @@ class FileParser: ...@@ -95,17 +122,44 @@ class FileParser:
# for each role # for each role
for role in play[key]: for role in play[key]:
# get role name # get role name
name = self._get_role_name(role) name = _get_role_name(role)
#add node for type_1, typically for playbook
node_1 = Node(item.stem, type_1)
# add node for playbook # add node for type_2, typically for role
node_1 = Node(directory.stem, type_1)
# add node for role
node_2 = Node(name, type_2) node_2 = Node(name, type_2)
# add edge playbook - role
# add edge, typically playbook - role it uses
graph.add_edge(node_1, node_2) graph.add_edge(node_1, node_2)
def change_set_to_roles(self, files, git_dir, roles_dirs, playbooks_dirs, graph): def _open_yaml_file(file_str):
"""Converts change set consisting of a number of files to the roles that they represent.""" """
Opens yaml file.
Input:
file_str: The path to yaml file to be opened.
"""
with (file_str.open(mode='r')) as file:
try:
yaml_file = yaml.load(file)
return yaml_file
except yaml.YAMLError, exc:
LOGGER.warning("error in configuration file: %s" % str(exc))
sys.exit(1)
def change_set_to_roles(files, git_dir, roles_dirs, playbooks_dirs, graph):
"""
Converts change set consisting of a number of files to the roles that they represent/contain.
Input:
files: A list of files modified by a commit range.
git_dir: A path to the top-most directory in the local git repository tool is to be run in.
roles_dirs: A list of relative paths to directories in which Ansible roles reside.
playbook_dirs: A list of relative paths to directories in which Ansible playbooks reside.
graph: A networkx digraph that is used to map Ansible dependencies.
"""
# set of roles # set of roles
items = set() items = set()
...@@ -114,61 +168,81 @@ class FileParser: ...@@ -114,61 +168,81 @@ class FileParser:
for role_dir in roles_dirs: for role_dir in roles_dirs:
role_dir_path = pathlib2.Path(git_dir, role_dir) role_dir_path = pathlib2.Path(git_dir, role_dir)
# all files in role directory # get all files in the directories containing roles (i.e. all the roles in that directory)
candidate_files = [file for file in role_dir_path.glob("**/*")] candidate_files = (f for f in role_dir_path.glob("**/*"))
for file in files: # for all the files in the change set
file_path = pathlib2.Path(git_dir, file) for f in files:
file_path = pathlib2.Path(git_dir, f)
# if the change set file is in the set of role files
if file_path in candidate_files: if file_path in candidate_files:
name = self.get_resource_name(file_path, "roles") # get name of role and add it to set of roles of the change set
items.add(name) items.add(_get_resource_name(file_path, "roles"))
# for all directories containing playbooks # for all directories containing playbooks
for play_dir in playbooks_dirs: for play_dir in playbooks_dirs:
play_dir_path = pathlib2.Path(git_dir, play_dir) play_dir_path = pathlib2.Path(git_dir, play_dir)
# all files in role directory that end with yml extension # get all files in directory containing playbook that end with yml extension
candidate_files = [file for file in play_dir_path.glob("*.yml")] # (i.e. all playbooks in that directory)
candidate_files = (f for f in play_dir_path.glob("*.yml"))
for file in files: # for all filse in the change set
file_path = pathlib2.Path(git_dir, file) for f in files:
file_path = pathlib2.Path(git_dir, f)
# if the change set file is in teh set of playbook files
if file_path in candidate_files: if file_path in candidate_files:
name = self.get_resource_name(file_path, play_dir_path.name)
# gets first level of children of playbook in graph, which represents # gets first level of children of playbook in graph, which represents
# roles the playbook uses # all roles the playbook uses
descendants = nx.all_neighbors(graph, (file_path.stem, "aws_playbook")) descendants = nx.all_neighbors(graph, (file_path.stem, "aws_playbook"))
# adds all the roles that a playbook uses to set of roles of the change set
items |= {desc.name for desc in descendants} items |= {desc.name for desc in descendants}
return items return items
def get_resource_name(self, path, kind): def _get_resource_name(path, kind):
"""Gets name of resource from the filepath, which is the directory following occurence of kind.""" """
Gets name of resource from the filepath, which is the directory following occurence of kind.
Input:
path: A path to the resource (e.g. a role or a playbook)
kind: A description of the type of resource; this keyword precedes the name of a role or a playbook
in a file path and allows for the separation of its name;
e.g. for "configuration/playbooks/roles/discovery/...", kind = "roles" returns
"discovery" as the role name
"""
# get individual parts of a file path
dirs = path.parts dirs = path.parts
index = dirs.index(kind)
name = dirs[index+1]
return name
def get_dependencies(self, roles, graph): # type of resource is the next part of the file path after kind (e.g. after "roles" or "playbooks")
"""Determines all roles dependent on set of roles and returns set containing both.""" return dirs[dirs.index(kind)+1]
def get_dependencies(roles, graph):
"""
Determines all roles dependent on set of roles and returns set containing both.
Input:
roles: A set of roles.
graph: A networkx digraph that is used to map Ansible dependencies.
"""
items = set() items = set()
for role in roles: for role in roles:
# add the role itself
items.add(role) items.add(role)
# add all the roles that depend on the role
dependents = nx.descendants(graph, (role, "role")) dependents = nx.descendants(graph, (role, "role"))
names = {dep.name for dep in dependents} items |= {dependent.name for dependent in dependents}
items |= names
return items return items
def get_docker_plays(self, roles, graph): def get_docker_plays(roles, graph):
"""Gets all docker plays that contain at least role in common with roles.""" """Gets all docker plays that contain at least role in common with roles."""
# dict to determine coverage of plays # dict to determine coverage of plays
...@@ -176,7 +250,7 @@ class FileParser: ...@@ -176,7 +250,7 @@ class FileParser:
items = set() items = set()
docker_plays = [node.name for node in graph.nodes() if node.type == "docker_playbook"] docker_plays = (node.name for node in graph.nodes() if node.type == "docker_playbook")
for play in docker_plays: for play in docker_plays:
# all roles that are used by play # all roles that are used by play
...@@ -195,37 +269,29 @@ class FileParser: ...@@ -195,37 +269,29 @@ class FileParser:
for role in common_roles: for role in common_roles:
coverage[role] = True coverage[role] = True
self.check_coverage(coverage) # check coverage of roles
for role in coverage:
if not coverage[role]:
LOGGER.warning("role '%s' is not covered." % role)
return items return items
def filter_docker_plays(self, plays, repo_path): def filter_docker_plays(plays, repo_path):
"""Filters out docker plays that do not have a Dockerfile.""" """Filters out docker plays that do not have a Dockerfile."""
items = set() items = set()
logger = logging.getLogger(__name__)
for play in plays: for play in plays:
dockerfile = pathlib2.Path(self.repo_path, "docker", "build", play, "Dockerfile") dockerfile = pathlib2.Path(DOCKER_PATH_ROOT, play, "Dockerfile")
if dockerfile.exists(): if dockerfile.exists():
items.add(play) items.add(play)
else: else:
logger.warning(" covered playbook '%s' does not have Dockerfile." % play) LOGGER.warning("covered playbook '%s' does not have Dockerfile." % play)
return items return items
def check_coverage(self, coverage): def _get_role_name(role):
"""Checks which aws roles are not covered by docker plays."""
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)
for role in coverage:
if not coverage[role]:
logger.warning(" role '%s' is not covered." % role)
def _get_role_name(self, role):
""" """
Resolves a role name from either a simple declaration or a dictionary style declaration. Resolves a role name from either a simple declaration or a dictionary style declaration.
...@@ -245,11 +311,28 @@ class FileParser: ...@@ -245,11 +311,28 @@ class FileParser:
return role['role'] return role['role']
elif isinstance(role, basestring): elif isinstance(role, basestring):
return role return role
else:
LOGGER.warning("role %s could not be resolved to a role name." % role)
return None return None
def arg_parse():
parser = argparse.ArgumentParser(description = 'Given a commit range, analyze Ansible dependencies between roles and playbooks '
'and output a list of Docker plays affected by this commit range via these dependencies.')
parser.add_argument('--verbose', help="set warnings to be displayed", action="store_true")
return parser.parse_args()
if __name__ == '__main__': if __name__ == '__main__':
parser = FileParser()
args = arg_parse()
# configure logging
logging.basicConfig()
if not args.verbose:
logging.disable(logging.WARNING)
# set of modified files in the commit range
change_set = set() change_set = set()
# read from standard in # read from standard in
...@@ -257,24 +340,22 @@ if __name__ == '__main__': ...@@ -257,24 +340,22 @@ if __name__ == '__main__':
change_set.add(line.rstrip()) change_set.add(line.rstrip())
# read config file # read config file
config_file_path = pathlib2.Path(parser.repo_path, "util", "parsefiles_config.yml") config = _open_yaml_file(CONFIG_FILE_PATH)
with config_file_path.open() as config_file:
config = yaml.load(config_file)
# build grpah # build graph
graph = parser.build_graph(parser.repo_path, config["roles_paths"], config["aws_plays_paths"], config["docker_plays_paths"]) graph = build_graph(TRAVIS_BUILD_DIR, config["roles_paths"], config["aws_plays_paths"], config["docker_plays_paths"])
# transforms list of roles and plays into list of original roles and the roles contained in the plays # transforms list of roles and plays into list of original roles and the roles contained in the plays
roles = parser.change_set_to_roles(change_set, parser.repo_path, config["roles_paths"], config["aws_plays_paths"], graph) roles = change_set_to_roles(change_set, TRAVIS_BUILD_DIR, config["roles_paths"], config["aws_plays_paths"], graph)
# expands roles set to include roles that are dependent on existing roles # expands roles set to include roles that are dependent on existing roles
dependent_roles = parser.get_dependencies(roles, graph) dependent_roles = get_dependencies(roles, graph)
# determine which docker plays cover at least one role # determine which docker plays cover at least one role
docker_plays = parser.get_docker_plays(dependent_roles, graph) docker_plays = get_docker_plays(dependent_roles, graph)
# filter out docker plays without a Dockerfile # filter out docker plays without a Dockerfile
docker_plays = parser.filter_docker_plays(docker_plays, parser.repo_path) docker_plays = filter_docker_plays(docker_plays, TRAVIS_BUILD_DIR)
# prints Docker plays
print " ".join(str(play) for play in docker_plays) print " ".join(str(play) for play in docker_plays)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment