import os import pathlib2 import logging import yaml import sys import networkx as nx from collections import namedtuple import argparse TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR") DOCKER_PATH_ROOT = pathlib2.Path(TRAVIS_BUILD_DIR, "docker", "build") CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml") LOGGER = logging.getLogger(__name__) def build_graph(git_dir, roles_dirs, aws_play_dirs, docker_play_dirs): """ Builds a dependency graph that shows relationships between roles and playbooks. An edge [A, B], where A and B are roles, signifies that A depends on B. An edge [C, D], where C is a playbook and D is a role, signifies that C uses D. Input: git_dir: A path to the top-most directory in the local git repository tool is to be run in. roles_dirs: A list of relative paths to directories in which Ansible roles reside. aws_play_dirs: A list of relative paths to directories in which AWS Ansible playbooks reside. docker_play_dirs: A list of relative paths to directories in which Docker Ansible playbooks reside. """ graph = nx.DiGraph() _map_roles_to_roles(graph, roles_dirs, git_dir, "dependencies", "role", "role") _map_plays_to_roles(graph, aws_play_dirs, git_dir, "roles", "aws_playbook", "role") _map_plays_to_roles(graph, docker_play_dirs, git_dir, "roles", "docker_playbook", "role") return graph def _map_roles_to_roles(graph, dirs, git_dir, key, type_1, type_2): """ Maps roles to the roles that they depend on. Input: graph: A networkx digraph that is used to map Ansible dependencies. dirs: A list of relative paths to directories in which Ansible roles reside. git_dir: A path to the top-most directory in the local git repository tool is to be run in. key: The key in a role yaml file in dirs that maps to relevant role data. In this case, key is "dependencies", because a role's dependent roles is of interest. type_1: Given edges A-B, the type of node A. type_2: Given edges A-B, the type of node B. Since this function maps roles to their dependent roles, both type_1 and type_2 are "role". """ Node = namedtuple('Node', ['name', 'type']) # for each role directory for d in dirs: d = pathlib2.Path(git_dir, d) # for all files/sub-directories in directory for item in d.iterdir(): # attempts to find meta/*.yml file in item directory tree roles = {f for f in item.glob("meta/*.yml")} # if a meta/*.yml file(s) exists for a role if roles: # for each role for role in roles: yaml_file = _open_yaml_file(role) # if not an empty yaml file and key in file if yaml_file is not None and key in yaml_file: # for each dependent role; yaml_file["dependencies"] returns list of # dependent roles for dependent in yaml_file[key]: # get role name of each dependent role name = _get_role_name(dependent) # add node for type_1, typically role node_1 = Node(item.name, type_1) # add node for type_2, typically dependent role node_2 = Node(name, type_2) # add edge, typically dependent role - role graph.add_edge(node_2, node_1) def _map_plays_to_roles(graph, dirs, git_dir, key, type_1, type_2): """ Maps plays to the roles they use. Input: graph: A networkx digraph that is used to map Ansible dependencies. dirs: A list of relative paths to directories in which Ansible playbooks reside. git_dir: A path to the top-most directory in the local git repository tool is to be run in. key: The key in a playbook yaml file in dirs that maps to relevant playbook data. In this case, key is "roles", because the roles used by a playbook is of interest. type_1: Given edges A-B, the type of node A. type_2: Given edges A-B, the type of node B. Since this function maps plays to the roles they use, both type_1 is a type of playbook and type_2 is "role". """ Node = namedtuple('Node', ['name', 'type']) # for each play directory for d in dirs: d = pathlib2.Path(git_dir, d) # for all files/sub-directories in directory for item in d.iterdir(): # if item is a file ending in .yml if item.match("*.yml"): # open .yml file for playbook yaml_file = _open_yaml_file(item) # if not an empty yaml file if yaml_file is not None: # for each play in yaml file for play in yaml_file: # if specified key in yaml file (e.g. "roles") if key in play: # for each role for role in play[key]: # get role name name = _get_role_name(role) #add node for type_1, typically for playbook node_1 = Node(item.stem, type_1) # add node for type_2, typically for role node_2 = Node(name, type_2) # add edge, typically role - playbook that uses it graph.add_edge(node_2, node_1) def _open_yaml_file(file_str): """ Opens yaml file. Input: file_str: The path to yaml file to be opened. """ with (file_str.open(mode='r')) as file: try: yaml_file = yaml.load(file) return yaml_file except yaml.YAMLError, exc: LOGGER.error("error in configuration file: %s" % str(exc)) sys.exit(1) def change_set_to_roles(files, git_dir, roles_dirs, playbooks_dirs, graph): """ Converts change set consisting of a number of files to the roles that they represent/contain. Input: files: A list of files modified by a commit range. git_dir: A path to the top-most directory in the local git repository tool is to be run in. roles_dirs: A list of relative paths to directories in which Ansible roles reside. playbook_dirs: A list of relative paths to directories in which Ansible playbooks reside. graph: A networkx digraph that is used to map Ansible dependencies. """ # set of roles items = set() # for all directories containing roles for role_dir in roles_dirs: role_dir_path = pathlib2.Path(git_dir, role_dir) # get all files in the directories containing roles (i.e. all the roles in that directory) candidate_files = {f for f in role_dir_path.glob("**/*")} # for all the files in the change set for f in files: file_path = pathlib2.Path(git_dir, f) # if the change set file is in the set of role files if file_path in candidate_files: # get name of role and add it to set of roles of the change set items.add(_get_role_name_from_file(file_path)) return items def get_plays(files, git_dir, playbooks_dirs): """ Determines which files in the change set are aws playbooks files: A list of files modified by a commit range. git_dir: A path to the top-most directory in the local git repository tool is to be run in. playbook_dirs: A list of relative paths to directories in which Ansible playbooks reside. """ plays = set() # for all directories containing playbooks for play_dir in playbooks_dirs: play_dir_path = pathlib2.Path(git_dir, play_dir) # get all files in directory containing playbook that end with yml extension # (i.e. all playbooks in that directory) candidate_files = {f for f in play_dir_path.glob("*.yml")} # for all filse in the change set for f in files: file_path = pathlib2.Path(git_dir, f) # if the change set file is in the set of playbook files if file_path in candidate_files: plays.add(_get_playbok_name_from_file(file_path)) return plays def _get_playbook_name_from_file(path): """ Gets name of playbook from the filepath, which is the last part of the filepath. Input: path: A path to the playbook """ # get last part of filepath return path.stem def _get_role_name_from_file(path): """ Gets name of role from the filepath, which is the directory following occurence of the word "roles". Input: path: A path to the role """ # get individual parts of a file path dirs = path.parts # name of role is the next part of the file path after "roles" return dirs[dirs.index("roles")+1] def get_dependencies(roles, graph): """ Determines all roles dependent on set of roles and returns set containing both. Input: roles: A set of roles. graph: A networkx digraph that is used to map Ansible dependencies. """ items = set() for role in roles: # add the role itself items.add(role) # add all the roles that depend on the role dependents = nx.descendants(graph, (role, "role")) items |= {dependent.name for dependent in dependents} return items def get_docker_plays(roles, graph): """Gets all docker plays that contain at least role in common with roles.""" # dict to determine coverage of plays coverage = dict.fromkeys(roles, False) items = set() docker_plays = {node.name for node in graph.nodes() if node.type == "docker_playbook"} for play in docker_plays: # all roles that are used by play roles_nodes = nx.all_neighbors(graph, (play, "docker_playbook")) docker_roles = {role.name for role in roles_nodes} # compares roles and docker roles common_roles = roles & docker_roles # if their intersection is non-empty, add the docker role if common_roles: items.add(play) # each aws role that was in common is marked as being covered by a docker play for role in common_roles: coverage[role] = True # check coverage of roles for role in coverage: if not coverage[role]: LOGGER.warning("role '%s' is not covered." % role) return items def filter_docker_plays(plays, repo_path): """Filters out docker plays that do not have a Dockerfile.""" items = set() for play in plays: dockerfile = pathlib2.Path(DOCKER_PATH_ROOT, play, "Dockerfile") if dockerfile.exists(): items.add(play) else: LOGGER.warning("covered playbook '%s' does not have Dockerfile." % play) return items def _get_role_name(role): """ Resolves a role name from either a simple declaration or a dictionary style declaration. A simple declaration would look like: - foo A dictionary style declaration would look like: - role: rbenv rbenv_user: "{{ forum_user }}" rbenv_dir: "{{ forum_app_dir }}" rbenv_ruby_version: "{{ forum_ruby_version }}" :param role: :return: """ if isinstance(role, dict): return role['role'] elif isinstance(role, basestring): return role else: LOGGER.warning("role %s could not be resolved to a role name." % role) return None def arg_parse(): parser = argparse.ArgumentParser(description = 'Given a commit range, analyze Ansible dependencies between roles and playbooks ' 'and output a list of Docker plays affected by this commit range via these dependencies.') parser.add_argument('--verbose', help="set warnings to be displayed", action="store_true") return parser.parse_args() if __name__ == '__main__': args = arg_parse() # configure logging logging.basicConfig() if not args.verbose: logging.disable(logging.WARNING) # set of modified files in the commit range change_set = set() # read from standard in for line in sys.stdin: change_set.add(line.rstrip()) # configuration file is expected to be in the following format: # # roles_paths: # - <all paths relative to configuration repository that contain Ansible roles> # aws_plays_paths: # - <all paths relative to configuration repository that contain aws Ansible playbooks> # docker_plays_paths: # - <all paths relative to configuration repository that contain Docker Ansible playbooks> # read config file config = _open_yaml_file(CONFIG_FILE_PATH) # build graph graph = build_graph(TRAVIS_BUILD_DIR, config["roles_paths"], config["aws_plays_paths"], config["docker_plays_paths"]) # gets any playbooks in the commit range plays = get_plays(change_set, TRAVIS_BUILD_DIR, config["aws_plays_paths"]) # transforms list of roles and plays into list of original roles and the roles contained in the plays roles = change_set_to_roles(change_set, TRAVIS_BUILD_DIR, config["roles_paths"], config["aws_plays_paths"], graph) # expands roles set to include roles that are dependent on existing roles dependent_roles = get_dependencies(roles, graph) # determine which docker plays cover at least one role docker_plays = get_docker_plays(dependent_roles, graph) docker_plays = docker_plays | plays # filter out docker plays without a Dockerfile docker_plays = filter_docker_plays(docker_plays, TRAVIS_BUILD_DIR) # prints Docker plays print " ".join(str(play) for play in docker_plays)