Address comments from code review

3854108e · Michael Roytman · 63f04864 · 3854108e
Commit 3854108e authored Jun 30, 2016 by Michael Roytman
Hide whitespace changes
Inline Side-by-side

Showing with 281 additions and 200 deletions

util/parsefiles.py
+281 -200

No files found.
--- a/util/parsefiles.py
+++ b/util/parsefiles.py
@@ -5,251 +5,334 @@ import yaml
 import sys
 import networkx as nx
 from collections import namedtuple
+import argparse
-class FileParser:
+TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR")
-    def __init__(self):
+DOCKER_PATH_ROOT = pathlib2.Path(TRAVIS_BUILD_DIR, "docker", "build")
-        self._load_repo_path()
+CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml")
+LOGGER = logging.getLogger(__name__)
-    def _load_repo_path(self):
-        """Loads the path for the configuration repository from TRAVIS_BUILD_DIR environment variable."""
+def build_graph(git_dir, roles_dirs, aws_play_dirs, docker_play_dirs):
+    """
-        if os.environ.get("TRAVIS_BUILD_DIR"):
+    Builds a dependency graph that shows relationships between roles and playbooks.
-            self.repo_path = os.environ.get("TRAVIS_BUILD_DIR")
+    An edge [A, B], where A and B are roles, signifies that A depends on B. An edge
-        else:
+    [C, D], where C is a playbook and D is a role, signifies that C uses D.
-            raise EnvironmentError("TRAVIS_BUILD_DIR environment variable is not set.")
+    Input:
-    def build_graph(self, git_dir, roles_dirs, aws_play_dirs, docker_play_dirs):
+    git_dir: A path to the top-most directory in the local git repository the tool is to be run in.
+    roles_dirs: A list of relative paths to directories in which Ansible roles reside.
-        """
+    aws_play_dirs: A list of relative paths to directories in which AWS Ansible playbooks reside.
-        Builds a dependency graph that shows relationships between roles and playbooks.
+    docker_play_dirs: A list of relative paths to directories in which Docker Ansible playbooks reside.
-        An edge [A, B], where A and B are roles, signifies that A depends on B. An edge
-        [C, D], where C is a playbook and D is a role, signifies that C uses D.
+    """
-        """
+    graph = nx.DiGraph()
-        graph = nx.DiGraph()
+    _map_roles_to_roles(graph, roles_dirs, git_dir, "dependencies", "role", "role")
-        self._map_roles_to_roles(graph, roles_dirs, git_dir, "dependencies", "role", "role")
+    _map_plays_to_roles(graph, aws_play_dirs, git_dir, "roles", "aws_playbook", "role")
-        self._map_plays_to_roles(graph, aws_play_dirs, git_dir, "roles", "aws_playbook", "role")
+    _map_plays_to_roles(graph, docker_play_dirs, git_dir, "roles", "docker_playbook", "role")
-        self._map_plays_to_roles(graph, docker_play_dirs, git_dir, "roles", "docker_playbook", "role")
+    return graph
-        return graph
+def _map_roles_to_roles(graph, dirs, git_dir, key, type_1, type_2):
-    def _map_roles_to_roles(self, graph, dirs, git_dir, key, type_1, type_2):
+    """
-        """Maps roles to the roles that they depend on."""
+    Maps roles to the roles that they depend on.
-        Node = namedtuple('Node', ['name', 'type'])
+    Input:
+    graph: A networkx digraph that is used to map Ansible dependencies.
-        # for each role directory
+    dirs: A list of relative paths to directories in which Ansible roles reside.
-        for d in dirs:
+    git_dir: A path to the top-most directory in the local git repository the tool is to be run in.
-            d = pathlib2.Path(git_dir, d)
+    key: The key in a role yaml file in dirs that maps to relevant role data. In this case, key is
+        "dependencies", because a role's dependent roles are of interest.
-            if d.is_dir():
+    type_1: Given edges A-B, the type of node A.
-                # for all files/sub-directories in directory
+    type_2: Given edges A-B, the type of node B.
-                for directory in d.iterdir():
+        Since this function maps roles to their dependent roles, both type_1 and type_2 are "role".
+    """
-                    # attempts to find meta/*.yml file in directory
-                    role = [file for file in directory.glob("meta/*.yml")]
+    Node = namedtuple('Node', ['name', 'type'])
-                    # if role exists
+    # for each role directory
-                    if role:
+    for d in dirs:
-                        with (open(str(role[0]), "r")) as file:
+        d = pathlib2.Path(git_dir, d)
-                            yaml_file = yaml.load(file)
+        # for all files/sub-directories in directory
-                        # if a yaml file and key in file
+        for item in d.iterdir():
-                        if yaml_file is not None and key in yaml_file:
-                            # for each dependent role
+            # attempts to find meta/*.yml file in item directory tree
-                            for dependent in yaml_file[key]:
+            roles = [f for f in item.glob("meta/*.yml")]
+            # if a meta/*.yml file(s) exists for a role
+            if roles:
+                # for each role
+                for role in roles:
+                    yaml_file = _open_yaml_file(role)
+                    # if not an empty yaml file and key in file
+                    if yaml_file is not None and key in yaml_file:
+                        # for each dependent role; yaml_file["dependencies"] returns list of
+                        # dependent roles
+                        for dependent in yaml_file[key]:
+                            # get role name of each dependent role
+                            name = _get_role_name(dependent)
+                            # add node for type_1, typically role
+                            node_1 = Node(item.name, type_1)
+                            # add node for type_2, typically dependent role
+                            node_2 = Node(name, type_2)
+                            # add edge, typically role - dependent role
+                            graph.add_edge(node_1, node_2)
+def _map_plays_to_roles(graph, dirs, git_dir, key, type_1, type_2):
+    """
+    Maps plays to the roles they use.
+    Input:
+    graph: A networkx digraph that is used to map Ansible dependencies.
+    dirs: A list of relative paths to directories in which Ansible playbooks reside.
+    git_dir: A path to the top-most directory in the local git repository the tool is to be run in.
+    key: The key in a playbook yaml file in dirs that maps to relevant playbook data. In this case, key is
+        "roles", because the roles used by a playbook are of interest.
+    type_1: Given edges A-B, the type of node A.
+    type_2: Given edges A-B, the type of node B.
+        Since this function maps plays to the roles they use, type_1 is a type of playbook and type_2 is "role".
+    """
+    Node = namedtuple('Node', ['name', 'type'])
+    # for each play directory
+    for d in dirs:
+        d = pathlib2.Path(git_dir, d)
+        # for all files/sub-directories in directory
+        for item in d.iterdir():
+            # if item is a file ending in .yml
+            if item.match("*.yml"):
+                # open .yml file for playbook
+                yaml_file = _open_yaml_file(item)
+                # if not an empty yaml file
+                if yaml_file is not None:
+                    # for each play in yaml file
+                    for play in yaml_file:
+                        # if specified key in yaml file (e.g. "roles")
+                        if key in play:
+                            # for each role
+                            for role in play[key]:
                                # get role name
-                                name = self._get_role_name(dependent)
+                                name = _get_role_name(role)
+                                # add node for type_1, typically for playbook
+                                node_1 = Node(item.stem, type_1)
-                                # add node for role
+                                # add node for type_2, typically for role
-                                node_1 = Node(directory.name, type_1)
-                                # add node for dependent role
                                node_2 = Node(name, type_2)
-                                # add edge role - dependent role
-                                graph.add_edge(node_1, node_2)
-    def _map_plays_to_roles(self, graph, dirs, git_dir, key, type_1, type_2):
+                                # add edge, typically playbook - role it uses
-        """Maps plays to the roles they use."""
+                                graph.add_edge(node_1, node_2)
-        Node = namedtuple('Node', ['name', 'type'])
+def _open_yaml_file(file_str):
+    """
+    Opens yaml file.
+    Input:
+    file_str: The path to yaml file to be opened.
+    """
+    with (file_str.open(mode='r')) as file:
+        try:
+            yaml_file = yaml.load(file)
+            return yaml_file
+        except yaml.YAMLError, exc:
+            LOGGER.warning("error in configuration file: %s" % str(exc))
+            sys.exit(1)
-        # for each play directory
+def change_set_to_roles(files, git_dir, roles_dirs, playbooks_dirs, graph):
-        for d in dirs:
+    """
-            d = pathlib2.Path(git_dir, d)
+    Converts change set consisting of a number of files to the roles that they represent/contain.
-            if d.is_dir():
+    Input:
-                # for all files/sub-directories in directory
+    files: A list of files modified by a commit range.
-                for directory in d.iterdir():
+    git_dir: A path to the top-most directory in the local git repository the tool is to be run in.
-                    # if a yaml file
+    roles_dirs: A list of relative paths to directories in which Ansible roles reside.
-                    if directory.is_file() and directory.suffix == ".yml":
+    playbooks_dirs: A list of relative paths to directories in which Ansible playbooks reside.
-                        with (open(str(directory), "r")) as file:
+    graph: A networkx digraph that is used to map Ansible dependencies.
-                            yaml_file = yaml.load(file)
+    """
-                        if yaml_file is not None:
+    # set of roles
-                            # for each play in yaml file
+    items = set()
-                            for play in yaml_file:
-                                # if specified key in yaml file (e.g. "roles")
-                                if key in play:
-                                    # for each role
-                                    for role in play[key]:
-                                        # get role name
-                                        name = self._get_role_name(role)
-                                        # add node for playbook
+    # for all directories containing roles
-                                        node_1 = Node(directory.stem, type_1)
+    for role_dir in roles_dirs:
-                                        # add node for role
+        role_dir_path = pathlib2.Path(git_dir, role_dir)
-                                        node_2 = Node(name, type_2)
-                                        # add edge playbook - role
-                                        graph.add_edge(node_1, node_2)
-    def change_set_to_roles(self, files, git_dir, roles_dirs, playbooks_dirs, graph):
+        # get all files in the directories containing roles (i.e. all the roles in that directory)
-        """Converts change set consisting of a number of files to the roles that they represent."""
+        candidate_files = (f for f in role_dir_path.glob("**/*"))
-        # set of roles
+        # for all the files in the change set
-        items = set()
+        for f in files:
+            file_path = pathlib2.Path(git_dir, f)
-        # for all directories containing roles
+            # if the change set file is in the set of role files
-        for role_dir in roles_dirs:
+            if file_path in candidate_files:
-            role_dir_path = pathlib2.Path(git_dir, role_dir)
+                # get name of role and add it to set of roles of the change set
+                items.add(_get_resource_name(file_path, "roles"))
-            # all files in role directory
+    # for all directories containing playbooks
-            candidate_files = [file for file in role_dir_path.glob("**/*")]
+    for play_dir in playbooks_dirs:
+        play_dir_path = pathlib2.Path(git_dir, play_dir)
-            for file in files:
+        # get all files in directory containing playbook that end with yml extension
-                file_path = pathlib2.Path(git_dir, file)
+        # (i.e. all playbooks in that directory)
+        candidate_files = (f for f in play_dir_path.glob("*.yml"))
-                if file_path in candidate_files:
+        # for all files in the change set
-                    name = self.get_resource_name(file_path, "roles")
+        for f in files:
-                    items.add(name)
+            file_path = pathlib2.Path(git_dir, f)
-        # for all directories containing playbooks
+            # if the change set file is in the set of playbook files
-        for play_dir in playbooks_dirs:
+            if file_path in candidate_files:
-            play_dir_path = pathlib2.Path(git_dir, play_dir)
-            # all files in role directory that end with yml extension
+                # gets first level of children of playbook in graph, which represents
-            candidate_files = [file for file in play_dir_path.glob("*.yml")]
+                # all roles the playbook uses
+                descendants = nx.all_neighbors(graph, (file_path.stem, "aws_playbook"))
-            for file in files:
+                # adds all the roles that a playbook uses to set of roles of the change set
-                file_path = pathlib2.Path(git_dir, file)
+                items |= {desc.name for desc in descendants}
+    return items
-                if file_path in candidate_files:
+def _get_resource_name(path, kind):
-                    name = self.get_resource_name(file_path, play_dir_path.name)
+    """
+    Gets name of resource from the filepath, which is the directory following occurrence of kind.
-                    # gets first level of children of playbook in graph, which represents
+    Input:
-                    # roles the playbook uses
+    path: A path to the resource (e.g. a role or a playbook)
-                    descendants = nx.all_neighbors(graph, (file_path.stem, "aws_playbook"))
+    kind: A description of the type of resource; this keyword precedes the name of a role or a playbook
+        in a file path and allows for the separation of its name;
+        e.g. for "configuration/playbooks/roles/discovery/...", kind = "roles" returns
+        "discovery" as the role name
+    """
+    # get individual parts of a file path
+    dirs = path.parts
-                    items |= {desc.name for desc in descendants}
+    # name of resource is the next part of the file path after kind (e.g. after "roles" or "playbooks")
-        return items
+    return dirs[dirs.index(kind)+1]
-    def get_resource_name(self, path, kind):
+def get_dependencies(roles, graph):
-        """Gets name of resource from the filepath, which is the directory following occurence of kind."""
+    """
+    Determines all roles dependent on set of roles and returns set containing both.
-        dirs = path.parts
+    Input:
-        index = dirs.index(kind)
+    roles: A set of roles.
-        name = dirs[index+1]
+    graph: A networkx digraph that is used to map Ansible dependencies.
-        return name
+    """
-    def get_dependencies(self, roles, graph):
+    items = set()
-        """Determines all roles dependent on set of roles and returns set containing both."""
-        items = set()
+    for role in roles:
+        # add the role itself
+        items.add(role)
-        for role in roles:
+        # add all the roles that depend on the role
-            items.add(role)
+        dependents = nx.descendants(graph, (role, "role"))
-            dependents = nx.descendants(graph, (role, "role"))
+        items |= {dependent.name for dependent in dependents}
-            names = {dep.name for dep in dependents}
+    return items
-            items |= names
+def get_docker_plays(roles, graph):
+    """Gets all docker plays that contain at least one role in common with roles."""
-        return items
+    # dict to determine coverage of plays
+    coverage = dict.fromkeys(roles, False)
-    def get_docker_plays(self, roles, graph):
+    items = set()
-        """Gets all docker plays that contain at least role in common with roles."""
-        # dict to determine coverage of plays
+    docker_plays = (node.name for node in graph.nodes() if node.type == "docker_playbook")
-        coverage = dict.fromkeys(roles, False)
-        items = set()
+    for play in docker_plays:
+        # all roles that are used by play
+        roles_nodes = nx.all_neighbors(graph, (play, "docker_playbook"))
-        docker_plays = [node.name for node in graph.nodes() if node.type == "docker_playbook"]
+        docker_roles = {role.name for role in roles_nodes}
-        for play in docker_plays:
+        # compares roles and docker roles
-            # all roles that are used by play
+        common_roles = roles & docker_roles
-            roles_nodes = nx.all_neighbors(graph, (play, "docker_playbook"))
-            docker_roles = {role.name for role in roles_nodes}
+        # if their intersection is non-empty, add the docker play
+        if common_roles:
+            items.add(play)
-            # compares roles and docker roles
+            # each aws role that was in common is marked as being covered by a docker play
-            common_roles = roles & docker_roles
+            for role in common_roles:
+                coverage[role] = True
-            # if their intersection is non-empty, add the docker role
+    # check coverage of roles
-            if common_roles:
+    for role in coverage:
-                items.add(play)
+        if not coverage[role]:
+            LOGGER.warning("role '%s' is not covered." % role)
-                # each aws role that was in common is marked as being covered by a docker play
+    return items
-                for role in common_roles:
-                    coverage[role] = True
-        self.check_coverage(coverage)
+def filter_docker_plays(plays, repo_path):
+    """Filters out docker plays that do not have a Dockerfile."""
-        return items
+    items = set()
-    def filter_docker_plays(self, plays, repo_path):
+    for play in plays:
-        """Filters out docker plays that do not have a Dockerfile."""
+        dockerfile = pathlib2.Path(DOCKER_PATH_ROOT, play, "Dockerfile")
-        items = set()
+        if dockerfile.exists():
-        logger = logging.getLogger(__name__)
+            items.add(play)
+        else:
+            LOGGER.warning("covered playbook '%s' does not have Dockerfile." % play)
-        for play in plays:
+    return items
-            dockerfile = pathlib2.Path(self.repo_path, "docker", "build", play, "Dockerfile")
-            if dockerfile.exists():
+def _get_role_name(role):
-                items.add(play)
+    """
-            else:
+    Resolves a role name from either a simple declaration or a dictionary style declaration.
-                logger.warning(" covered playbook '%s' does not have Dockerfile." % play)
-        return items
+    A simple declaration would look like:
+    - foo
-    def check_coverage(self, coverage):
+    A dictionary style declaration would look like:
-        """Checks which aws roles are not covered by docker plays."""
+    - role: rbenv
+      rbenv_user: "{{ forum_user }}"
+      rbenv_dir: "{{ forum_app_dir }}"
+      rbenv_ruby_version: "{{ forum_ruby_version }}"
-        logging.basicConfig(level=logging.WARNING)
+    :param role:
-        logger = logging.getLogger(__name__)
+    :return:
+    """
+    if isinstance(role, dict):
+        return role['role']
+    elif isinstance(role, basestring):
+        return role
+    else:
+        LOGGER.warning("role %s could not be resolved to a role name." % role)
+        return None
-        for role in coverage:
+def arg_parse():
-            if not coverage[role]:
+    parser = argparse.ArgumentParser(description = 'Given a commit range, analyze Ansible dependencies between roles and playbooks '
-                logger.warning(" role '%s' is not covered." % role)
+    'and output a list of Docker plays affected by this commit range via these dependencies.')
+    parser.add_argument('--verbose', help="set warnings to be displayed", action="store_true")
-    def _get_role_name(self, role):
+    return parser.parse_args()
-            """
-            Resolves a role name from either a simple declaration or a dictionary style declaration.
-            A simple declaration would look like:
+if __name__ == '__main__':
-            - foo
-            A dictionary style declaration would look like:
+    args = arg_parse()
-            - role: rbenv
-              rbenv_user: "{{ forum_user }}"
-              rbenv_dir: "{{ forum_app_dir }}"
-              rbenv_ruby_version: "{{ forum_ruby_version }}"
-            :param role:
+    # configure logging
-            :return:
+    logging.basicConfig()
-            """
-            if isinstance(role, dict):
-                return role['role']
-            elif isinstance(role, basestring):
-                return role
-            return None
-if __name__ == '__main__':
+    if not args.verbose:
-    parser = FileParser()
+        logging.disable(logging.WARNING)
+    # set of modified files in the commit range
    change_set = set()
    # read from standard in
@@ -257,24 +340,22 @@ if __name__ == '__main__':
        change_set.add(line.rstrip())
    # read config file
-    config_file_path = pathlib2.Path(parser.repo_path, "util", "parsefiles_config.yml")
+    config = _open_yaml_file(CONFIG_FILE_PATH)
-    with config_file_path.open() as config_file:
-            config = yaml.load(config_file)
-    # build grpah
+    # build graph
-    graph = parser.build_graph(parser.repo_path, config["roles_paths"], config["aws_plays_paths"], config["docker_plays_paths"])
+    graph = build_graph(TRAVIS_BUILD_DIR, config["roles_paths"], config["aws_plays_paths"], config["docker_plays_paths"])
    # transforms list of roles and plays into list of original roles and the roles contained in the plays
-    roles = parser.change_set_to_roles(change_set, parser.repo_path, config["roles_paths"], config["aws_plays_paths"], graph)
+    roles = change_set_to_roles(change_set, TRAVIS_BUILD_DIR, config["roles_paths"], config["aws_plays_paths"], graph)
    # expands roles set to include roles that are dependent on existing roles
-    dependent_roles = parser.get_dependencies(roles, graph)
+    dependent_roles = get_dependencies(roles, graph)
    # determine which docker plays cover at least one role
-    docker_plays = parser.get_docker_plays(dependent_roles, graph)
+    docker_plays = get_docker_plays(dependent_roles, graph)
    # filter out docker plays without a Dockerfile
-    docker_plays = parser.filter_docker_plays(docker_plays, parser.repo_path)
+    docker_plays = filter_docker_plays(docker_plays, TRAVIS_BUILD_DIR)
+    # prints Docker plays
    print " ".join(str(play) for play in docker_plays)