parsefiles.py 13.5 KB
Newer Older
1 2 3 4 5 6 7
import os
import pathlib2
import logging
import yaml
import sys
import networkx as nx
from collections import namedtuple
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
import argparse

# Root of the repository checkout, read from the environment (None when the variable is unset).
TRAVIS_BUILD_DIR = os.getenv("TRAVIS_BUILD_DIR")
# Directory under which each Docker play is looked up as <play>/Dockerfile.
DOCKER_PATH_ROOT = pathlib2.Path(TRAVIS_BUILD_DIR) / "docker" / "build"
# YAML configuration file listing the role and playbook directories to analyze.
CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR) / "util" / "parsefiles_config.yml"
# Module-level logger shared by every function in this file.
LOGGER = logging.getLogger(__name__)

def build_graph(git_dir, roles_dirs, aws_play_dirs, docker_play_dirs):
    """
    Creates the directed dependency graph between Ansible roles and playbooks.

    An edge [A, B] between two roles means that role A depends on role B. An edge
    [C, D] from a playbook C to a role D means that playbook C uses role D.

    Input:
    git_dir: A path to the top-most directory in the local git repository tool is to be run in.
    roles_dirs: A list of relative paths to directories in which Ansible roles reside.
    aws_play_dirs: A list of relative paths to directories in which AWS Ansible playbooks reside.
    docker_play_dirs: A list of relative paths to directories in which Docker Ansible playbooks reside.

    Returns the populated networkx DiGraph.
    """

    dependency_graph = nx.DiGraph()

    # role -> role edges, read from the "dependencies" key of the roles' meta files
    _map_roles_to_roles(dependency_graph, roles_dirs, git_dir, "dependencies", "role", "role")

    # playbook -> role edges, read from the "roles" key of every play; AWS playbooks first
    playbook_sources = (
        (aws_play_dirs, "aws_playbook"),
        (docker_play_dirs, "docker_playbook"),
    )
    for play_dirs, playbook_type in playbook_sources:
        _map_plays_to_roles(dependency_graph, play_dirs, git_dir, "roles", playbook_type, "role")

    return dependency_graph

def _map_roles_to_roles(graph, dirs, git_dir, key, type_1, type_2):
    """
    Maps roles to the roles that they depend on.

    Every entry of each directory in dirs is treated as a role; its meta/*.yml files are
    parsed and one edge (role -> dependency) is added to graph per entry listed under key.
    Meta files that are empty, lack key, or have an empty value for key contribute nothing,
    and dependencies whose name cannot be resolved are skipped.

    Input:
    graph: A networkx digraph that is used to map Ansible dependencies.
    dirs: A list of relative paths to directories in which Ansible roles reside.
    git_dir: A path to the top-most directory in the local git repository tool is to be run in.
    key: The key in a role yaml file in dirs that maps to relevant role data. In this case, key is
        "dependencies", because a role's dependent roles is of interest.
    type_1: Given edges A-B, the type of node A.
    type_2: Given edges A-B, the type of node B.
        Since this function maps roles to their dependent roles, both type_1 and type_2 are "role".
    """

    Node = namedtuple('Node', ['name', 'type'])

    # for each role directory
    for d in dirs:
        d = pathlib2.Path(git_dir, d)

        # for all files/sub-directories in directory
        for item in d.iterdir():

            # a role declares its dependencies in meta/*.yml; the glob is simply
            # empty for entries that have no such file
            for meta_file in item.glob("meta/*.yml"):
                yaml_file = _open_yaml_file(meta_file)

                # skip empty yaml files and files that do not contain key
                if yaml_file is None or key not in yaml_file:
                    continue

                # a bare "dependencies:" entry parses to None, so fall back to an empty sequence
                for dependent in yaml_file[key] or ():
                    name = _get_role_name(dependent)

                    # an unresolvable declaration must not become a node named None
                    if name is None:
                        continue

                    # add edge, typically role - dependent role
                    graph.add_edge(Node(item.name, type_1), Node(name, type_2))

def _map_plays_to_roles(graph, dirs, git_dir, key, type_1, type_2):
    """
    Maps plays to the roles they use.

    Every *.yml entry of each directory in dirs is parsed as a playbook (a list of plays) and
    one edge (playbook -> role) is added to graph per role listed under key in any of its plays.
    Files whose top level is not a list, plays that are not mappings, plays with an empty value
    for key, and roles whose name cannot be resolved are skipped.

    Input:
    graph: A networkx digraph that is used to map Ansible dependencies.
    dirs: A list of relative paths to directories in which Ansible playbooks reside.
    git_dir: A path to the top-most directory in the local git repository tool is to be run in.
    key: The key in a playbook yaml file in dirs that maps to relevant playbook data. In this case, key is
        "roles", because the roles used by a playbook is of interest.
    type_1: Given edges A-B, the type of node A.
    type_2: Given edges A-B, the type of node B.
        Since this function maps plays to the roles they use, type_1 is a type of playbook and type_2 is "role".
    """

    Node = namedtuple('Node', ['name', 'type'])

    # for each play directory
    for d in dirs:
        d = pathlib2.Path(git_dir, d)

        # for all files/sub-directories in directory
        for item in d.iterdir():

            # only entries ending in .yml are considered playbooks
            if not item.match("*.yml"):
                continue

            yaml_file = _open_yaml_file(item)

            # a playbook is a list of plays; empty files and files with another top-level
            # shape (e.g. a mapping of variables) cannot contribute edges
            if not isinstance(yaml_file, list):
                continue

            for play in yaml_file:
                # only mappings can carry the key; on a string, "in" would be a substring test
                if not isinstance(play, dict) or key not in play:
                    continue

                # a bare "roles:" entry parses to None, so fall back to an empty sequence
                for role in play[key] or ():
                    name = _get_role_name(role)

                    # an unresolvable declaration must not become a node named None
                    if name is None:
                        continue

                    # add edge, typically playbook - role it uses
                    graph.add_edge(Node(item.stem, type_1), Node(name, type_2))
135

136 137 138 139 140 141 142 143 144 145 146 147 148
def _open_yaml_file(file_str):
    """
    Opens and parses a yaml file.

    Input:
    file_str: The path to yaml file to be opened; must provide an open() method
        (the callers in this file pass pathlib2.Path objects).

    Returns the parsed content of the file (callers treat None as an empty file).
    If the file is not valid yaml, the error is logged and the process exits with status 1.
    """

    with file_str.open(mode='r') as yaml_stream:
        try:
            # NOTE(review): yaml.load without an explicit Loader can construct arbitrary
            # Python objects and is deprecated in recent PyYAML releases; if these files can
            # come from an untrusted source, switch to yaml.safe_load.
            return yaml.load(yaml_stream)
        except yaml.YAMLError as exc:
            LOGGER.error("error in configuration file: %s", exc)
            sys.exit(1)
151

152 153 154
def change_set_to_roles(files, git_dir, roles_dirs, playbooks_dirs, graph):
    """
    Converts change set consisting of a number of files to the roles that they represent/contain.

    A changed file that lives anywhere below one of roles_dirs contributes the role it belongs to.
    A changed *.yml file directly inside one of playbooks_dirs contributes every role that is a
    neighbor of that playbook's "aws_playbook" node in graph; playbooks that are not in graph
    (because they use no roles) contribute nothing.

    Input:
    files: A list of files modified by a commit range.
    git_dir: A path to the top-most directory in the local git repository tool is to be run in.
    roles_dirs: A list of relative paths to directories in which Ansible roles reside.
    playbooks_dirs: A list of relative paths to directories in which Ansible playbooks reside.
    graph: A networkx digraph that is used to map Ansible dependencies.

    Returns a set of role names.
    """

    # set of roles
    items = set()

    # resolve the change set against the repository root once, not once per directory
    changed_paths = {pathlib2.Path(git_dir, f) for f in files}

    # for all directories containing roles
    for role_dir in roles_dirs:
        role_dir_path = pathlib2.Path(git_dir, role_dir)

        # everything below the roles directory (i.e. all files of all roles in that directory)
        candidate_files = set(role_dir_path.glob("**/*"))

        # each changed file that belongs to a role contributes the name of that role
        for file_path in changed_paths & candidate_files:
            items.add(_get_resource_name(file_path, "roles"))

    # for all directories containing playbooks
    for play_dir in playbooks_dirs:
        play_dir_path = pathlib2.Path(git_dir, play_dir)

        # all files directly in the directory that end with the yml extension
        # (i.e. all playbooks in that directory)
        candidate_files = set(play_dir_path.glob("*.yml"))

        for file_path in changed_paths & candidate_files:
            playbook = (file_path.stem, "aws_playbook")

            # a playbook only becomes a node when it uses at least one role;
            # asking for the neighbors of a missing node would raise
            if playbook not in graph:
                continue

            # the neighbors of a playbook node are the roles the playbook uses
            items |= {desc.name for desc in nx.all_neighbors(graph, playbook)}
    return items
205

206 207 208
def _get_resource_name(path, kind):
    """
    Extracts the name of a resource from its file path.

    The name is the path component that immediately follows the first occurrence of kind.

    Input:
    path: A path to the resource (e.g. a role or a playbook); must expose its components as .parts
    kind: The path component that precedes the name of the resource;
        e.g. for "configuration/playbooks/roles/discovery/...", kind = "roles" returns
        "discovery" as the role name
    """
    components = path.parts

    # position of the first component equal to kind; the resource name is the one after it
    kind_position = components.index(kind)
    return components[kind_position + 1]
222

223 224 225
def get_dependencies(roles, graph):
    """
    Expands a set of roles with the roles reachable from them in the dependency graph.

    For every role, the role itself is kept and the names of all nodes returned by
    nx.descendants for its (name, "role") node are added. Roles that are not nodes of the
    graph (roles without any edge) are kept as they are.

    NOTE(review): the graph's role edges point from a role to the roles it depends on, so
    descendants are a role's dependencies rather than the roles depending on it - confirm
    that this is the intended direction.

    Input:
    roles: A set of roles.
    graph: A networkx digraph that is used to map Ansible dependencies.

    Returns a set of role names containing roles and everything reachable from them.
    """

    items = set()

    for role in roles:
        # add the role itself
        items.add(role)

        node = (role, "role")

        # a role without edges never became a node; nx.descendants would raise for it
        if node not in graph:
            continue

        # add all the roles reachable from the role
        items |= {dependent.name for dependent in nx.descendants(graph, node)}

    return items
244

245 246
def get_docker_plays(roles, graph):
    """
    Gets all docker plays that have at least one role in common with roles.

    Input:
    roles: A set of role names.
    graph: A networkx digraph that is used to map Ansible dependencies.

    Returns a set with the names of the matching docker plays. A warning is logged for every
    role in roles that is not used by any docker play.
    """

    # role name -> True once at least one docker play uses the role
    coverage = dict.fromkeys(roles, False)

    matching_plays = set()

    for node in graph.nodes():
        # only docker playbooks are of interest
        if node.type != "docker_playbook":
            continue

        # names of the roles the play is connected to
        play_roles = {neighbor.name for neighbor in nx.all_neighbors(graph, (node.name, "docker_playbook"))}

        shared_roles = roles & play_roles

        # plays without any role in common are ignored
        if not shared_roles:
            continue

        matching_plays.add(node.name)

        # remember which of the requested roles this play covers
        for role in shared_roles:
            coverage[role] = True

    # report the roles no docker play covers
    for role, covered in coverage.items():
        if not covered:
            LOGGER.warning("role '%s' is not covered." % role)

    return matching_plays
278

279 280
def filter_docker_plays(plays, repo_path):
    """
    Filters out docker plays that do not have a Dockerfile.

    Input:
    plays: An iterable of docker play names.
    repo_path: Not used by this function; Dockerfiles are looked up below DOCKER_PATH_ROOT.

    Returns the set of plays for which DOCKER_PATH_ROOT/<play>/Dockerfile exists. A warning is
    logged for every play that is dropped.
    """

    kept = set()

    for play in plays:
        if pathlib2.Path(DOCKER_PATH_ROOT, play, "Dockerfile").exists():
            kept.add(play)
            continue

        # no Dockerfile: report the play and leave it out of the result
        LOGGER.warning("covered playbook '%s' does not have Dockerfile." % play)

    return kept
293

294 295 296
def _get_role_name(role):
    """
    Resolves a role name from either a simple declaration or a dictionary style declaration.

    A simple declaration would look like:
    - foo

    A dictionary style declaration would look like:
    - role: rbenv
      rbenv_user: "{{ forum_user }}"
      rbenv_dir: "{{ forum_app_dir }}"
      rbenv_ruby_version: "{{ forum_ruby_version }}"

    Ansible also accepts "name" in place of "role" in a dictionary style declaration, so that
    key is used when "role" is absent.

    :param role: a role declaration as parsed from yaml (a string or a dictionary)
    :return: the role name, or None (after logging a warning) if no name can be resolved
    """
    # basestring only exists on Python 2; fall back to str on other interpreters
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(role, string_types):
        return role

    if isinstance(role, dict):
        # a dictionary without either key falls through to the warning; it must not raise KeyError
        name = role.get('role', role.get('name'))
        if name is not None:
            return name

    # lazy formatting: eager "%" formatting would itself fail for tuple declarations
    LOGGER.warning("role %s could not be resolved to a role name.", role)
    return None
317

318
def arg_parse():
    """
    Parses the command line arguments of the tool.

    Returns the argparse namespace; its only attribute is the boolean "verbose".
    """
    description = ('Given a commit range, analyze Ansible dependencies between roles and playbooks '
                   'and output a list of Docker plays affected by this commit range via these dependencies.')

    cli = argparse.ArgumentParser(description=description)
    cli.add_argument('--verbose', action="store_true", help="set warnings to be displayed")

    return cli.parse_args()
325

326
if __name__ == '__main__':

    args = arg_parse()

    # configure logging; unless --verbose is given, messages of level WARNING and below are suppressed
    logging.basicConfig()

    if not args.verbose:
        logging.disable(logging.WARNING)

    # set of modified files in the commit range
    change_set = set()

    # read from standard in, one path per line; blank lines do not name a file and are ignored
    for line in sys.stdin:
        changed_file = line.rstrip()
        if changed_file:
            change_set.add(changed_file)

    # configuration file is expected to be in the following format:
    #
    # roles_paths:
    #       - <all paths relative to configuration repository that contain Ansible roles>
    # aws_plays_paths:
    #       - <all paths relative to configuration repository that contain aws Ansible playbooks>
    # docker_plays_paths:
    #       - <all paths relative to configuration repository that contain Docker Ansible playbooks>

    # read config file
    config = _open_yaml_file(CONFIG_FILE_PATH)

    # build graph
    graph = build_graph(TRAVIS_BUILD_DIR, config["roles_paths"], config["aws_plays_paths"], config["docker_plays_paths"])

    # transforms list of roles and plays into list of original roles and the roles contained in the plays
    roles = change_set_to_roles(change_set, TRAVIS_BUILD_DIR, config["roles_paths"], config["aws_plays_paths"], graph)

    # expands roles set to include the roles connected to the existing roles through the graph
    dependent_roles = get_dependencies(roles, graph)

    # determine which docker plays cover at least one role
    docker_plays = get_docker_plays(dependent_roles, graph)

    # filter out docker plays without a Dockerfile
    docker_plays = filter_docker_plays(docker_plays, TRAVIS_BUILD_DIR)

    # prints Docker plays, separated by single spaces; the parenthesized single-argument form
    # produces the same output with the Python 2 print statement and the Python 3 print function
    print(" ".join(str(play) for play in docker_plays))