Merge pull request #3292 from edx/michael/new-dockerfiles

Michael/new dockerfiles

Merge pull request #3292 from edx/michael/new-dockerfiles
Michael/new dockerfiles
84c1e6be · MichaelRoytman · GitHub · e6d47d49 · 3ff28203 · 84c1e6be
Commit 84c1e6be authored Aug 12, 2016 by MichaelRoytman Committed by GitHub Aug 12, 2016
Hide whitespace changes
Inline Side-by-side

Showing with 184 additions and 43 deletions

test.mk
+5 -1

util/README.md
+52 -0

util/balancecontainers.py
+34 -40

util/check_dockerfile_coverage.py
+54 -0

util/docker_images.py
+36 -0

util/parsefiles_config.yml
+3 -2

No files found.
--- a/test.mk
+++ b/test.mk
@@ -2,10 +2,11 @@
 yml_files:=$(shell find . -name "*.yml")
 json_files:=$(shell find . -name "*.json")
 jinja_files:=$(shell find . -name "*.j2")
+images = $(shell git diff --name-only $(TRAVIS_COMMIT_RANGE) | python util/parsefiles.py)
 test: test.syntax test.edx_east_roles
-test.syntax: test.syntax.yml test.syntax.json test.syntax.jinja
+test.syntax: test.syntax.yml test.syntax.json test.syntax.jinja test.syntax.dockerfiles
 test.syntax.yml: $(patsubst %,test.syntax.yml/%,$(yml_files))
@@ -22,5 +23,8 @@ test.syntax.jinja: $(patsubst %,test.syntax.jinja/%,$(jinja_files))
 test.syntax.jinja/%:
 	cd playbooks && python ../tests/jinja_check.py ../$*
+test.syntax.dockerfiles:
+	python util/check_dockerfile_coverage.py "$(images)"
 test.edx_east_roles:
 	tests/test_edx_east_roles.sh
--- a/util/README.md
+++ b/util/README.md
+# How to add Dockerfiles to configuration file
+The script that handles distributing build jobs across Travis CI shards relies on the parsefiles_config YAML file. This file contains a mapping from each application that has a Dockerfile to its corresponding weight/rank. The rank refers to the approximate running time of a Travis Docker build for that application's Dockerfile. When adding a new Dockerfile to the configuration repository, this configuration file needs to be manually updated in order to ensure that the Dockerfile is also built.
+To modify the configuration file:
+1. Edit the docker.mk file:
+  1. Modify docker_test to include date commands.
+    Replace 
+    ```$(docker_test)%: .build/%/Dockerfile.test
+        docker build -t $*:test -f $< .```
+    with
+    ```$(docker_test)%: .build/%/Dockerfile.test
+        date
+        docker build -t $*:test -f $< .
+        date```
+  2. Replace the command that runs the dependency analyzer with a line to build your Dockerfiles.
+    For example, if adding Dockerfiles for ecommerce and rabbitmq, replace
+    `images:=$(shell git diff --name-only $(TRAVIS_COMMIT_RANGE) | python util/parsefiles.py)`
+    with
+    `images:= ecommerce rabbitmq`
+  3. Replace the command that runs the balancing script with a line to build all images.
+    Replace
+    `docker.test.shard: $(foreach image,$(shell echo $(images) | python util/balancecontainers.py $(SHARDS) | awk 'NR%$(SHARDS)==$(SHARD)'),$(docker_test)$(image))`
+    with
+    `docker.test.shard: $(foreach image,$(shell echo $(images) | tr ' ' '\n' | awk 'NR%$(SHARDS)==$(SHARD)'),$(docker_test)$(image))`
+2. Commit and push to your branch.
+3. Wait for Travis CI to run the builds.
+4. Upon completion, examine the Travis CI logs to find where your Dockerfile was built (search for "docker build -t"). Find the amount of time the build took by comparing the output of the date command before the build command starts and the date command after the build command completes.
+5. Round the build time to a whole number, and add it to the configuration/util/parsefiles_config.yml file.
+6. Undo the changes made in step 1 (all three sub-steps) to restore the docker.mk file to its original state.
+7. Commit and push to your branch. Your Dockerfile should now be built as a part of the Travis CI tests.
--- a/util/balancecontainers.py
+++ b/util/balancecontainers.py
@@ -5,64 +5,62 @@ import itertools
 import sys
 import argparse
 import logging
+import docker_images
 TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR")
 CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml")
 LOGGER = logging.getLogger(__name__)
-def pack_containers(containers, num_shards):
+def pack_shards(used_images, num_shards):
    """
-    Determines an approximation of the optimal way to pack the containers into a given number of shards so as to
+    Determines an approximation of the optimal way to pack the images into a given number of shards so as to
    equalize the execution time amongst the shards.
    Input:
-    containers: A set of Docker containers
+    used_images: A set of Docker images and their ranks
-    num_shards: A number of shards amongst which to distribute the Docker containers
+    num_shards: A number of shards amongst which to distribute the Docker images
    """
-    # open config file containing container weights
-    config_file_path = pathlib2.Path(CONFIG_FILE_PATH)
-    with (config_file_path.open(mode='r')) as file:
-        try:
-            config = yaml.load(file)
-        except yaml.YAMLError, exc:
-            LOGGER.error("error in configuration file: %s" % str(exc))
-            sys.exit(1)
-    # get container weights
-    weights = config.get("weights")
-    # convert all containers in config file to a list of tuples (<container>, <weight>)
-    weights_list = [x.items() for x in weights]
-    weights_list = list(itertools.chain.from_iterable(weights_list))
-    # performs intersection between weighted containers and input containers
-    used_containers = [x for x in weights_list if x[0] in containers]
    # sorts used containers in descending order on the weight
-    sorted_containers = sorted(used_containers, key = lambda x: x[1], reverse=True) 
+    sorted_images = sorted(used_images, key = lambda x: x[1], reverse=True) 
    shards = []
    # for the number of shards
    for i in range(0, num_shards):
        # initialize initial dict
-        shards.append({"containers": [], "sum": 0})
+        shards.append({"images": [], "sum": 0})
    # for each container
-    for container in sorted_containers:
+    for image in sorted_images:
        # find the shard with the current minimum execution time
        shard = min(shards, key = lambda x: x["sum"])
        # add the current container to the shard
-        shard["containers"].append(container)
+        shard["images"].append(image)
        # add the current container's weight to the shard's total expected execution time
-        shard["sum"] += container[1]
+        shard["sum"] += image[1]
    return shards
+def read_input():
+    """
+    Collects image names from standard input.
+    Each line has its surrounding whitespace removed, then any leading or trailing square brackets, and is
+    then split on whitespace. The resulting names from all lines are returned as one flat list.
+    """
+    collected = []
+    for raw_line in sys.stdin:
+        # trim whitespace first so that brackets at the very ends of the line can be stripped
+        cleaned = raw_line.strip().strip("[]")
+        collected.extend(cleaned.split())
+    return collected
 def arg_parse():
    parser = argparse.ArgumentParser(description = 'Given a list of containers as input and a number of shards, '
@@ -79,24 +77,20 @@ if __name__ == '__main__':
    # configure logging
    logging.basicConfig()
-    containers = []
+    # get input from standard in
+    images = read_input()
-    # get containers from standard in
+    # get images that are used and described in configuration file
-    for line in sys.stdin:
+    used_images = docker_images.get_used_images(images)
-        line = line.strip()
-        line = line.strip("[]")
-        items = line.split()
-        containers.extend(items)
-    # find optimal packing of the containers amongst shards
+    # find optimal packing of the images amongst shards
-    shards = pack_containers(containers, args.num_shards)
+    shards = pack_shards(used_images, args.num_shards)
    # print space separated list of containers for each shard
    for shard in shards:
        middle = " "
-        conts = [x[0] for x in shard["containers"]]
+        conts = [x[0] for x in shard["images"]]
        line = middle.join(conts)
        print line
--- a/util/check_dockerfile_coverage.py
+++ b/util/check_dockerfile_coverage.py
+import yaml
+import os
+import pathlib2
+import itertools
+import argparse
+import logging
+import sys
+import docker_images
+TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR")
+CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml")
+LOGGER = logging.getLogger(__name__)
+def check_coverage(images, used_images):
+    """
+    Verifies that every image scheduled to be built has an entry in parsefiles_config.yml. If any image is missing,
+    logs an error that points to the documentation on adding Dockerfile ranks and exits with status 1.
+    Input:
+    images: the set of images scheduled to be built
+    used_images: the subset of images, paired with their ranks, that are in the parsefiles_config.yml file
+    """
+    requested = set(images)
+    # the first element of each used_images entry is the image name
+    described = set(entry[0] for entry in used_images)
+    # images requested for building that have no entry in the configuration file
+    uncovered = requested - described
+    # nothing is missing, so there is nothing to report
+    if not uncovered:
+        return
+    LOGGER.error("The following Dockerfiles are not described in the parsefiles_config.yml file: {}. Please see the following documentation on how to add Dockerfile ranks to the configuration file: {}".format(uncovered, "https://github.com/edx/configuration/blob/master/util/README.md"))
+    sys.exit(1)
+def arg_parse():
+    """
+    Parses the command line, which takes a single positional argument named images, and returns the
+    parsed arguments.
+    """
+    description = 'Given a list of images as input checks that each input image is described correctly in parsefiles_config.yml'
+    images_help = "the Dockerfiles that need to be built as the result of some commit change and whose coverage is checked"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument('images', help=images_help)
+    return parser.parse_args()
+if __name__ == '__main__':
+    args = arg_parse()
+    # configure logging
+    logging.basicConfig()
+    # the images arrive as one whitespace-separated command-line argument
+    images = args.images.split()
+    # keep only the images that have an entry in the configuration file
+    used_images = docker_images.get_used_images(images)
+    check_coverage(images, used_images)
--- a/util/docker_images.py
+++ b/util/docker_images.py
+import yaml
+import os
+import pathlib2
+import itertools
+import sys
+TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR")
+CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml")
+def get_used_images(images):
+    """
+    Returns the images and their ranks that are scheduled to be built and that exist in the configuration file.
+    Input:
+    images: A collection of Docker image names
+    Output:
+    A list of (<image>, <weight>) tuples, in configuration file order, for each entry under "weights" whose
+    image name is in images. Exits with status 1 if the configuration file cannot be parsed as YAML.
+    """
+    # imported here because this module does not import logging at file level
+    import logging
+    # open config file containing container weights
+    config_file_path = pathlib2.Path(CONFIG_FILE_PATH)
+    with config_file_path.open(mode='r') as config_file:
+        try:
+            # NOTE(review): yaml.load can construct arbitrary Python objects; consider yaml.safe_load if this
+            # file could ever come from an untrusted source
+            config = yaml.load(config_file)
+        except yaml.YAMLError as exc:
+            # this module defines no module-level LOGGER, so obtain a logger here; referencing the undefined
+            # name would raise NameError and hide the real parse error
+            logging.getLogger(__name__).error("error in configuration file: %s", exc)
+            sys.exit(1)
+    # get container weights
+    weights = config.get("weights")
+    # convert all images in config file to a list of tuples (<image>, <weight>)
+    weights_list = list(itertools.chain.from_iterable(entry.items() for entry in weights))
+    # build the lookup set once so that each membership test below is constant time
+    requested = set(images)
+    # performs intersection between weighted images and input images
+    return [pair for pair in weights_list if pair[0] in requested]
--- a/util/parsefiles_config.yml
+++ b/util/parsefiles_config.yml
@@ -17,4 +17,6 @@ weights:
  - nginx: 1
  - xqueue: 2
  - trusty-common: 5
  - precise-common: 4
\ No newline at end of file
+  - ecommerce: 6 
+  - rabbitmq: 2