Commit 84c1e6be by MichaelRoytman Committed by GitHub

Merge pull request #3292 from edx/michael/new-dockerfiles

Michael/new dockerfiles
parents e6d47d49 3ff28203
......@@ -2,10 +2,11 @@
yml_files:=$(shell find . -name "*.yml")
json_files:=$(shell find . -name "*.json")
jinja_files:=$(shell find . -name "*.j2")
images = $(shell git diff --name-only $(TRAVIS_COMMIT_RANGE) | python util/parsefiles.py)
test: test.syntax test.edx_east_roles
test.syntax: test.syntax.yml test.syntax.json test.syntax.jinja
test.syntax: test.syntax.yml test.syntax.json test.syntax.jinja test.syntax.dockerfiles
test.syntax.yml: $(patsubst %,test.syntax.yml/%,$(yml_files))
......@@ -22,5 +23,8 @@ test.syntax.jinja: $(patsubst %,test.syntax.jinja/%,$(jinja_files))
test.syntax.jinja/%:
cd playbooks && python ../tests/jinja_check.py ../$*
test.syntax.dockerfiles:
python util/check_dockerfile_coverage.py "$(images)"
test.edx_east_roles:
tests/test_edx_east_roles.sh
# How to add Dockerfiles to the configuration file
The script that handles distributing build jobs across Travis CI shards relies on the parsefiles_config YAML file. This file contains a mapping from each application that has a Dockerfile to its corresponding weight/rank. The rank refers to the approximate running time of a Travis Docker build for that application's Dockerfile. When adding a new Dockerfile to the configuration repository, this configuration file needs to be manually updated in order to ensure that the Dockerfile is also built.
To modify the configuration file:
1. Edit the docker.mk file:
1. Modify docker_test to include date commands.
Replace
```$(docker_test)%: .build/%/Dockerfile.test
docker build -t $*:test -f $< .```
with
```$(docker_test)%: .build/%/Dockerfile.test
date
docker build -t $*:test -f $< .
date```
2. Replace the command that runs the dependency analyzer with a line to build your Dockerfiles.
For example, if adding Dockerfiles for ecommerce and rabbitmq, replace
`images:=$(shell git diff --name-only $(TRAVIS_COMMIT_RANGE) | python util/parsefiles.py)`
with
`images:= ecommerce rabbitmq`
3. Replace the command that runs the balancing script with a line to build all images.
Replace
`docker.test.shard: $(foreach image,$(shell echo $(images) | python util/balancecontainers.py $(SHARDS) | awk 'NR%$(SHARDS)==$(SHARD)'),$(docker_test)$(image))`
with
`docker.test.shard: $(foreach image,$(shell echo $(images) | tr ' ' '\n' | awk 'NR%$(SHARDS)==$(SHARD)'),$(docker_test)$(image))`
2. Commit and push to your branch.
3. Wait for Travis CI to run the builds.
4. Upon completion, examine the Travis CI logs to find where your Dockerfile was built (search for "docker build -t"). Find the amount of time the build took by comparing the output of the date command before the build command starts and the date command after the build command completes.
5. Round the build time to a whole number, and add it to the configuration/util/parsefiles_config.yml file.
6. Undo the three sub-steps of step 1 to revert the docker.mk file to its original state.
7. Commit and push to your branch. Your Dockerfile should now be built as a part of the Travis CI tests.
......@@ -5,64 +5,62 @@ import itertools
import sys
import argparse
import logging
import docker_images
TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR")
CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml")
LOGGER = logging.getLogger(__name__)
def pack_containers(containers, num_shards):
def pack_shards(used_images, num_shards):
"""
Determines an approximation of the optimal way to pack the containers into a given number of shards so as to
Determines an approximation of the optimal way to pack the images into a given number of shards so as to
equalize the execution time amongst the shards.
Input:
containers: A set of Docker containers
num_shards: A number of shards amongst which to distribute the Docker containers
used_images: A set of Docker images and their ranks
num_shards: A number of shards amongst which to distribute the Docker images
"""
# open config file containing container weights
config_file_path = pathlib2.Path(CONFIG_FILE_PATH)
with (config_file_path.open(mode='r')) as file:
try:
config = yaml.load(file)
except yaml.YAMLError, exc:
LOGGER.error("error in configuration file: %s" % str(exc))
sys.exit(1)
# get container weights
weights = config.get("weights")
# convert all containers in config file to a list of tuples (<container>, <weight>)
weights_list = [x.items() for x in weights]
weights_list = list(itertools.chain.from_iterable(weights_list))
# performs intersection between weighted containers and input containers
used_containers = [x for x in weights_list if x[0] in containers]
# sorts used containers in descending order on the weight
sorted_containers = sorted(used_containers, key = lambda x: x[1], reverse=True)
sorted_images = sorted(used_images, key = lambda x: x[1], reverse=True)
shards = []
# for the number of shards
for i in range(0, num_shards):
# initialize initial dict
shards.append({"containers": [], "sum": 0})
shards.append({"images": [], "sum": 0})
# for each container
for container in sorted_containers:
for image in sorted_images:
# find the shard with the current minimum execution time
shard = min(shards, key = lambda x: x["sum"])
# add the current container to the shard
shard["containers"].append(container)
shard["images"].append(image)
# add the current container's weight to the shard's total expected execution time
shard["sum"] += container[1]
shard["sum"] += image[1]
return shards
def read_input():
    """
    Collects image names from standard input.

    Each line is trimmed of surrounding whitespace, then of any leading or
    trailing square brackets, and finally split on whitespace. The tokens of
    all lines are returned as one flat list, in the order they were read.
    """
    return [
        token
        for raw_line in sys.stdin
        for token in raw_line.strip().strip("[]").split()
    ]
def arg_parse():
parser = argparse.ArgumentParser(description = 'Given a list of containers as input and a number of shards, '
......@@ -79,24 +77,20 @@ if __name__ == '__main__':
# configure logging
logging.basicConfig()
containers = []
# get input from standard in
images = read_input()
# get containers from standard in
for line in sys.stdin:
line = line.strip()
line = line.strip("[]")
items = line.split()
containers.extend(items)
# get images that are used and described in configuration file
used_images = docker_images.get_used_images(images)
# find optimal packing of the containers amongst shards
shards = pack_containers(containers, args.num_shards)
# find optimal packing of the images amongst shards
shards = pack_shards(used_images, args.num_shards)
# print space separated list of containers for each shard
for shard in shards:
middle = " "
conts = [x[0] for x in shard["containers"]]
conts = [x[0] for x in shard["images"]]
line = middle.join(conts)
print line
import yaml
import os
import pathlib2
import itertools
import argparse
import logging
import sys
import docker_images
TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR")
CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml")
LOGGER = logging.getLogger(__name__)
def check_coverage(images, used_images):
    """
    Verifies that every image scheduled to be built is described in parsefiles_config.yml.

    When at least one image is missing, logs an error that names the missing
    images and links to the documentation on adding Dockerfile ranks, then
    exits with status 1. Returns None when every image is covered.

    Input:
    images: the set of images scheduled to be built
    used_images: the subset of images, paired with their ranks, that are in the parsefiles_config.yml file
    """
    # names of the images that do have an entry in the configuration file
    described = set(entry[0] for entry in used_images)

    # anything scheduled to be built without such an entry is uncovered
    uncovered = set(images).difference(described)

    if not uncovered:
        return

    # report the uncovered Dockerfiles and fail the run
    message = "The following Dockerfiles are not described in the parsefiles_config.yml file: {}. Please see the following documentation on how to add Dockerfile ranks to the configuration file: {}"
    LOGGER.error(message.format(uncovered, "https://github.com/edx/configuration/blob/master/util/README.md"))
    sys.exit(1)
def arg_parse():
    """
    Parses the command line of the Dockerfile coverage check.

    Exactly one positional argument, `images`, is accepted. The parsed
    namespace is returned, with the argument available as its `images`
    attribute.
    """
    description = 'Given a list of images as input checks that each input image is described correctly in parsefiles_config.yml'
    images_help = "the Dockerfiles that need to be built as the result of some commit change and whose coverage is checked"

    cli = argparse.ArgumentParser(description=description)
    cli.add_argument('images', help=images_help)
    return cli.parse_args()
if __name__ == '__main__':
    cli_args = arg_parse()

    # configure logging
    logging.basicConfig()

    # the images arrive as a single whitespace-separated argument
    images = cli_args.images.split()

    # look up which of those images are described in the configuration file,
    # then fail the run if any image is left without a description
    check_coverage(images, docker_images.get_used_images(images))
import yaml
import os
import pathlib2
import itertools
import sys
TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR")
CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml")
def get_used_images(images):
    """
    Returns the images and their ranks that are scheduled to be built and that exist in the configuration file.

    Input:
    images: A set (or any iterable) of Docker image names

    Output:
    A list of (<image>, <weight>) tuples, in configuration-file order, for
    every entry under "weights" whose image name is in `images`. The list is
    empty when the file is empty or has no "weights" entry.

    Logs an error and exits with status 1 if the configuration file is not
    valid YAML.
    """
    # Imported here because this module neither imports logging nor defines a
    # LOGGER at file level; the original error path raised NameError instead
    # of reporting the YAML problem.
    import logging

    logger = logging.getLogger(__name__)

    # open config file containing image weights
    config_file_path = pathlib2.Path(CONFIG_FILE_PATH)
    with config_file_path.open(mode='r') as config_file:
        try:
            # NOTE(review): switched from yaml.load to yaml.safe_load. The
            # weights file only needs plain mappings, lists and numbers, and
            # safe_load refuses to construct arbitrary Python objects.
            config = yaml.safe_load(config_file)
        except yaml.YAMLError as exc:
            logger.error("error in configuration file: %s", exc)
            sys.exit(1)

    # an empty file parses to None, and the "weights" key may be absent
    weights = (config or {}).get("weights") or []

    # flatten the list of mappings into (<image>, <weight>) tuples
    weighted_images = itertools.chain.from_iterable(entry.items() for entry in weights)

    # intersect the weighted images with the requested ones; the set is built
    # once so each membership test is constant time
    wanted = set(images)
    return [pair for pair in weighted_images if pair[0] in wanted]
......@@ -17,4 +17,6 @@ weights:
- nginx: 1
- xqueue: 2
- trusty-common: 5
- precise-common: 4
\ No newline at end of file
- precise-common: 4
- ecommerce: 6
- rabbitmq: 2
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment