Commit b6ea11cc by MichaelRoytman Committed by GitHub

Merge pull request #3214 from edx/michael/refactor-balance

Refactor previous script for readability and commenting
parents c6d21053 333f4b0d
...@@ -26,7 +26,7 @@ pkg: docker.pkg ...@@ -26,7 +26,7 @@ pkg: docker.pkg
clean: clean:
rm -rf .build rm -rf .build
docker.test.shard: $(foreach image,$(shell echo $(images) | python util/balancecontainers.py | awk 'NR%$(SHARDS)==$(SHARD)'),$(docker_test)$(image)) docker.test.shard: $(foreach image,$(shell echo $(images) | python util/balancecontainers.py $(SHARDS) | awk 'NR%$(SHARDS)==$(SHARD)'),$(docker_test)$(image))
docker.build: $(foreach image,$(images),$(docker_build)$(image)) docker.build: $(foreach image,$(images),$(docker_build)$(image))
docker.test: $(foreach image,$(images),$(docker_test)$(image)) docker.test: $(foreach image,$(images),$(docker_test)$(image))
......
...@@ -3,57 +3,85 @@ import os ...@@ -3,57 +3,85 @@ import os
import pathlib2 import pathlib2
import itertools import itertools
import sys import sys
import argparse
import logging
class ContainerBalancer: TRAVIS_BUILD_DIR = os.environ.get("TRAVIS_BUILD_DIR")
def __init__(self): CONFIG_FILE_PATH = pathlib2.Path(TRAVIS_BUILD_DIR, "util", "parsefiles_config.yml")
self.load_repo_path() LOGGER = logging.getLogger(__name__)
def load_repo_path(self): def pack_containers(containers, num_shards):
"""Loads the path for the configuration repository from TRAVIS_BUILD_DIR environment variable.""" """
Determines an approximation of the optimal way to pack the containers into a given number of shards so as to
equalize the execution time amongst the shards.
if os.environ.get("TRAVIS_BUILD_DIR"): Input:
self.repo_path = os.environ.get("TRAVIS_BUILD_DIR") containers: A set of Docker containers
else: num_shards: A number of shards amongst which to distribute the Docker containers
raise EnvironmentError("TRAVIS_BUILD_DIR environment variable is not set.") """
def pack_containers(self, containers): # open config file containing container weights
config_file_path = pathlib2.Path(CONFIG_FILE_PATH)
num_shards = 3 with (config_file_path.open(mode='r')) as file:
try:
config = yaml.load(file)
except yaml.YAMLError, exc:
LOGGER.error("error in configuration file: %s" % str(exc))
sys.exit(1)
config_file_path = pathlib2.Path(self.repo_path, "util", "parsefiles_config.yml") # get container weights
weights = config.get("weights")
with config_file_path.open() as config_file: # convert all containers in config file to a list of tuples (<container>, <weight>)
config = yaml.load(config_file) weights_list = [x.items() for x in weights]
weights_list = list(itertools.chain.from_iterable(weights_list))
weights = config.get("weights") # performs intersection between weighted containers and input containers
used_containers = [x for x in weights_list if x[0] in containers]
weights_list = [x.items() for x in weights] # sorts used containers in descending order on the weight
weights_list = list(itertools.chain.from_iterable(weights_list)) sorted_containers = sorted(used_containers, key = lambda x: x[1], reverse=True)
used_containers = [x for x in weights_list if x[0] in containers] shards = []
sorted_containers = sorted(used_containers, key = lambda x: x[1], reverse=True) # for the number of shards
for i in range(0, num_shards):
# initialize initial dict
shards.append({"containers": [], "sum": 0})
shards = [] # for each container
for container in sorted_containers:
# find the shard with the current minimum execution time
shard = min(shards, key = lambda x: x["sum"])
for i in range(0, num_shards): # add the current container to the shard
shards.append({"containers": [], "sum": 0}) shard["containers"].append(container)
for container in sorted_containers: # add the current container's weight to the shard's total expected execution time
# shard with minimum execution time shard["sum"] += container[1]
shard = min(shards, key = lambda x: x["sum"])
shard["containers"].append(container) return shards
shard["sum"] += container[1]
return shards def arg_parse():
parser = argparse.ArgumentParser(description = 'Given a list of containers as input and a number of shards, '
'finds an approximation of the optimal distribution of the containers over the shards, provided a set of hard-coded weights '
'in parsefiles_config.yml.')
parser.add_argument('num_shards', type = int, help = "the number of shards amongst which to distribute Docker builds")
return parser.parse_args()
if __name__ == '__main__': if __name__ == '__main__':
balancer = ContainerBalancer() args = arg_parse()
# configure logging
logging.basicConfig()
containers = [] containers = []
# get containers from standard in
for line in sys.stdin: for line in sys.stdin:
line = line.strip() line = line.strip()
line = line.strip("[]") line = line.strip("[]")
...@@ -61,8 +89,10 @@ if __name__ == '__main__': ...@@ -61,8 +89,10 @@ if __name__ == '__main__':
items = line.split() items = line.split()
containers.extend(items) containers.extend(items)
shards = balancer.pack_containers(containers) # find optimal packing of the containers amongst shards
shards = pack_containers(containers, args.num_shards)
# print space separated list of containers for each shard
for shard in shards: for shard in shards:
middle = " " middle = " "
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment