Commit e1e87e1a by David Baumgold

Merge pull request #3570 from edx/db/pr-table-for-release

Make release table by PR, instead of by commit
parents 1a9b5055 b3190865
...@@ -2,30 +2,74 @@ ...@@ -2,30 +2,74 @@
""" """
a release-master multitool a release-master multitool
""" """
from __future__ import print_function, unicode_literals
import sys
from path import path from path import path
from git import Repo from git import Repo, Commit
from git.refs.symbolic import SymbolicReference
import argparse import argparse
from datetime import date, timedelta from datetime import date, timedelta
from dateutil.parser import parse as parse_datestring from dateutil.parser import parse as parse_datestring
import re import re
from collections import OrderedDict import collections
import functools
import textwrap import textwrap
import requests
import json
import getpass
try:
from pygments.console import colorize
except ImportError:
colorize = lambda color, text: text
IGNORED_EMAILS = set(("vagrant@precise32.(none)",))
JIRA_RE = re.compile(r"\b[A-Z]{2,}-\d+\b") JIRA_RE = re.compile(r"\b[A-Z]{2,}-\d+\b")
PR_BRANCH_RE = re.compile(r"remotes/edx/pr/(\d+)")
PROJECT_ROOT = path(__file__).abspath().dirname() PROJECT_ROOT = path(__file__).abspath().dirname()
repo = Repo(PROJECT_ROOT) repo = Repo(PROJECT_ROOT)
git = repo.git git = repo.git
class memoized(object):
    """
    Decorator. Caches a function's return value each time it is called.

    If called later with the same positional arguments, the cached value is
    returned (not reevaluated). Calls whose arguments cannot be hashed (a
    list, for instance) are passed straight through to the wrapped function
    without being cached.
    https://wiki.python.org/moin/PythonDecoratorLibrary#Memoize
    """
    def __init__(self, func):
        self.func = func
        # maps the tuple of positional arguments to the computed result
        self.cache = {}

    def __call__(self, *args):
        # `args` is always a tuple, and a tuple passes an isinstance check
        # against Hashable even when it contains unhashable items, so the
        # only reliable test is to actually try hashing it.
        try:
            hash(args)
        except TypeError:
            # uncacheable. a list, for instance.
            # better to not cache than blow up.
            return self.func(*args)
        if args not in self.cache:
            self.cache[args] = self.func(*args)
        return self.cache[args]

    def __repr__(self):
        '''Return the function's docstring, or its repr if it has none.'''
        # __repr__ must return a string; a missing docstring is None.
        return self.func.__doc__ or repr(self.func)

    def __get__(self, obj, objtype=None):
        '''Support instance methods.'''
        if obj is None:
            # accessed on the class itself: there is no instance to bind
            return self
        return functools.partial(self.__call__, obj)
def make_parser(): def make_parser():
parser = argparse.ArgumentParser(description="release master multitool") parser = argparse.ArgumentParser(description="release master multitool")
parser.add_argument( parser.add_argument(
'--previous', '--prev', '-p', metavar="GITREV", default="origin/release", '--previous', '--prev', '-p', metavar="GITREV", default="edx/release",
help="previous release [origin/release]") help="previous release [%(default)s]")
parser.add_argument( parser.add_argument(
'--current', '--curr', '-c', metavar="GITREV", default="HEAD", '--current', '--curr', '-c', metavar="GITREV", default="HEAD",
help="current release candidate [HEAD]") help="current release candidate [%(default)s]")
parser.add_argument( parser.add_argument(
'--date', '-d', '--date', '-d',
help="expected release date: defaults to " help="expected release date: defaults to "
...@@ -39,6 +83,132 @@ def make_parser(): ...@@ -39,6 +83,132 @@ def make_parser():
return parser return parser
def ensure_pr_fetch():
    """
    Guarantee that the git repository has a remote named "edx" configured
    with a fetch refspec for pull requests in addition to whatever fetch
    refspecs it already has.

    Returns True if the remote or its configuration had to be changed,
    False if everything was already in place.
    """
    changed = False
    pr_refspec = '+refs/pull/*/head:refs/remotes/edx/pr/*'

    if "edx" not in git.remote().splitlines():
        git.remote("add", "edx", "https://github.com/edx/edx-platform.git")
        changed = True

    # The git-python config API doesn't seem to cope with a key that holds
    # more than one value, so go through the `git config` command instead.
    configured_refspecs = git.config("remote.edx.fetch", get_all=True).splitlines()
    if pr_refspec not in configured_refspecs:
        git.config("remote.edx.fetch", pr_refspec, add=True)
        # pull down the PR refs right away so they are available locally
        git.fetch("edx")
        changed = True

    return changed
def get_github_creds():
    """
    Look up stored Github credentials.

    The netrc entry for https://api.github.com is consulted first, then the
    JSON file at ~/.config/edx-release. Returns a (username, token) two-tuple
    when credentials are found, otherwise None.
    """
    from_netrc = requests.utils.get_netrc_auth("https://api.github.com")
    if from_netrc:
        return from_netrc

    settings_path = path("~/.config/edx-release").expand()
    if not settings_path.isfile():
        return None

    with open(settings_path) as handle:
        settings = json.load(handle)
    entry = settings.get("credentials", {}).get("api.github.com", {})
    pair = (entry.get("username", ""), entry.get("token", ""))
    # both halves must be non-empty for the credentials to be usable
    return pair if all(pair) else None
def create_github_creds():
    """
    Prompt for a Github username and password and use them to request a new
    OAuth authorization, asking for a two-factor code if Github demands one.

    Returns a (username, token) two-tuple. Raises
    requests.exceptions.RequestException, carrying Github's error message,
    if the authorization request is rejected.
    https://developer.github.com/v3/oauth_authorizations/#create-a-new-authorization
    """
    request_headers = {"User-Agent": "edx-release"}
    request_body = {"note": "edx-release"}
    username = raw_input("Github username: ")
    password = getpass.getpass("Github password: ")

    def request_authorization():
        # reads request_headers at call time, so a later OTP header is included
        return requests.post(
            "https://api.github.com/authorizations",
            auth=(username, password),
            headers=request_headers, data=json.dumps(request_body),
        )

    response = request_authorization()
    if not response.ok:
        # is the user using two-factor authentication?
        otp_header = response.headers.get("X-GitHub-OTP")
        if otp_header and otp_header.startswith("required;"):
            # get two-factor code, redo the request
            request_headers["X-GitHub-OTP"] = raw_input("Two-factor authentication code: ")
            response = request_authorization()

    if not response.ok:
        raise requests.exceptions.RequestException(response.json()["message"])
    return (username, response.json()["token"])
def ensure_github_creds(attempts=3):
    """
    Make sure that we have Github OAuth credentials. This will check the user's
    .netrc file, as well as the ~/.config/edx-release file. If no credentials
    exist in either place, it will prompt the user to create OAuth credentials
    (up to `attempts` times), and store them in ~/.config/edx-release.

    Returns True if new credentials were created and stored. Returns False if
    credentials already existed, or if every authentication attempt failed.
    """
    import os  # local import: only needed to restrict the config file's mode

    if get_github_creds():
        return False

    # Looks like we need to create the OAuth creds
    print("We need to set up OAuth authentication with Github's API. "
          "Your password will not be stored.", file=sys.stderr)
    username = token = None
    for _ in range(attempts):
        try:
            username, token = create_github_creds()
        except requests.exceptions.RequestException as e:
            print(
                # `.message` only exists on Python 2 exceptions; fall back to
                # the exception itself where it is absent
                "Invalid authentication: {}".format(getattr(e, "message", e)),
                file=sys.stderr,
            )
        else:
            break

    if not token:
        print("Too many invalid authentication attempts.", file=sys.stderr)
        return False
    print("Successfully authenticated to Github", file=sys.stderr)

    config_file = path("~/.config/edx-release").expand()
    # make sure parent directory exists
    config_file.parent.makedirs_p()
    # read existing config if it exists, so unrelated settings are preserved
    if config_file.isfile():
        with open(config_file) as f:
            config = json.load(f)
    else:
        config = {}

    # update config
    github = config.setdefault("credentials", {}).setdefault("api.github.com", {})
    github["username"] = username
    github["token"] = token

    # write it back out. The file holds an API token, so create it readable by
    # the owner only, and tighten the mode of a pre-existing file before any
    # content is written.
    fd = os.open(config_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        os.chmod(config_file, 0o600)
        json.dump(config, f)
    return True
def default_release_date(): def default_release_date():
""" """
Returns a date object corresponding to the expected date of the next release: Returns a date object corresponding to the expected date of the next release:
...@@ -58,83 +228,202 @@ def parse_ticket_references(text): ...@@ -58,83 +228,202 @@ def parse_ticket_references(text):
return JIRA_RE.findall(text) return JIRA_RE.findall(text)
def emails(commit_range): class DoesNotExist(Exception):
def __init__(self, message, commit, branch):
self.message = message
self.commit = commit
self.branch = branch
def get_merge_commit(commit, branch="master"):
""" """
Returns a set of all email addresses responsible for the commits between Given a commit that was merged into the given branch, return the merge commit
the two commit references. for that event.
http://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit
""" """
# %ae prints the authored_by email for the commit commit_range = "{}..{}".format(commit, branch)
# %n prints a newline ancestry_paths = git.rev_list(commit_range, ancestry_path=True).splitlines()
# %ce prints the committed_by email for the commit first_parents = git.rev_list(commit_range, first_parent=True).splitlines()
emails = set(git.log(commit_range, format='%ae%n%ce').splitlines()) both = set(ancestry_paths) & set(first_parents)
return emails - IGNORED_EMAILS for commit_hash in reversed(ancestry_paths):
if commit_hash in both:
return repo.commit(commit_hash)
# no merge commit!
msg = "No merge commit for {commit} in {branch}!".format(
commit=commit, branch=branch,
)
raise DoesNotExist(msg, commit, branch)
def commits_by_email(commit_range, include_merge=False): def get_pr_info(num):
""" """
Return an ordered dictionary of {email: commit_list} Returns the info from the Github API
"""
url = "https://api.github.com/repos/edx/edx-platform/pulls/{num}".format(num=num)
username, token = get_github_creds()
headers = {
"Authorization": "token {}".format(token),
"User-Agent": "edx-release",
}
response = requests.get(url, headers=headers)
result = response.json()
if not response.ok:
raise requests.exceptions.RequestException(result["message"])
return result
def get_merged_prs(start_ref, end_ref):
    """
    Work out which pull requests were merged between two refs.

    A pull request counts when its PR branch is not merged into start_ref
    but is merged into end_ref. Returns the pull request numbers as a set
    of integers.
    """
    ensure_pr_fetch()

    def branch_names(**filters):
        # one branch per output line of `git branch --all`, whitespace trimmed
        listing = git.branch(all=True, **filters)
        return set(line.strip() for line in listing.splitlines())

    missing_from_start = branch_names(no_merged=start_ref)
    present_in_end = branch_names(merged=end_ref)

    pr_numbers = set()
    for name in missing_from_start.intersection(present_in_end):
        found = PR_BRANCH_RE.search(name)
        if found is not None:
            pr_numbers.add(int(found.group(1)))
    return pr_numbers
@memoized
def prs_by_email(start_ref, end_ref):
"""
Returns an ordered dictionary of {email: pr_list}
Email is the email address of the person who merged the pull request
The dictionary is alphabetically ordered by email address The dictionary is alphabetically ordered by email address
The commit list is ordered by commit author date The pull request list is ordered by merge date
""" """
kwargs = {} unordered_data = collections.defaultdict(set)
if not include_merge: for pr_num in get_merged_prs(start_ref, end_ref):
kwargs["no-merges"] = True ref = "refs/remotes/edx/pr/{num}".format(num=pr_num)
branch = SymbolicReference(repo, ref)
try:
merge = get_merge_commit(branch.commit, end_ref)
except DoesNotExist:
pass # this commit will be included in the commits_without_prs table
else:
unordered_data[merge.author.email].add((pr_num, merge))
data = OrderedDict() ordered_data = collections.OrderedDict()
for email in sorted(emails(commit_range)): for email in sorted(unordered_data.keys()):
authored_commits = set(repo.iter_commits( ordered = sorted(unordered_data[email], key=lambda pair: pair[1].authored_date)
commit_range, author=email, **kwargs ordered_data[email] = [num for num, merge in ordered]
return ordered_data
def generate_pr_table(start_ref, end_ref):
"""
Return a string corresponding to a pull request table to embed in Confluence
"""
header = "|| Merged By || Author || Title || PR || JIRA || Verified? ||"
pr_link = "[#{num}|https://github.com/edx/edx-platform/pull/{num}]"
user_link = "[@{user}|https://github.com/{user}]"
rows = [header]
prbe = prs_by_email(start_ref, end_ref)
for email, pull_requests in prbe.items():
for i, pull_request in enumerate(pull_requests):
try:
pr_info = get_pr_info(pull_request)
title = pr_info["title"] or ""
body = pr_info["body"] or ""
author = pr_info["user"]["login"]
except requests.exceptions.RequestException as e:
message = (
"Warning: could not fetch data for #{num}: "
"{message}".format(num=pull_request, message=e.message)
)
print(colorize("red", message), file=sys.stderr)
title = "?"
body = "?"
author = ""
rows.append("| {merged_by} | {author} | {title} | {pull_request} | {jira} | {verified} |".format(
merged_by=email if i == 0 else "",
author=user_link.format(user=author) if author else "",
title=title.replace("|", "\|"),
pull_request=pr_link.format(num=pull_request),
jira=", ".join(parse_ticket_references(body)),
verified="",
)) ))
committed_commits = set(repo.iter_commits( return "\n".join(rows)
commit_range, committer=email, **kwargs
@memoized
def get_commits_not_in_prs(start_ref, end_ref):
"""
Return a tuple of commits that exist between start_ref and end_ref,
but were not merged to the end_ref. If everyone is following the
pull request process correctly, this should return an empty tuple.
"""
return tuple(Commit.iter_items(
repo,
"{start}..{end}".format(start=start_ref, end=end_ref),
first_parent=True, no_merges=True,
)) ))
commits = authored_commits | committed_commits
data[email] = sorted(commits, key=lambda c: c.authored_date)
return data
def generate_table(commit_range, include_merge=False): def generate_commit_table(start_ref, end_ref):
""" """
Return a string corresponding to a commit table to embed in Confluence Return a string corresponding to a commit table to embed in Confluence.
The commits in the table should only be commits that are not in the
pull request table.
""" """
header = u"||Author||Summary||Commit||JIRA||Verified?||" header = "|| Author || Summary || Commit || JIRA || Verified? ||"
commit_link = "[commit|https://github.com/edx/edx-platform/commit/{sha}]" commit_link = "[commit|https://github.com/edx/edx-platform/commit/{sha}]"
rows = [header] rows = [header]
cbe = commits_by_email(commit_range, include_merge) commits = get_commits_not_in_prs(start_ref, end_ref)
for email, commits in cbe.items(): for commit in commits:
for i, commit in enumerate(commits): rows.append("| {author} | {summary} | {commit} | {jira} | {verified} |".format(
rows.append(u"| {author} | {summary} | {commit} | {jira} | {verified} |".format( author=commit.author.email,
author=email if i == 0 else "",
summary=commit.summary.replace("|", "\|"), summary=commit.summary.replace("|", "\|"),
commit=commit_link.format(sha=commit.hexsha), commit=commit_link.format(sha=commit.hexsha),
jira=", ".join(parse_ticket_references(commit.message)), jira=", ".join(parse_ticket_references(commit.message)),
verified="", verified="",
)) ))
return u"\n".join(rows) return "\n".join(rows)
def generate_email(commit_range, release_date=None): def generate_email(start_ref, end_ref, release_date=None):
""" """
Returns a string roughly approximating an email. Returns a string roughly approximating an email.
""" """
if release_date is None: if release_date is None:
release_date = default_release_date() release_date = default_release_date()
prbe = prs_by_email(start_ref, end_ref)
email = """ email = """
To: {emails} To: {emails}
You've made changes that are about to be released. All of the commits You merged at least one pull request for edx-platform that is going out
that you either authored or committed are listed below. Please verify them on in this upcoming release, and you are responsible for verifying those
stage.edx.org and stage-edge.edx.org. changes on the staging servers before the code is released. Please go
to the release page to do so:
https://edx-wiki.atlassian.net/wiki/display/ENG/Release+Page%3A+{date}
Please record your notes on https://edx-wiki.atlassian.net/wiki/display/ENG/Release+Page%3A+{date} The staging servers are:
and add any bugs found to the Release Candidate Bugs section.
If you are a non-affiliated open-source contributor to edx-platform, https://www.stage.edx.org
the edX employee who merged in your pull request will manually verify https://stage-edge.edx.org
your change(s), and you may disregard this message.
Note that you are responsible for verifying any pull requests that you
merged, whether you wrote the code or not. (If you didn't write the code,
you can and should try to get the person who wrote the code to help
verify the changes -- but even if you can't, you're still responsible!)
If you find any bugs, please notify me and record the bugs on the
release page. Thanks!
""".format( """.format(
emails=", ".join(sorted(emails(commit_range))), emails=", ".join(prbe.keys()),
date=release_date.isoformat(), date=release_date.isoformat(),
) )
return textwrap.dedent(email).strip() return textwrap.dedent(email).strip()
...@@ -146,14 +435,15 @@ def main(): ...@@ -146,14 +435,15 @@ def main():
if isinstance(args.date, basestring): if isinstance(args.date, basestring):
# user passed in a custom date, so we need to parse it # user passed in a custom date, so we need to parse it
args.date = parse_datestring(args.date).date() args.date = parse_datestring(args.date).date()
commit_range = "{0}..{1}".format(args.previous, args.current)
ensure_github_creds()
if args.table: if args.table:
print(generate_table(commit_range, include_merge=args.merge)) print(generate_pr_table(args.previous, args.current))
return return
print("EMAIL:") print("EMAIL:")
print(generate_email(commit_range, release_date=args.date).encode('UTF-8')) print(generate_email(args.previous, args.current, release_date=args.date).encode('UTF-8'))
print("\n") print("\n")
print("Wiki Table:") print("Wiki Table:")
print( print(
...@@ -161,7 +451,22 @@ def main(): ...@@ -161,7 +451,22 @@ def main():
"in your release wiki page" "in your release wiki page"
) )
print("\n") print("\n")
print(generate_table(commit_range, include_merge=args.merge).encode('UTF-8')) print(generate_pr_table(args.previous, args.current))
commits_without_prs = get_commits_not_in_prs(args.previous, args.current)
if commits_without_prs:
num = len(commits_without_prs)
plural = num > 1
print("\n")
print(
"There {are} {num} {commits} in this release that did not come in "
"through pull requests!".format(
num=num, are="are" if plural else "is",
commits="commits" if plural else "commit"
)
)
print("\n")
print(generate_commit_table(args.previous, args.current))
if __name__ == "__main__": if __name__ == "__main__":
main() main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment