arti/maint/gen_md_links

141 lines
3.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Given some markdown text of the general kind we use in Arti changelogs,
look for reference-style links to MRs, issues, and commits, and generate
the appropriate https URLs for them.
Takes input either from a file, or from stdin.
Example:
./gen_md_links < new_changelog
"""
import re
import subprocess
# Matches a bracketed markdown label such as "[#123]" that is NOT immediately
# followed by "(" -- i.e. it skips inline links of the form "[text](url)".
# Group 1 captures the text between the brackets.
# The pattern is compiled with re.X (verbose mode), so the whitespace and the
# "#" comments inside the pattern string are ignored by the regex engine.
LINK_PATTERN = re.compile(r'''
\[([^\]]*)\] # Match [, then some non-] characters, then ]...
(?!\() # But only when not followed by (.
''', re.X)
def links(s):
    """Yield the label of every unresolved markdown link found in `s`.

    A label is the text between square brackets, for any bracket pair
    that is not directly followed by an opening parenthesis.  Labels are
    produced lazily, in the order they occur in `s`; duplicates are not
    removed.

    This is a naive, regex-based scan rather than a real markdown parser,
    which is good enough for the changelogs it is used on.

    >>> list(links("Hello [world]. This [is a link]"))
    ['world', 'is a link']
    >>> list(links("This [link](is resolved)."))
    []
    """
    yield from (found.group(1) for found in LINK_PATTERN.finditer(s))
def is_commit(s):
    """Return true if `s` looks like a (possibly abbreviated) git commit ID.

    A string qualifies when it is at least 6 characters long and consists
    solely of ASCII hexadecimal digits, in either case.

    The characters are checked explicitly instead of parsing with
    ``int(s, 16)``: ``int`` also accepts a leading sign, a ``0x`` prefix,
    underscores between digits, surrounding whitespace, and non-ASCII
    decimal digits, none of which can appear in a commit ID.

    >>> is_commit("a3bcD445")
    True
    >>> is_commit("xyzzy123")
    False
    >>> is_commit("abc")
    False
    >>> is_commit("-abcdef")
    False
    >>> is_commit("0xabcdef")
    False
    """
    hex_digits = "0123456789abcdefABCDEF"
    return len(s) >= 6 and all(ch in hex_digits for ch in s)
def lookup_git_commit(short):
    """Expand a git commit from its short version.

    Runs ``git rev-parse`` on `short` and returns what git prints on
    stdout, stripped of surrounding whitespace and decoded as ASCII.
    Git's stderr output is discarded.

    Raises ValueError if git exits with a nonzero status.

    >>> lookup_git_commit("214c251e41")
    '214c251e41a7583397cc5939b9447b89752ee323'
    >>> lookup_git_commit("00000000000000")
    Traceback (most recent call last):
    ...
    ValueError: Unrecognized git commit 00000000000000
    """
    # run() reads stdout to completion while waiting for the child and
    # closes the pipe afterwards, so git can never stall on a full pipe
    # and no file descriptor is left open.
    result = subprocess.run(
        ["git", "rev-parse", short],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    if result.returncode != 0:
        raise ValueError(f"Unrecognized git commit {short}")
    return result.stdout.strip().decode("ascii")
class LinkType:
    """Integer tags for the kinds of link label this script distinguishes.

    The tag is the first element of the key returned by Link.sort_key(),
    so the numeric order here is also the order in which the kinds are
    grouped when links are sorted.
    """

    # Merge requests, then issues, then commits, then anything else.
    MergeRequest, Issue, Commit, Other = 1, 2, 3, 4
class Link:
    """A single reference-style link label taken from a changelog.

    The label is classified once, at construction time:

    * ``!`` followed by decimal digits is a merge request;
    * ``#`` followed by decimal digits is an issue;
    * anything is_commit() accepts is a commit (stored lowercased);
    * everything else is kept verbatim as an "other" link.
    """

    def __init__(self, s):
        """Classify the label `s` and remember its original spelling."""
        self._s = s
        if s.startswith("!") and s[1:].isdecimal():
            kind, ident = LinkType.MergeRequest, int(s[1:])
        elif s.startswith("#") and s[1:].isdecimal():
            kind, ident = LinkType.Issue, int(s[1:])
        elif is_commit(s):
            kind, ident = LinkType.Commit, s.lower()
        else:
            kind, ident = LinkType.Other, s
        self._type = kind
        self._id = ident

    def sort_key(self):
        """Return a ``(type, id)`` tuple to sort links by kind, then id."""
        return self._type, self._id

    def link(self):
        """Return the URL for this link, or "" for an "other" link.

        Commit links are expanded to the full commit ID through
        lookup_git_commit(), so any exception it raises propagates
        from here.
        """
        kind = self._type
        if kind == LinkType.MergeRequest:
            return f"https://gitlab.torproject.org/tpo/core/arti/-/merge_requests/{self._id}"
        if kind == LinkType.Issue:
            return f"https://gitlab.torproject.org/tpo/core/arti/-/issues/{self._id}"
        if kind == LinkType.Commit:
            full_id = lookup_git_commit(self._id)
            return f"https://gitlab.torproject.org/tpo/core/arti/-/commit/{full_id}"
        if kind == LinkType.Other:
            return ""
        # Unknown type tag: fall through with no URL.
        return None

    def text(self):
        """Return the markdown link-definition line, newline included.

        The line uses the label exactly as it was written; for an "other"
        link the URL part is empty.
        """
        url = self.link()
        return "[{}]: {}\n".format(self._s, url)
def process(s):
    """Build the link-definition text to paste into a changelog.

    Collects the distinct unresolved link labels in the markdown string
    `s`, sorts them with Link.sort_key(), and returns one definition line
    per label, concatenated into a single string.

    >>> print(process("Hello [#123] [!456]"), end="")
    [!456]: https://gitlab.torproject.org/tpo/core/arti/-/merge_requests/456
    [#123]: https://gitlab.torproject.org/tpo/core/arti/-/issues/123
    """
    # A set drops labels that occur more than once in the input.
    unique_labels = set(links(s))
    entries = [Link(label) for label in unique_labels]
    entries.sort(key=Link.sort_key)
    return "".join(entry.text() for entry in entries)
if __name__ == '__main__':
    import sys

    # With no argument, or with "-", read the changelog text from stdin;
    # otherwise treat the first argument as the path of the file to read.
    if len(sys.argv) == 1 or sys.argv[1] == "-":
        text = sys.stdin.read()
    else:
        # The context manager closes the file as soon as it has been read.
        with open(sys.argv[1], 'r') as in_file:
            text = in_file.read()
    print(process(text))