salt/tests/committer_parser.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

166 lines
4.5 KiB
Python
Raw Normal View History

#!/usr/bin/python
#
# committer_parser.py
#
# Simple script to parse the output of 'git log' and generate some statistics.
# May leverage GitHub API in the future
#
2020-04-02 20:10:20 -05:00
"""
2015-03-10 10:17:33 -06:00
To use this commit parser script, pipe the output of git log into its stdin:
git log | committer_parser.py -c -
2020-04-02 20:10:20 -05:00
"""
2017-04-05 23:18:00 +01:00
# pylint: disable=resource-leakage
2015-03-10 10:17:33 -06:00
2020-04-02 20:10:20 -05:00
2015-03-09 17:16:47 -06:00
import datetime
import email.utils
import getopt
import re
import sys
2015-03-10 10:17:33 -06:00
class Usage(Exception):
    """
    Raised (or simply constructed) to carry the command-line usage text.

    The full text is available as ``self.msg`` and is also passed to
    ``Exception.__init__`` so that ``str(exc)`` yields the same text.
    """

    def __init__(self, msg):
        """
        Build the usage text, optionally followed by a specific message.

        :param msg: Extra detail appended after a blank line. May be a string,
            an exception instance (it is converted with ``str``), or a falsy
            value such as ``""`` / ``None`` for "no extra detail".
        """
        self.msg = "committer_parser.py [-c | --contributor-detail] - | <logfilename>\n"
        self.msg += (
            " : Parse commit log from git and print number of "
            "commits and unique committers\n"
        )
        self.msg += " : by month. Accepts a filename or reads from stdin.\n"
        self.msg += (
            " : -c | --contributor-detail generates output by "
            "contributor, by month, in a tab-separated table\n"
        )
        # Callers may hand over an exception object (e.g. getopt.error);
        # concatenating that directly onto a str would raise TypeError.
        detail = "" if msg is None else str(msg)
        if detail:
            self.msg += "\n"
            self.msg += detail
        super().__init__(self.msg)
def parse_date(datestr):
    """
    Convert a date string, as printed by ``git log``, into a ``datetime``.

    The string is parsed with ``email.utils.parsedate``. The returned
    ``datetime`` is naive (no ``tzinfo``) and carries the year, month, day,
    hour, minute and second as written in the input; any UTC offset in the
    string is not applied.

    :param datestr: Date text, e.g. ``"Tue Mar 10 10:17:33 2015 -0600"``.
    :return: ``datetime.datetime`` built from the first six parsed fields.
    :raises ValueError: If the string cannot be parsed as a date.
    """
    parsed = email.utils.parsedate(datestr)
    if parsed is None:
        # parsedate signals failure by returning None rather than raising.
        raise ValueError(f"unable to parse date: {datestr!r}")
    # Only the first six fields are meaningful; the remaining entries of the
    # tuple are documented as not usable, so they are not forwarded.
    return datetime.datetime(*parsed[:6])
def _tally_gitlog(lines):
    """Accumulate per-month commit statistics from an iterable of git log lines."""
    results = {}
    commits = {}
    commits_by_contributor = {}

    in_header = False  # True between a "commit " line and the blank line after its header
    pending = 0  # commit headers seen since the last recorded commit
    author_email = None
    commit_date = None

    for raw_line in lines:
        line = raw_line.rstrip()

        if line.startswith("commit "):
            in_header = True
            pending += 1
            # Forget the previous commit's header so a malformed header can
            # never be attributed to the wrong author or month.
            author_email = None
            commit_date = None
            continue

        if line.startswith("Author:"):
            found = re.match(r"Author:\s+(.*)\s+<(.*)>", line)
            if found:
                author_email = found.group(2)
            continue

        if line.startswith("Date:"):
            found = re.match(r"Date:\s+(.*)", line)
            if found:
                commit_date = parse_date(found.group(1))
            continue

        # The first (near-)empty line after a commit header ends that header.
        if len(line) < 2 and in_header:
            in_header = False
            if author_email is None or commit_date is None:
                # Header lacked a usable Author or Date line: skip the commit.
                pending = 0
                continue

            month = f"{commit_date.year}-{str(commit_date.month).zfill(2)}"

            contributors = results.setdefault(month, [])
            if author_email not in contributors:
                contributors.append(author_email)

            per_month = commits_by_contributor.setdefault(author_email, {})
            per_month[month] = per_month.get(month, 0) + 1

            commits[month] = commits.get(month, 0) + pending
            pending = 0

    return (results, commits, commits_by_contributor)


def parse_gitlog(filename=None):
    """
    Parse out the gitlog cli data

    Reads the text output of ``git log`` and groups commits by month
    (``"YYYY-MM"`` keys taken from each commit's ``Date:`` line).

    :param filename: Path of a file containing ``git log`` output. When it is
        falsy or ``"-"`` the log is read from ``sys.stdin`` instead.
    :return: A tuple ``(results, commits, commits_by_contributor)`` where
        ``results`` maps month -> list of distinct author e-mail addresses,
        ``commits`` maps month -> number of commits, and
        ``commits_by_contributor`` maps e-mail -> {month: number of commits}.

    Commits whose header has no parsable ``Author:`` or ``Date:`` line are
    skipped. Errors raised by ``parse_date`` for an unparsable date propagate.
    """
    if not filename or filename == "-":
        # stdin is not opened here, so it is read but deliberately not closed.
        return _tally_gitlog(sys.stdin)

    # Read-only access is sufficient; the log file is never written to.
    with open(filename, encoding="utf-8") as fh:
        return _tally_gitlog(fh)
2015-03-10 10:17:33 -06:00
def counts_by_contributor(commits_by_contributor, results):
    """
    Render a tab-separated table of commit counts per contributor and month.

    The first row holds one tab-prefixed column per key of ``results`` in
    sorted order. Each following row starts with an apostrophe and the
    contributor's e-mail (presumably so spreadsheets treat the cell as text;
    not confirmed here), then one tab-prefixed cell per month that is empty
    when the contributor has no entry for that month.

    :param commits_by_contributor: Mapping e-mail -> {month: commit count}.
    :param results: Mapping whose keys are the months to use as columns.
    :return: The table as a single string, every row terminated by a newline.
    """
    months = sorted(results.keys())

    rows = ["".join(f"\t{month}" for month in months)]
    for address in sorted(commits_by_contributor.keys()):
        per_month = commits_by_contributor[address]
        cells = [
            f"\t{per_month[month]}" if month in per_month else "\t"
            for month in months
        ]
        rows.append(f"'{address}" + "".join(cells))

    return "".join(row + "\n" for row in rows)
2015-03-10 10:17:33 -06:00
def count_results(results, commits):
    """
    Format the per-month contributor and commit totals.

    Note that the column header is printed to stdout by this function itself;
    only the data rows are returned.

    :param results: Mapping month -> collection of contributors for that month.
    :param commits: Mapping month -> number of commits for that month.
    :return: One ``month<TAB>contributors<TAB>commits`` line per month, in
        sorted month order, each terminated by a newline.
    """
    print("Date\tContributors\tCommits")
    return "".join(
        f"{month}\t{len(results[month])}\t{commits[month]}" + "\n"
        for month in sorted(results.keys())
    )
def main(argv=None):
    """
    Command-line entry point.

    Parses the options, reads the git log named by the first positional
    argument (``-`` for stdin) and prints either the per-month summary or,
    with ``-c`` / ``--contributor-detail``, the per-contributor table.

    :param argv: Full argument vector including the program name; defaults to
        ``sys.argv``.
    :return: Process exit status: ``0`` on success or when help was requested,
        ``2`` when the command line is invalid (usage is printed to stderr).
    """
    if argv is None:
        argv = sys.argv

    try:
        opts, args = getopt.getopt(argv[1:], "hc", ["help", "contributor-detail"])
    except getopt.error as err:
        # Usage builds its text by string concatenation, so pass it a str
        # rather than the exception object.
        print(Usage(str(err)).msg, file=sys.stderr)
        return 2

    # Look at every supplied option, not only the first one.
    flags = {flag for flag, _value in opts}

    # Help is honoured before the filename check so "-h" alone works.
    if flags & {"-h", "--help"}:
        print(Usage("").msg)
        return 0

    if not args:
        missing = Usage("committer_parser.py needs a filename or '-' to read from stdin")
        print(missing.msg, file=sys.stderr)
        return 2

    data, counts, commits_by_contributor = parse_gitlog(filename=args[0])

    if flags & {"-c", "--contributor-detail"}:
        print(counts_by_contributor(commits_by_contributor, data))
    else:
        print(count_results(data, counts))
    return 0
2018-12-20 16:19:01 -08:00
if __name__ == "__main__":
    # Run the CLI and hand its return value to the interpreter as exit status.
    exit_status = main()
    sys.exit(exit_status)