Skip to content

Commit 7fc7dfe

Browse files
committed
Add --analyze-stacks option
That, using `git rev-list`, prints the total number of stacks per author, along with their average, median (printed as "mean") and max depth.

At the time of submission, here are the top 10 ghstack users of pytorch:
```
ezyang has 462 stacks max depth is 15 avg depth is 1.70 mean is 1
awgu has 240 stacks max depth is 28 avg depth is 4.30 mean is 1
peterbell10 has 146 stacks max depth is 7 avg depth is 1.84 mean is 1
zou3519 has 128 stacks max depth is 7 avg depth is 1.98 mean is 1
jerryzh168 has 113 stacks max depth is 16 avg depth is 1.45 mean is 1
bdhirsh has 111 stacks max depth is 7 avg depth is 1.85 mean is 2
wconstab has 108 stacks max depth is 7 avg depth is 2.15 mean is 1
SherlockNoMad has 99 stacks max depth is 4 avg depth is 1.24 mean is 1
zasdfgbnm has 80 stacks max depth is 11 avg depth is 2.52 mean is 6
desertfire has 73 stacks max depth is 3 avg depth is 1.14 mean is 1
```
1 parent ae69de5 commit 7fc7dfe

File tree

1 file changed

+38
-2
lines changed

1 file changed

+38
-2
lines changed

analytics/github_analyze.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,12 @@ def __init__(self, path, remote='upstream'):
161161
self.repo_dir = path
162162
self.remote = remote
163163

164+
def _run_git_cmd(self, *args) -> str:
    """Run ``git -C <repo_dir>`` with the given arguments.

    The positional ``args`` are appended to the base command as-is and the
    whole list is handed to ``_check_output``, whose result is returned.
    """
    command = ['git', '-C', self.repo_dir, *args]
    return _check_output(command)
166+
164167
def _run_git_log(self, revision_range) -> List[GitCommit]:
165-
log = _check_output(['git', '-C', self.repo_dir, 'log',
166-
'--format=fuller', '--date=unix', revision_range, '--', '.']).split("\n")
168+
log = self._run_git_cmd('log', '--format=fuller',
169+
'--date=unix', revision_range, '--', '.').split("\n")
167170
rc: List[GitCommit] = []
168171
cur_msg: List[str] = []
169172
for line in log:
@@ -179,6 +182,18 @@ def _run_git_log(self, revision_range) -> List[GitCommit]:
179182
def get_commit_list(self, from_ref, to_ref) -> List[GitCommit]:
    """Return the commits in ``<remote>/<from_ref>..<remote>/<to_ref>``.

    Both refs are qualified with ``self.remote`` and the resulting revision
    range is passed to ``_run_git_log``.
    """
    range_start = f"{self.remote}/{from_ref}"
    range_end = f"{self.remote}/{to_ref}"
    return self._run_git_log(f"{range_start}..{range_end}")
181184

185+
def get_ghstack_orig_branches(self) -> List[str]:
    """Return the names of remote branches matching ``<remote>/gh/*/orig``.

    The names come from ``git branch --remotes --list <pattern>`` with
    surrounding whitespace removed from each line.

    Returns:
        One entry per non-blank output line; an empty list when git prints
        nothing (splitting empty output on a newline would otherwise yield a
        single empty branch name).
    """
    pattern = f"{self.remote}/gh/*/orig"
    output = self._run_git_cmd("branch", "--remotes", "--list", pattern)
    stripped_lines = (line.strip() for line in output.splitlines())
    return [name for name in stripped_lines if name]
187+
188+
def show_ref(self, ref) -> str:
    """Return the first space-delimited field of ``git show-ref <ref>`` output.

    Everything before the first space of the raw command output is returned;
    if the output contains no space, the output is returned unchanged.
    """
    output = self._run_git_cmd("show-ref", ref)
    first_field, _, _ = output.partition(" ")
    return first_field
190+
191+
def merge_base(self, ref1, ref2) -> str:
    """Return the whitespace-stripped output of ``git merge-base <ref1> <ref2>``."""
    output = self._run_git_cmd("merge-base", ref1, ref2)
    return output.strip()
193+
194+
def rev_list(self, ref, base_branch="master") -> List[str]:
    """List what ``git rev-list <remote>/<base_branch>..<ref>`` prints.

    Args:
        ref: Revision whose commits are listed.
        base_branch: Branch on ``self.remote`` used as the lower bound of
            the range. Defaults to ``"master"``, the previously hard-coded
            value.

    Returns:
        The whitespace-separated tokens of the command output; an empty
        list when the command prints nothing.
    """
    revision_range = f"{self.remote}/{base_branch}..{ref}"
    return self._run_git_cmd("rev-list", revision_range).strip().split()
196+
182197

183198
def build_commit_dict(commits: List[GitCommit]) -> Dict[str, GitCommit]:
184199
rc = {}
@@ -358,6 +373,22 @@ def get_commits_dict(x, y):
358373
print(f'{html_url};{issue["title"]};{state}')
359374

360375

376+
def analyze_stacks(repo: GitRepo) -> None:
    """Print per-author ghstack stack statistics for ``repo``.

    For every branch returned by ``repo.get_ghstack_orig_branches()`` the
    stack depth is the number of entries returned by ``repo.rev_list``.
    Depths are grouped by author and printed with the author owning the most
    stacks first. Authors with several stacks get the count, max, average and
    median depth; authors with a single stack get only its depth.

    The median is the middle element of the sorted depths (the upper of the
    two middle elements for an even count).
    """
    from collections import defaultdict
    from statistics import median_high
    from tqdm.contrib.concurrent import thread_map

    # Drop blank names so an empty branch listing cannot reach the author
    # lookup below and fail on a missing path component.
    branches = [branch for branch in repo.get_ghstack_orig_branches() if branch]
    stacks_by_author: Dict[str, List[int]] = defaultdict(list)
    # rev_list is invoked once per branch; the calls are fanned out to threads.
    for branch, rv_commits in thread_map(lambda x: (x, repo.rev_list(x)), branches, max_workers=10):
        # Assumes names of the form <remote>/gh/<author>/.../orig, so the
        # third path component is the author -- confirm if remotes may
        # contain "/" themselves.
        author = branch.split("/")[2]
        stacks_by_author[author].append(len(rv_commits))
    for author, depths in sorted(stacks_by_author.items(), key=lambda x: len(x[1]), reverse=True):
        if len(depths) == 1:
            print(f"{author} has 1 stack of depth {depths[0]}")
            continue
        # median_high sorts first; indexing the unsorted list would report an
        # arbitrary element rather than the median.
        print(f"{author} has {len(depths)} stacks max depth is {max(depths)} avg depth is {sum(depths)/len(depths):.2f} median is {median_high(depths)}")
390+
391+
361392
def parse_arguments():
362393
from argparse import ArgumentParser
363394
parser = ArgumentParser(description="Print GitHub repo stats")
@@ -375,6 +406,7 @@ def parse_arguments():
375406
parser.add_argument("--print-reverts", action="store_true")
376407
parser.add_argument("--contributor-stats", action="store_true")
377408
parser.add_argument("--missing-in-branch", action="store_true")
409+
parser.add_argument("--analyze-stacks", action="store_true")
378410
return parser.parse_args()
379411

380412

@@ -392,6 +424,10 @@ def main():
392424

393425
repo = GitRepo(args.repo_path, remote)
394426

427+
if args.analyze_stacks:
428+
analyze_stacks(repo)
429+
return
430+
395431
if args.missing_in_branch:
396432
# Use milestone idx or search it along milestone titles
397433
try:

0 commit comments

Comments
 (0)