|
1 | 1 | #!/usr/bin/env python
|
2 | 2 |
|
3 | 3 | import argparse
|
4 |
| -import tempfile |
5 | 4 | import time
|
6 | 5 |
|
7 | 6 | from os import path, makedirs
|
| 7 | +from datetime import datetime |
8 | 8 | from collections import defaultdict
|
9 | 9 | from typing import Iterator, List, Type, Dict, Set, TypeVar, Optional
|
10 |
| -from re import sub, match |
| 10 | +from re import sub, match, search |
11 | 11 | from packaging.version import parse
|
12 | 12 |
|
13 |
| -import botocore |
14 | 13 | import boto3
|
15 | 14 |
|
16 | 15 |
|
|
31 | 30 | "whl/test": "torch_test.html",
|
32 | 31 | }
|
33 | 32 |
|
# Should match torch-2.0.0.dev20221221+cu118-cp310-cp310-linux_x86_64.whl as:
# Group 1: torch-2.0.0.dev
# Group 2: 20221221
# The name part allows letters and underscores only (the former "A-z" range
# also admitted "[", "\", "]", "^" and "`" by accident), and the dot in
# ".dev" is matched literally.
PACKAGE_DATE_REGEX = r"([a-zA-Z_]*-[0-9.]*\.dev)([0-9]*)"

# How many packages should we keep of a specific package?
KEEP_THRESHOLD = 60

S3IndexType = TypeVar('S3IndexType', bound='S3Index')


def extract_package_build_time(full_package_name: str) -> datetime:
    """Return the build date embedded in a package file name.

    The name is searched with PACKAGE_DATE_REGEX; the digits following
    ".dev" are parsed as a YYYYMMDD calendar date.

    Args:
        full_package_name: Package file name, e.g.
            "torch-2.0.0.dev20221221+cu118-cp310-cp310-linux_x86_64.whl".

    Returns:
        A naive datetime at midnight of the embedded date. When the name
        carries no ".dev<digits>" marker, or the digits do not form a valid
        YYYYMMDD date, the current local time is returned instead.
    """
    result = search(PACKAGE_DATE_REGEX, full_package_name)
    if result is not None:
        try:
            # "%m" is the month directive; "%M" would read those two digits
            # as minutes and leave the month at its default of January.
            return datetime.strptime(result.group(2), "%Y%m%d")
        except ValueError:
            # Empty or non-date digit run after ".dev": treat it like a name
            # without a date and fall through to the "now" fallback.
            pass
    return datetime.now()
| 48 | + |
def between_bad_dates(package_build_time: datetime):
    """Report whether a build time lies inside the "bad" date window.

    The window runs from 2022-11-29 00:00 through 2022-12-29 00:00, both
    ends inclusive. Because the closing bound is midnight, any later time
    on 2022-12-29 is already outside the window.

    Args:
        package_build_time: Build timestamp to classify.

    Returns:
        True when the timestamp is within the window, False otherwise.
    """
    window_opens = datetime(2022, 11, 29)
    window_closes = datetime(2022, 12, 29)
    # Guard clause: anything earlier than the opening bound is outside.
    if package_build_time < window_opens:
        return False
    return package_build_time <= window_closes
| 53 | + |
39 | 54 |
|
40 | 55 | class S3Index:
|
41 | 56 | def __init__(self: S3IndexType, objects: List[str], prefix: str) -> None:
|
@@ -70,8 +85,10 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[str]:
|
70 | 85 | packages: Dict[str, int] = defaultdict(int)
|
71 | 86 | to_hide: Set[str] = set()
|
72 | 87 | for obj in all_sorted_packages:
|
73 |
| - package_name = path.basename(obj).split('-')[0] |
74 |
| - if packages[package_name] >= KEEP_THRESHOLD: |
| 88 | + full_package_name = path.basename(obj) |
| 89 | + package_name = full_package_name.split('-')[0] |
| 90 | + package_build_time = extract_package_build_time(full_package_name) |
| 91 | + if packages[package_name] >= KEEP_THRESHOLD or between_bad_dates(package_build_time): |
75 | 92 | to_hide.add(obj)
|
76 | 93 | else:
|
77 | 94 | packages[package_name] += 1
|
|
0 commit comments