# Copyright 2025 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import

import codecs
import hashlib
import re
from collections import defaultdict

from pex import hashing
from pex.compatibility import PY3, url_unquote, url_unquote_plus, urlparse
from pex.dist_metadata import is_wheel
from pex.enum import Enum
from pex.hashing import HashlibHasher
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
    from typing import (
        BinaryIO,
        Container,
        DefaultDict,
        Dict,
        Iterable,
        List,
        Mapping,
        Optional,
        Sequence,
        Text,
        Tuple,
        Union,
    )

    import attr  # vendor:skip
else:
    from pex.third_party import attr


class VCS(Enum["VCS.Value"]):
    class Value(Enum.Value):
        pass

    Bazaar = Value("bzr")
    Git = Value("git")
    Mercurial = Value("hg")
    Subversion = Value("svn")


VCS.seal()


@attr.s(frozen=True)
class VCSScheme(object):
    vcs = attr.ib()  # type: VCS.Value
    scheme = attr.ib()  # type: str


class ArchiveScheme(Enum["ArchiveScheme.Value"]):
    class Value(Enum.Value):
        pass

    FTP = Value("ftp")
    HTTP = Value("http")
    HTTPS = Value("https")


ArchiveScheme.seal()


def parse_scheme(scheme):
    # type: (str) -> Union[str, ArchiveScheme.Value, VCSScheme]
    match = re.match(
        r"""
        ^
        (?:
            (?P<archive_scheme>
                # Archives
                ftp
                | https?
            )
            |
            (?P<vcs_type>
                # VCSs: https://pip.pypa.io/en/stable/reference/pip_install/#vcs-support
                bzr
                | git
                | hg
                | svn
            )\+(?P<vcs_scheme>.+)
        )
        $
        """,
        scheme,
        re.VERBOSE,
    )
    if not match:
        return scheme

    archive_scheme = match.group("archive_scheme")
    if archive_scheme:
        return cast(ArchiveScheme.Value, ArchiveScheme.for_value(archive_scheme))

    return VCSScheme(vcs=VCS.for_value(match.group("vcs_type")), scheme=match.group("vcs_scheme"))
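# Illustrative results of parse_scheme (derived from the regex above, not exercised here):
#   parse_scheme("https")    -> ArchiveScheme.HTTPS
#   parse_scheme("git+ssh")  -> VCSScheme(vcs=VCS.Git, scheme="ssh")
#   parse_scheme("s3")       -> "s3" (unrecognized schemes pass through unchanged)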


@attr.s(frozen=True)
class Fingerprint(object):
    @classmethod
    def from_stream(
        cls,
        stream,  # type: BinaryIO
        algorithm="sha256",  # type: str
    ):
        # type: (...) -> Fingerprint
        digest = hashlib.new(algorithm)
        hashing.update_hash(filelike=stream, digest=digest)
        return cls(algorithm=algorithm, hash=digest.hexdigest())

    @classmethod
    def from_digest(cls, digest):
        # type: (HashlibHasher) -> Fingerprint
        return cls.from_hashing_fingerprint(digest.hexdigest())

    @classmethod
    def from_hashing_fingerprint(cls, fingerprint):
        # type: (hashing.Fingerprint) -> Fingerprint
        return cls(algorithm=fingerprint.algorithm, hash=fingerprint)

    algorithm = attr.ib()  # type: str
    hash = attr.ib()  # type: str
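# Illustrative use of Fingerprint.from_stream (hypothetical wheel path): hashing a file's
# bytes with the default algorithm produces a sha256 fingerprint over the full stream:
#   with open("foo-1.0-py3-none-any.whl", "rb") as fp:
#       fingerprint = Fingerprint.from_stream(fp)  # Fingerprint(algorithm="sha256", hash=<hex digest>)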


# These ranks prefer the highest digest size and then use alphabetic order for a tie-break.
RANKED_ALGORITHMS = tuple(
    sorted(
        hashlib.algorithms_guaranteed,
        key=lambda alg: (-hashlib.new(alg).digest_size, alg),
    )
)
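# For example, this ranks sha512 (64-byte digest) ahead of sha256 (32 bytes), and both
# ahead of sha1 and md5; algorithms with equal digest sizes fall back to name order.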


def split_requested_revision(artifact_url):
    # type: (ArtifactURL) -> Tuple[str, Optional[str]]
    vcs_url, _, requested_revision = artifact_url.normalized_url.partition("@")
    return vcs_url, requested_revision or None
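# Illustrative split (example URL): for an ArtifactURL parsed from
# "git+https://github.com/pypa/sampleproject@v1.0" this yields
# ("git+https://github.com/pypa/sampleproject", "v1.0"); with no "@" in the URL the
# requested revision is None.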


def parse_qs(query_string):
    # type: (str) -> Dict[str, List[str]]
    if PY3:
        return urlparse.parse_qs(query_string)
    else:
        # N.B.: Python2.7 splits parameters on `&` _and_ `;`. We only want splits on `&`.
        parameters = defaultdict(list)  # type: DefaultDict[str, List[str]]
        for parameter in query_string.split("&"):
            raw_name, sep, raw_value = parameter.partition("=")
            if not sep:
                continue
            name = url_unquote_plus(raw_name)
            value = url_unquote_plus(raw_value)
            parameters[name].append(value)
        return parameters
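# e.g. parse_qs("subdirectory=src&egg=foo") -> {"subdirectory": ["src"], "egg": ["foo"]}
# on both branches; repeated parameter names accumulate into the value list.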


@attr.s(frozen=True)
class ArtifactURL(object):
    @staticmethod
    def create_fragment(
        fragment_parameters,  # type: Mapping[str, Iterable[str]]
        excludes=(),  # type: Container[str]
    ):
        # type: (...) -> str
        return "&".join(
            sorted(
                "{name}={value}".format(name=name, value=value)
                for name, values in fragment_parameters.items()
                for value in values
                if name not in excludes
            )
        )
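    # e.g. create_fragment({"egg": ["foo"], "subdirectory": ["src"]}) yields
    # "egg=foo&subdirectory=src" (each name=value pair is emitted, then sorted).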

    @classmethod
    def parse(cls, url):
        # type: (Text) -> ArtifactURL

        try:
            codecs.encode(url, "ascii")
        except ValueError as e:
            raise ValueError(
                "Invalid URL: {url}\n"
                "URLs can only contain ASCII octets: {err}".format(url=url, err=e)
            )
        else:
            raw_url = str(url)

        url_info = urlparse.urlparse(raw_url)
        scheme = parse_scheme(url_info.scheme) if url_info.scheme else "file"
        path = url_unquote(url_info.path)

        parameters = url_unquote(url_info.params)

        fingerprints = []
        fragment_parameters = parse_qs(url_info.fragment)
        if fragment_parameters:
            # Artifact URLs from indexes may contain pre-computed hashes. We isolate those here,
            # centrally, if present.
            # See: https://peps.python.org/pep-0503/#specification
            for alg in RANKED_ALGORITHMS:
                hashes = fragment_parameters.pop(alg, None)
                if not hashes:
                    continue
                if len(hashes) > 1 and len(set(hashes)) > 1:
                    TRACER.log(
                        "The artifact url contains multiple distinct hash values for the {alg} "
                        "algorithm, not trusting any of these: {url}".format(alg=alg, url=url)
                    )
                    continue
                fingerprints.append(Fingerprint(algorithm=alg, hash=hashes[0]))

        subdirectories = fragment_parameters.get("subdirectory")
        subdirectory = subdirectories[-1] if subdirectories else None

        download_url = urlparse.urlunparse(
            url_info._replace(fragment=cls.create_fragment(fragment_parameters))
        )
        normalized_url = urlparse.urlunparse(
            url_info._replace(path=path, params="", query="", fragment="")
        )
        return cls(
            raw_url=raw_url,
            url_info=url_info,
            download_url=download_url,
            normalized_url=normalized_url,
            scheme=scheme,
            path=path,
            subdirectory=subdirectory,
            parameters=parameters,
            fragment_parameters=fragment_parameters,
            fingerprints=tuple(fingerprints),
        )

    raw_url = attr.ib(eq=False)  # type: str
    url_info = attr.ib(eq=False)  # type: urlparse.ParseResult
    download_url = attr.ib(eq=False)  # type: str
    normalized_url = attr.ib()  # type: str
    scheme = attr.ib(eq=False)  # type: Union[str, ArchiveScheme.Value, VCSScheme]
    path = attr.ib(eq=False)  # type: str
    subdirectory = attr.ib(eq=False)  # type: Optional[str]
    parameters = attr.ib(eq=False)  # type: str
    fragment_parameters = attr.ib(eq=False)  # type: Mapping[str, Sequence[str]]
    fingerprints = attr.ib(eq=False)  # type: Tuple[Fingerprint, ...]

    def fragment(self, excludes=()):
        # type: (Container[str]) -> str
        return self.create_fragment(self.fragment_parameters, excludes=excludes)

    @property
    def is_wheel(self):
        # type: () -> bool
        return is_wheel(self.path)

    @property
    def fingerprint(self):
        # type: () -> Optional[Fingerprint]
        return self.fingerprints[0] if self.fingerprints else None
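# Illustrative end-to-end use of ArtifactURL.parse (hypothetical URL and hash value):
#   url = ArtifactURL.parse("https://example.com/foo-1.0-py3-none-any.whl#sha256=deadbeef")
#   url.is_wheel        -> True
#   url.fingerprint     -> Fingerprint(algorithm="sha256", hash="deadbeef")
#   url.normalized_url  -> "https://example.com/foo-1.0-py3-none-any.whl"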