Skip to content

Commit f9a9d94

Browse files
authored
Migrate many types from str to Text. (#2263)
This supports an upcoming refactor of `pex.{dist_metadata,pep_376}` which also requires re-formulating the pyc filters from `Iterable[str] -> Iterator[str]` form to boolean predicates to make typing easier at the loss of ~nothing.
1 parent 89842f7 commit f9a9d94

File tree

16 files changed

+156
-130
lines changed

16 files changed

+156
-130
lines changed

pex/bin/pex.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
global_environment,
2424
register_global_arguments,
2525
)
26-
from pex.common import die, filter_pyc_dirs, filter_pyc_files, safe_mkdtemp
26+
from pex.common import die, is_pyc_dir, is_pyc_file, safe_mkdtemp
2727
from pex.enum import Enum
2828
from pex.inherit_path import InheritPath
2929
from pex.interpreter_constraints import InterpreterConstraints
@@ -48,7 +48,7 @@
4848

4949
if TYPE_CHECKING:
5050
from argparse import Namespace
51-
from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
51+
from typing import Dict, Iterable, Iterator, List, Optional, Set, Text, Tuple
5252

5353
import attr # vendor:skip
5454

@@ -495,7 +495,7 @@ def parse(cls, name):
495495
subdir = attr.ib(default=None) # type: Optional[str]
496496

497497
def iter_files(self):
498-
# type: () -> Iterator[Tuple[str, str]]
498+
# type: () -> Iterator[Tuple[Text, Text]]
499499
components = self.name.split(".")
500500
parent_package_dirs = components[:-1]
501501
source = components[-1]
@@ -520,7 +520,7 @@ def _iter_source_files(
520520
parent_package_path, # type: List[str]
521521
source, # type: str
522522
):
523-
# type: (...) -> Iterator[Tuple[str, str]]
523+
# type: (...) -> Iterator[Tuple[Text, Text]]
524524
raise NotImplementedError()
525525

526526

@@ -530,11 +530,13 @@ def _iter_source_files(
530530
parent_package_path, # type: List[str]
531531
source, # type: str
532532
):
533-
# type: (...) -> Iterator[Tuple[str, str]]
533+
# type: (...) -> Iterator[Tuple[Text, Text]]
534534
package_dir = os.path.join(*(parent_package_path + [source]))
535535
for root, dirs, files in os.walk(package_dir):
536-
dirs[:] = list(filter_pyc_dirs(dirs))
537-
for f in filter_pyc_files(files):
536+
dirs[:] = [d for d in dirs if not is_pyc_dir(d)]
537+
for f in files:
538+
if is_pyc_file(f):
539+
continue
538540
src = os.path.join(root, f)
539541
dst = os.path.relpath(src, self.subdir) if self.subdir else src
540542
yield src, dst
@@ -546,7 +548,7 @@ def _iter_source_files(
546548
parent_package_path, # type: List[str]
547549
source, # type: str
548550
):
549-
# type: (...) -> Iterator[Tuple[str, str]]
551+
# type: (...) -> Iterator[Tuple[Text, Text]]
550552
module_src = os.path.join(*(parent_package_path + ["{module}.py".format(module=source)]))
551553
module_dest = os.path.relpath(module_src, self.subdir) if self.subdir else module_src
552554
yield module_src, module_dest
@@ -726,7 +728,7 @@ def _iter_directory_sources(directories):
726728

727729

728730
def _iter_python_sources(python_sources):
729-
# type: (Iterable[PythonSource]) -> Iterator[Tuple[str, str]]
731+
# type: (Iterable[PythonSource]) -> Iterator[Tuple[Text, Text]]
730732
for python_source in python_sources:
731733
for src, dst in python_source.iter_files():
732734
yield src, dst
@@ -778,7 +780,7 @@ def build_pex(
778780
"dependency cache."
779781
)
780782

781-
seen = set() # type: Set[Tuple[str, str]]
783+
seen = set() # type: Set[Tuple[Text, Text]]
782784
for src, dst in itertools.chain(
783785
_iter_directory_sources(
784786
OrderedSet(options.sources_directory + options.resources_directory)

pex/common.py

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from datetime import datetime
2121
from uuid import uuid4
2222

23-
from pex.typing import TYPE_CHECKING, cast
23+
from pex.typing import TYPE_CHECKING
2424

2525
if TYPE_CHECKING:
2626
from typing import (
@@ -35,6 +35,7 @@
3535
Optional,
3636
Set,
3737
Sized,
38+
Text,
3839
Tuple,
3940
Union,
4041
)
@@ -49,26 +50,22 @@
4950
DETERMINISTIC_DATETIME_TIMESTAMP = (DETERMINISTIC_DATETIME - _UNIX_EPOCH).total_seconds()
5051

5152

52-
def filter_pyc_dirs(dirs):
53-
# type: (Iterable[str]) -> Iterator[str]
54-
"""Return an iterator over the input `dirs` filtering out Python bytecode cache directories."""
55-
for d in dirs:
56-
if d != "__pycache__":
57-
yield d
def is_pyc_dir(dir_path):
    # type: (Text) -> bool
    """Check whether `dir_path` names a Python bytecode cache directory."""
    # Bytecode cache directories are always literally named `__pycache__`;
    # only the final path component matters.
    basename = os.path.basename(dir_path)
    return basename == "__pycache__"
5857

5958

60-
def filter_pyc_files(files):
61-
# type: (Iterable[str]) -> Iterator[str]
62-
"""Iterate the input `files` filtering out any Python bytecode files."""
63-
for f in files:
64-
# For Python 2.7, `.pyc` files are compiled as siblings to `.py` files (there is no
65-
# __pycache__ dir).
66-
if not f.endswith((".pyc", ".pyo")) and not is_pyc_temporary_file(f):
67-
yield f
def is_pyc_file(file_path):
    # type: (Text) -> bool
    """Check whether `file_path` names a Python bytecode file.

    Covers both compiled bytecode (`.pyc` / `.pyo`) and in-flight temporary
    bytecode files being written by a concurrent compilation.
    """
    # N.B.: For Python 2.7, `.pyc` files are compiled as siblings to `.py` files (there is no
    # __pycache__ dir).
    if file_path.endswith(".pyc") or file_path.endswith(".pyo"):
        return True
    return is_pyc_temporary_file(file_path)
6865

6966

7067
def is_pyc_temporary_file(file_path):
71-
# type: (str) -> bool
68+
# type: (Text) -> bool
7269
"""Check if `file` is a temporary Python bytecode file."""
7370
# We rely on the fact that the temporary files created by CPython have object id (integer)
7471
# suffixes to avoid picking up files where Python bytecode compilation is in-flight; i.e.:
@@ -99,7 +96,7 @@ def pluralize(
9996

10097

10198
def safe_copy(source, dest, overwrite=False):
102-
# type: (str, str, bool) -> None
99+
# type: (Text, Text, bool) -> None
103100
def do_copy():
104101
# type: () -> None
105102
temp_dest = dest + uuid4().hex
@@ -224,6 +221,7 @@ def _chmod(self, info, path):
224221

225222
@contextlib.contextmanager
226223
def open_zip(path, *args, **kwargs):
224+
# type: (Text, *Any, **Any) -> Iterator[PermPreservingZipFile]
227225
"""A contextmanager for zip files.
228226
229227
Passes through positional and kwargs to zipfile.ZipFile.
@@ -289,7 +287,7 @@ def register_rmtree(directory):
289287

290288

291289
def safe_mkdir(directory, clean=False):
292-
# type: (str, bool) -> str
290+
# type: (Text, bool) -> Text
293291
"""Safely create a directory.
294292
295293
Ensures a directory is present. If it's not there, it is created. If it is, it's a no-op. If
@@ -332,7 +330,7 @@ def safe_delete(filename):
332330

333331

334332
def safe_rmtree(directory):
335-
# type: (str) -> None
333+
# type: (Text) -> None
336334
"""Delete a directory if it's present.
337335
338336
If it's not present, no-op.

pex/dist_metadata.py

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,12 @@ def _parse_sdist_package_info(sdist_path):
137137
class DistMetadataFile(object):
138138
project_name = attr.ib() # type: ProjectName
139139
version = attr.ib() # type: Version
140-
path = attr.ib() # type: str
140+
path = attr.ib() # type: Text
141141

142142

143143
def find_dist_info_files(
144144
filename, # type: Text
145-
listing, # type: Iterable[str]
145+
listing, # type: Iterable[Text]
146146
):
147147
# type: (...) -> Iterator[DistMetadataFile]
148148

@@ -168,10 +168,10 @@ def find_dist_info_files(
168168
def find_dist_info_file(
169169
project_name, # type: Union[Text, ProjectName]
170170
filename, # type: Text
171-
listing, # type: Iterable[str]
171+
listing, # type: Iterable[Text]
172172
version=None, # type: Optional[Union[Text, Version]]
173173
):
174-
# type: (...) -> Optional[str]
174+
# type: (...) -> Optional[Text]
175175

176176
normalized_project_name = (
177177
project_name if isinstance(project_name, ProjectName) else ProjectName(project_name)
@@ -581,24 +581,24 @@ def _realpath(path):
581581
class Distribution(object):
582582
@staticmethod
583583
def _read_metadata_lines(metadata_path):
584-
# type: (str) -> Iterator[str]
585-
with open(os.path.join(metadata_path)) as fp:
584+
# type: (Text) -> Iterator[Text]
585+
with open(os.path.join(metadata_path), "rb") as fp:
586586
for line in fp:
587587
# This is pkg_resources.IMetadataProvider.get_metadata_lines behavior, which our
588588
# code expects.
589-
normalized = line.strip()
589+
normalized = line.decode("utf-8").strip()
590590
if normalized and not normalized.startswith("#"):
591591
yield normalized
592592

593593
@classmethod
594594
def parse_entry_map(cls, entry_points_metadata_path):
595-
# type: (str) -> Dict[str, Dict[str, EntryPoint]]
595+
# type: (Text) -> Dict[Text, Dict[Text, EntryPoint]]
596596

597597
# This file format is defined here:
598598
# https://packaging.python.org/en/latest/specifications/entry-points/#file-format
599599

600-
entry_map = defaultdict(dict) # type: DefaultDict[str, Dict[str, EntryPoint]]
601-
group = None # type: Optional[str]
600+
entry_map = defaultdict(dict) # type: DefaultDict[Text, Dict[Text, EntryPoint]]
601+
group = None # type: Optional[Text]
602602
for index, line in enumerate(cls._read_metadata_lines(entry_points_metadata_path), start=1):
603603
if line.startswith("[") and line.endswith("]"):
604604
group = line[1:-1]
@@ -624,7 +624,7 @@ def load(cls, location):
624624
metadata = attr.ib() # type: DistMetadata
625625
_metadata_files_cache = attr.ib(
626626
factory=dict, init=False, eq=False, repr=False
627-
) # type: Dict[str, str]
627+
) # type: Dict[Text, Text]
628628

629629
@property
630630
def key(self):
@@ -658,7 +658,7 @@ def requires(self):
658658
return self.metadata.requires_dists
659659

660660
def _get_metadata_file(self, name):
661-
# type: (str) -> Optional[str]
661+
# type: (Text) -> Optional[Text]
662662
normalized_name = os.path.normpath(name)
663663
if os.path.isabs(normalized_name):
664664
raise ValueError(
@@ -691,7 +691,7 @@ def has_metadata(self, name):
691691
return self._get_metadata_file(name) is not None
692692

693693
def get_metadata_lines(self, name):
694-
# type: (str) -> Iterator[str]
694+
# type: (Text) -> Iterator[Text]
695695
relative_path = self._get_metadata_file(name)
696696
if relative_path is None:
697697
raise MetadataNotFoundError(
@@ -707,7 +707,7 @@ def get_metadata_lines(self, name):
707707
yield line
708708

709709
def get_entry_map(self):
710-
# type: () -> Dict[str, Dict[str, EntryPoint]]
710+
# type: () -> Dict[Text, Dict[Text, EntryPoint]]
711711
entry_points_metadata_relpath = self._get_metadata_file("entry_points.txt")
712712
if entry_points_metadata_relpath is None:
713713
return defaultdict(dict)
@@ -724,7 +724,7 @@ def __str__(self):
724724
class EntryPoint(object):
725725
@classmethod
726726
def parse(cls, spec):
727-
# type: (str) -> EntryPoint
727+
# type: (Text) -> EntryPoint
728728

729729
# This file format is defined here:
730730
# https://packaging.python.org/en/latest/specifications/entry-points/#file-format
@@ -734,7 +734,8 @@ def parse(cls, spec):
734734
raise ValueError("Invalid entry point specification: {spec}.".format(spec=spec))
735735

736736
name, value = components
737-
module, sep, attrs = value.strip().partition(":")
737+
# N.B.: Python identifiers must be ascii.
738+
module, sep, attrs = str(value).strip().partition(":")
738739
if sep and not attrs:
739740
raise ValueError("Invalid entry point specification: {spec}.".format(spec=spec))
740741

@@ -746,7 +747,7 @@ def parse(cls, spec):
746747

747748
return cls(name=entry_point_name, module=module)
748749

749-
name = attr.ib() # type: str
750+
name = attr.ib() # type: Text
750751
module = attr.ib() # type: str
751752

752753
def __str__(self):

pex/hashing.py

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pex.typing import TYPE_CHECKING, Generic
1111

1212
if TYPE_CHECKING:
13-
from typing import IO, Callable, Iterable, Iterator, Optional, Protocol, Type, TypeVar
13+
from typing import IO, Callable, Iterable, Iterator, Optional, Protocol, Text, Type, TypeVar
1414

1515
class HintedDigest(Protocol):
1616
@property
@@ -177,7 +177,7 @@ def update_hash(
177177

178178

179179
def file_hash(
180-
path, # type: str
180+
path, # type: Text
181181
digest, # type: HintedDigest
182182
):
183183
# type: (...) -> None
@@ -187,21 +187,22 @@ def file_hash(
187187

188188

189189
def dir_hash(
190-
directory, # type: str
190+
directory, # type: Text
191191
digest, # type: HintedDigest
192-
dir_filter=lambda dirs: dirs, # type: Callable[[Iterable[str]], Iterable[str]]
193-
file_filter=lambda files: files, # type: Callable[[Iterable[str]], Iterable[str]]
192+
dir_filter=lambda d: True, # type: Callable[[Text], bool]
193+
file_filter=lambda f: True, # type: Callable[[Text], bool]
194194
):
195195
# type: (...) -> None
196196
"""Digest the contents of a directory in a reproducible manner."""
197197

198198
def iter_files():
199-
# type: () -> Iterator[str]
199+
# type: () -> Iterator[Text]
200200
normpath = os.path.realpath(os.path.normpath(directory))
201201
for root, dirs, files in os.walk(normpath):
202-
dirs[:] = list(dir_filter(dirs))
203-
for f in file_filter(files):
204-
yield os.path.relpath(os.path.join(root, f), normpath)
202+
dirs[:] = [d for d in dirs if dir_filter(d)]
203+
for f in files:
204+
if file_filter(f):
205+
yield os.path.relpath(os.path.join(root, f), normpath)
205206

206207
names = sorted(iter_files())
207208

@@ -214,11 +215,11 @@ def iter_files():
214215

215216

216217
def zip_hash(
217-
zip_path, # type: str
218+
zip_path, # type: Text
218219
digest, # type: HintedDigest
219-
relpath=None, # type: Optional[str]
220-
dir_filter=lambda dirs: dirs, # type: Callable[[Iterable[str]], Iterable[str]]
221-
file_filter=lambda files: files, # type: Callable[[Iterable[str]], Iterable[str]]
220+
relpath=None, # type: Optional[Text]
221+
dir_filter=lambda d: True, # type: Callable[[Text], bool]
222+
file_filter=lambda f: True, # type: Callable[[Text], bool]
222223
):
223224
# type: (...) -> None
224225
"""Digest the contents of a zip file in a reproducible manner.
@@ -234,19 +235,15 @@ def zip_hash(
234235
)
235236

236237
dirs = frozenset(name.rstrip("/") for name in namelist if name.endswith("/"))
237-
accept_dir_names = frozenset(dir_filter(os.path.basename(d) for d in dirs))
238-
accept_dirs = frozenset(d for d in dirs if os.path.basename(d) in accept_dir_names)
238+
accept_dirs = frozenset(d for d in dirs if dir_filter(os.path.basename(d)))
239239
reject_dirs = dirs - accept_dirs
240240

241241
accept_files = sorted(
242-
file_filter(
243-
name
244-
for name in namelist
245-
if not (
246-
name.endswith("/")
247-
or any(name.startswith(reject_dir) for reject_dir in reject_dirs)
248-
)
249-
)
242+
name
243+
for name in namelist
244+
if not name.endswith("/")
245+
and not any(name.startswith(reject_dir) for reject_dir in reject_dirs)
246+
and file_filter(os.path.basename(name))
250247
)
251248

252249
hashed_names = (

0 commit comments

Comments
 (0)