Skip to content

Commit 3112653

Browse files
authored
Merge pull request #437 from jherland/egg-metadata-work
Fix .files and inferred packages_distributions for .egg-info packages
2 parents 5811d73 + b8a8b5d commit 3112653

File tree

5 files changed

+226
-17
lines changed

5 files changed

+226
-17
lines changed

CHANGES.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
v6.3.0
2+
======
3+
4+
* #115: Support ``installed-files.txt`` for ``Distribution.files``
5+
when present.
6+
17
v6.2.1
28
======
39

importlib_metadata/__init__.py

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import functools
1313
import itertools
1414
import posixpath
15+
import contextlib
1516
import collections
1617
import inspect
1718

@@ -461,8 +462,8 @@ def files(self):
461462
:return: List of PackagePath for this distribution or None
462463
463464
Result is `None` if the metadata file that enumerates files
464-
(i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
465-
missing.
465+
(i.e. RECORD for dist-info, or installed-files.txt or
466+
SOURCES.txt for egg-info) is missing.
466467
Result may be empty if the metadata exists but is empty.
467468
"""
468469

@@ -475,9 +476,19 @@ def make_file(name, hash=None, size_str=None):
475476

476477
@pass_none
477478
def make_files(lines):
478-
return list(starmap(make_file, csv.reader(lines)))
479+
return starmap(make_file, csv.reader(lines))
479480

480-
return make_files(self._read_files_distinfo() or self._read_files_egginfo())
481+
@pass_none
482+
def skip_missing_files(package_paths):
483+
return list(filter(lambda path: path.locate().exists(), package_paths))
484+
485+
return skip_missing_files(
486+
make_files(
487+
self._read_files_distinfo()
488+
or self._read_files_egginfo_installed()
489+
or self._read_files_egginfo_sources()
490+
)
491+
)
481492

482493
def _read_files_distinfo(self):
483494
"""
@@ -486,10 +497,43 @@ def _read_files_distinfo(self):
486497
text = self.read_text('RECORD')
487498
return text and text.splitlines()
488499

489-
def _read_files_egginfo(self):
500+
def _read_files_egginfo_installed(self):
490501
"""
491-
SOURCES.txt might contain literal commas, so wrap each line
492-
in quotes.
502+
Read installed-files.txt and return lines in a CSV-parsable
503+
format similar to RECORD: each file must be placed
504+
relative to the site-packages directory, and must also be
505+
quoted (since file names can contain literal commas).
506+
507+
This file is written when the package is installed by pip,
508+
but it might not be written for other installation methods.
509+
Hence, even if we can assume that this file is accurate
510+
when it exists, we cannot assume that it always exists.
511+
"""
512+
text = self.read_text('installed-files.txt')
513+
# We need to prepend the .egg-info/ subdir to the lines in this file.
514+
# But this subdir is only available in the PathDistribution's self._path
515+
# which is not easily accessible from this base class...
516+
subdir = getattr(self, '_path', None)
517+
if not text or not subdir:
518+
return
519+
with contextlib.suppress(Exception):
520+
ret = [
521+
str((subdir / line).resolve().relative_to(self.locate_file('')))
522+
for line in text.splitlines()
523+
]
524+
return map('"{}"'.format, ret)
525+
526+
def _read_files_egginfo_sources(self):
527+
"""
528+
Read SOURCES.txt and return lines in a CSV-parsable format
529+
similar to RECORD: each file name must be quoted (since it
530+
might contain literal commas).
531+
532+
Note that SOURCES.txt is not a reliable source for what
533+
files are installed by a package. This file is generated
534+
for a source archive, and the files that are present
535+
there (e.g. setup.py) may not correctly reflect the files
536+
that are present after the package has been installed.
493537
"""
494538
text = self.read_text('SOURCES.txt')
495539
return text and map('"{}"'.format, text.splitlines())

tests/fixtures.py

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,10 @@ def setUp(self):
8686

8787

8888
# Except for python/mypy#731, prefer to define
89-
# FilesDef = Dict[str, Union['FilesDef', str]]
90-
FilesDef = Dict[str, Union[Dict[str, Union[Dict[str, str], str]], str]]
89+
# FilesDef = Dict[str, Union['FilesDef', str, bytes]]
90+
FilesDef = Dict[
91+
str, Union[Dict[str, Union[Dict[str, Union[str, bytes]], str, bytes]], str, bytes]
92+
]
9193

9294

9395
class DistInfoPkg(OnSysPath, SiteDir):
@@ -214,6 +216,97 @@ def setUp(self):
214216
build_files(EggInfoPkg.files, prefix=self.site_dir)
215217

216218

219+
class EggInfoPkgPipInstalledNoToplevel(OnSysPath, SiteDir):
220+
files: FilesDef = {
221+
"egg_with_module_pkg.egg-info": {
222+
"PKG-INFO": "Name: egg_with_module-pkg",
223+
# SOURCES.txt is made from the source archive, and contains files
224+
# (setup.py) that are not present after installation.
225+
"SOURCES.txt": """
226+
egg_with_module.py
227+
setup.py
228+
egg_with_module_pkg.egg-info/PKG-INFO
229+
egg_with_module_pkg.egg-info/SOURCES.txt
230+
egg_with_module_pkg.egg-info/top_level.txt
231+
""",
232+
# installed-files.txt is written by pip, and is a strictly more
233+
# accurate source than SOURCES.txt as to the installed contents of
234+
# the package.
235+
"installed-files.txt": """
236+
../egg_with_module.py
237+
PKG-INFO
238+
SOURCES.txt
239+
top_level.txt
240+
""",
241+
# missing top_level.txt (to trigger fallback to installed-files.txt)
242+
},
243+
"egg_with_module.py": """
244+
def main():
245+
print("hello world")
246+
""",
247+
}
248+
249+
def setUp(self):
250+
super().setUp()
251+
build_files(EggInfoPkgPipInstalledNoToplevel.files, prefix=self.site_dir)
252+
253+
254+
class EggInfoPkgPipInstalledNoModules(OnSysPath, SiteDir):
255+
files: FilesDef = {
256+
"egg_with_no_modules_pkg.egg-info": {
257+
"PKG-INFO": "Name: egg_with_no_modules-pkg",
258+
# SOURCES.txt is made from the source archive, and contains files
259+
# (setup.py) that are not present after installation.
260+
"SOURCES.txt": """
261+
setup.py
262+
egg_with_no_modules_pkg.egg-info/PKG-INFO
263+
egg_with_no_modules_pkg.egg-info/SOURCES.txt
264+
egg_with_no_modules_pkg.egg-info/top_level.txt
265+
""",
266+
# installed-files.txt is written by pip, and is a strictly more
267+
# accurate source than SOURCES.txt as to the installed contents of
268+
# the package.
269+
"installed-files.txt": """
270+
PKG-INFO
271+
SOURCES.txt
272+
top_level.txt
273+
""",
274+
# top_level.txt correctly reflects that no modules are installed
275+
"top_level.txt": b"\n",
276+
},
277+
}
278+
279+
def setUp(self):
280+
super().setUp()
281+
build_files(EggInfoPkgPipInstalledNoModules.files, prefix=self.site_dir)
282+
283+
284+
class EggInfoPkgSourcesFallback(OnSysPath, SiteDir):
285+
files: FilesDef = {
286+
"sources_fallback_pkg.egg-info": {
287+
"PKG-INFO": "Name: sources_fallback-pkg",
288+
# SOURCES.txt is made from the source archive, and contains files
289+
# (setup.py) that are not present after installation.
290+
"SOURCES.txt": """
291+
sources_fallback.py
292+
setup.py
293+
sources_fallback_pkg.egg-info/PKG-INFO
294+
sources_fallback_pkg.egg-info/SOURCES.txt
295+
""",
296+
# missing installed-files.txt (i.e. not installed by pip) and
297+
# missing top_level.txt (to trigger fallback to SOURCES.txt)
298+
},
299+
"sources_fallback.py": """
300+
def main():
301+
print("hello world")
302+
""",
303+
}
304+
305+
def setUp(self):
306+
super().setUp()
307+
build_files(EggInfoPkgSourcesFallback.files, prefix=self.site_dir)
308+
309+
217310
class EggInfoFile(OnSysPath, SiteDir):
218311
files: FilesDef = {
219312
"egginfo_file.egg-info": """

tests/test_api.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ def suppress_known_deprecation():
2727

2828
class APITests(
2929
fixtures.EggInfoPkg,
30+
fixtures.EggInfoPkgPipInstalledNoToplevel,
31+
fixtures.EggInfoPkgPipInstalledNoModules,
32+
fixtures.EggInfoPkgSourcesFallback,
3033
fixtures.DistInfoPkg,
3134
fixtures.DistInfoPkgWithDot,
3235
fixtures.EggInfoFile,
@@ -62,15 +65,28 @@ def test_prefix_not_matched(self):
6265
distribution(prefix)
6366

6467
def test_for_top_level(self):
65-
self.assertEqual(
66-
distribution('egginfo-pkg').read_text('top_level.txt').strip(), 'mod'
67-
)
68+
tests = [
69+
('egginfo-pkg', 'mod'),
70+
('egg_with_no_modules-pkg', ''),
71+
]
72+
for pkg_name, expect_content in tests:
73+
with self.subTest(pkg_name):
74+
self.assertEqual(
75+
distribution(pkg_name).read_text('top_level.txt').strip(),
76+
expect_content,
77+
)
6878

6979
def test_read_text(self):
70-
top_level = [
71-
path for path in files('egginfo-pkg') if path.name == 'top_level.txt'
72-
][0]
73-
self.assertEqual(top_level.read_text(), 'mod\n')
80+
tests = [
81+
('egginfo-pkg', 'mod\n'),
82+
('egg_with_no_modules-pkg', '\n'),
83+
]
84+
for pkg_name, expect_content in tests:
85+
with self.subTest(pkg_name):
86+
top_level = [
87+
path for path in files(pkg_name) if path.name == 'top_level.txt'
88+
][0]
89+
self.assertEqual(top_level.read_text(), expect_content)
7490

7591
def test_entry_points(self):
7692
eps = entry_points()
@@ -184,6 +200,9 @@ def test_files_dist_info(self):
184200

185201
def test_files_egg_info(self):
186202
self._test_files(files('egginfo-pkg'))
203+
self._test_files(files('egg_with_module-pkg'))
204+
self._test_files(files('egg_with_no_modules-pkg'))
205+
self._test_files(files('sources_fallback-pkg'))
187206

188207
def test_version_egg_info_file(self):
189208
self.assertEqual(version('egginfo-file'), '0.1')

tests/test_main.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,21 @@ def test_metadata_loads_egg_info(self):
171171
assert meta['Description'] == 'pôrˈtend'
172172

173173

174-
class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase):
174+
class DiscoveryTests(
175+
fixtures.EggInfoPkg,
176+
fixtures.EggInfoPkgPipInstalledNoToplevel,
177+
fixtures.EggInfoPkgPipInstalledNoModules,
178+
fixtures.EggInfoPkgSourcesFallback,
179+
fixtures.DistInfoPkg,
180+
unittest.TestCase,
181+
):
175182
def test_package_discovery(self):
176183
dists = list(distributions())
177184
assert all(isinstance(dist, Distribution) for dist in dists)
178185
assert any(dist.metadata['Name'] == 'egginfo-pkg' for dist in dists)
186+
assert any(dist.metadata['Name'] == 'egg_with_module-pkg' for dist in dists)
187+
assert any(dist.metadata['Name'] == 'egg_with_no_modules-pkg' for dist in dists)
188+
assert any(dist.metadata['Name'] == 'sources_fallback-pkg' for dist in dists)
179189
assert any(dist.metadata['Name'] == 'distinfo-pkg' for dist in dists)
180190

181191
def test_invalid_usage(self):
@@ -362,3 +372,40 @@ def test_packages_distributions_all_module_types(self):
362372
assert distributions[f'in_package_{i}'] == ['all_distributions']
363373

364374
assert not any(name.endswith('.dist-info') for name in distributions)
375+
376+
377+
class PackagesDistributionsEggTest(
378+
fixtures.EggInfoPkg,
379+
fixtures.EggInfoPkgPipInstalledNoToplevel,
380+
fixtures.EggInfoPkgPipInstalledNoModules,
381+
fixtures.EggInfoPkgSourcesFallback,
382+
unittest.TestCase,
383+
):
384+
def test_packages_distributions_on_eggs(self):
385+
"""
386+
Test old-style egg packages with varying combinations of 'top_level.txt',
387+
'SOURCES.txt', and 'installed-files.txt' available.
388+
"""
389+
distributions = packages_distributions()
390+
391+
def import_names_from_package(package_name):
392+
return {
393+
import_name
394+
for import_name, package_names in distributions.items()
395+
if package_name in package_names
396+
}
397+
398+
# egginfo-pkg declares one import ('mod') via top_level.txt
399+
assert import_names_from_package('egginfo-pkg') == {'mod'}
400+
401+
# egg_with_module-pkg has one import ('egg_with_module') inferred from
402+
# installed-files.txt (top_level.txt is missing)
403+
assert import_names_from_package('egg_with_module-pkg') == {'egg_with_module'}
404+
405+
# egg_with_no_modules-pkg should not be associated with any import names
406+
# (top_level.txt is empty, and installed-files.txt has no .py files)
407+
assert import_names_from_package('egg_with_no_modules-pkg') == set()
408+
409+
# sources_fallback-pkg has one import ('sources_fallback') inferred from
410+
# SOURCES.txt (top_level.txt and installed-files.txt are missing)
411+
assert import_names_from_package('sources_fallback-pkg') == {'sources_fallback'}

0 commit comments

Comments
 (0)