4
4
import tarfile
5
5
import tempfile
6
6
from pathlib import Path
7
- from typing import Optional , Tuple
7
+ from typing import TYPE_CHECKING , Optional , Tuple
8
8
from urllib .parse import urlparse
9
9
from zipfile import ZipFile , is_zipfile
10
10
21
21
resource_to_filename ,
22
22
)
23
23
24
+ if TYPE_CHECKING :
25
+ from rich .progress import Progress
26
+
24
27
logger = logging .getLogger ("cached_path" )
25
28
26
29
@@ -29,6 +32,8 @@ def cached_path(
29
32
cache_dir : Optional [PathOrStr ] = None ,
30
33
extract_archive : bool = False ,
31
34
force_extract : bool = False ,
35
+ quiet : bool = False ,
36
+ progress : Optional ["Progress" ] = None ,
32
37
) -> Path :
33
38
"""
34
39
Given something that might be a URL or local path, determine which.
@@ -97,6 +102,13 @@ def cached_path(
97
102
Use this flag with caution! This can lead to race conditions if used
98
103
from multiple processes on the same file.
99
104
105
+ quiet :
106
+ If ``True``, progress displays won't be printed.
107
+
108
+ progress :
109
+ A custom progress display to use. If not set and ``quiet=False``, a default display
110
+ from :func:`~cached_path.get_download_progress()` will be used.
111
+
100
112
Returns
101
113
-------
102
114
:class:`pathlib.Path`
@@ -133,7 +145,14 @@ def cached_path(
133
145
file_name = url_or_filename [exclamation_index + 1 :]
134
146
135
147
# Call 'cached_path' recursively now to get the local path to the archive itself.
136
- cached_archive_path = cached_path (archive_path , cache_dir , True , force_extract )
148
+ cached_archive_path = cached_path (
149
+ archive_path ,
150
+ cache_dir = cache_dir ,
151
+ extract_archive = True ,
152
+ force_extract = force_extract ,
153
+ quiet = quiet ,
154
+ progress = progress ,
155
+ )
137
156
if not cached_archive_path .is_dir ():
138
157
raise ValueError (
139
158
f"{ url_or_filename } uses the ! syntax, but does not specify an archive file."
@@ -151,7 +170,7 @@ def cached_path(
151
170
152
171
if parsed .scheme in get_supported_schemes ():
153
172
# URL, so get it from the cache (downloading if necessary)
154
- file_path , etag = get_from_cache (url_or_filename , cache_dir )
173
+ file_path , etag = get_from_cache (url_or_filename , cache_dir , quiet = quiet , progress = progress )
155
174
156
175
if extract_archive and (is_zipfile (file_path ) or tarfile .is_tarfile (file_path )):
157
176
# This is the path the file should be extracted to.
@@ -243,7 +262,12 @@ def cached_path(
243
262
return file_path
244
263
245
264
246
- def get_from_cache (url : str , cache_dir : Optional [PathOrStr ] = None ) -> Tuple [Path , Optional [str ]]:
265
+ def get_from_cache (
266
+ url : str ,
267
+ cache_dir : Optional [PathOrStr ] = None ,
268
+ quiet : bool = False ,
269
+ progress : Optional ["Progress" ] = None ,
270
+ ) -> Tuple [Path , Optional [str ]]:
247
271
"""
248
272
Given a URL, look for the corresponding dataset in the local cache.
249
273
If it's not there, download it. Then return the path to the cached file and the ETag.
@@ -301,9 +325,31 @@ def get_from_cache(url: str, cache_dir: Optional[PathOrStr] = None) -> Tuple[Pat
301
325
if os .path .exists (cache_path ):
302
326
logger .info ("cache of %s is up-to-date" , url )
303
327
else :
328
+ size = client .get_size ()
304
329
with CacheFile (cache_path ) as cache_file :
305
330
logger .info ("%s not found in cache, downloading to %s" , url , cache_path )
306
- client .get_resource (cache_file )
331
+
332
+ from .progress import BufferedWriterWithProgress , get_download_progress
333
+
334
+ start_and_cleanup = progress is None
335
+ progress = progress or get_download_progress (quiet = quiet )
336
+
337
+ if start_and_cleanup :
338
+ progress .start ()
339
+
340
+ try :
341
+ display_url = url if len (url ) <= 50 else f"{ url [:49 ]} \N{horizontal ellipsis} "
342
+ task_id = progress .add_task (f"Downloading [cyan i]{ display_url } [/]" , total = size )
343
+ writer_with_progress = BufferedWriterWithProgress (cache_file , progress , task_id )
344
+ client .get_resource (writer_with_progress )
345
+ progress .update (
346
+ task_id ,
347
+ total = writer_with_progress .total_written ,
348
+ completed = writer_with_progress .total_written ,
349
+ )
350
+ finally :
351
+ if start_and_cleanup :
352
+ progress .stop ()
307
353
308
354
logger .debug ("creating metadata file for %s" , cache_path )
309
355
meta = Meta .new (
0 commit comments