@@ -23,6 +23,7 @@ def download_and_load_gpt2(model_size, models_dir):
23
23
# Define paths
24
24
model_dir = os .path .join (models_dir , model_size )
25
25
base_url = "https://openaipublic.blob.core.windows.net/gpt-2/models"
26
+ backup_base_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/gpt2"
26
27
filenames = [
27
28
"checkpoint" , "encoder.json" , "hparams.json" ,
28
29
"model.ckpt.data-00000-of-00001" , "model.ckpt.index" ,
@@ -33,22 +34,21 @@ def download_and_load_gpt2(model_size, models_dir):
33
34
os .makedirs (model_dir , exist_ok = True )
34
35
for filename in filenames :
35
36
file_url = os .path .join (base_url , model_size , filename )
37
+ backup_url = os .path .join (backup_base_url , model_size , filename )
36
38
file_path = os .path .join (model_dir , filename )
37
- download_file (file_url , file_path )
39
+ download_file (file_url , file_path , backup_url )
38
40
39
41
# Load settings and params
40
42
tf_ckpt_path = tf .train .latest_checkpoint (model_dir )
41
- settings = json .load (open (os .path .join (model_dir , "hparams.json" )))
43
+ settings = json .load (open (os .path .join (model_dir , "hparams.json" ), "r" , encoding = "utf-8" ))
42
44
params = load_gpt2_params_from_tf_ckpt (tf_ckpt_path , settings )
43
45
44
46
return settings , params
45
47
46
48
47
- def download_file (url , destination ):
48
- # Send a GET request to download the file
49
-
50
- try :
51
- with urllib .request .urlopen (url ) as response :
49
+ def download_file (url , destination , backup_url = None ):
50
+ def _attempt_download (download_url ):
51
+ with urllib .request .urlopen (download_url ) as response :
52
52
# Get the total file size from headers, defaulting to 0 if not present
53
53
file_size = int (response .headers .get ("Content-Length" , 0 ))
54
54
@@ -57,29 +57,44 @@ def download_file(url, destination):
57
57
file_size_local = os .path .getsize (destination )
58
58
if file_size == file_size_local :
59
59
print (f"File already exists and is up-to-date: { destination } " )
60
- return
60
+ return True # Indicate success without re-downloading
61
61
62
- # Define the block size for reading the file
63
62
block_size = 1024 # 1 Kilobyte
64
63
65
64
# Initialize the progress bar with total file size
66
- progress_bar_description = os .path .basename (url ) # Extract filename from URL
65
+ progress_bar_description = os .path .basename (download_url )
67
66
with tqdm (total = file_size , unit = "iB" , unit_scale = True , desc = progress_bar_description ) as progress_bar :
68
- # Open the destination file in binary write mode
69
67
with open (destination , "wb" ) as file :
70
- # Read the file in chunks and write to destination
71
68
while True :
72
69
chunk = response .read (block_size )
73
70
if not chunk :
74
71
break
75
72
file .write (chunk )
76
- progress_bar .update (len (chunk )) # Update progress bar
77
- except urllib .error .HTTPError :
78
- s = (
79
- f"The specified URL ({ url } ) is incorrect, the internet connection cannot be established,"
80
- "\n or the requested file is temporarily unavailable.\n Please visit the following website"
81
- " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273" )
82
- print (s )
73
+ progress_bar .update (len (chunk ))
74
+ return True
75
+
76
+ try :
77
+ if _attempt_download (url ):
78
+ return
79
+ except (urllib .error .HTTPError , urllib .error .URLError ):
80
+ if backup_url is not None :
81
+ print (f"Primary URL ({ url } ) failed. Attempting backup URL: { backup_url } " )
82
+ try :
83
+ if _attempt_download (backup_url ):
84
+ return
85
+ except urllib .error .HTTPError :
86
+ pass
87
+
88
+ # If we reach here, both attempts have failed
89
+ error_message = (
90
+ f"Failed to download from both primary URL ({ url } )"
91
+ f"{ ' and backup URL (' + backup_url + ')' if backup_url else '' } ."
92
+ "\n Check your internet connection or the file availability.\n "
93
+ "For help, visit: https://github.com/rasbt/LLMs-from-scratch/discussions/273"
94
+ )
95
+ print (error_message )
96
+ except Exception as e :
97
+ print (f"An unexpected error occurred: { e } " )
83
98
84
99
85
100
# Alternative way using `requests`
0 commit comments