|
1 | 1 | from os import environ
|
2 | 2 | from pathlib import Path
|
3 | 3 | import json
|
| 4 | +import time, os |
4 | 5 | import logging
|
5 | 6 | from helpers.models.all import model_families
|
6 | 7 |
|
@@ -82,34 +83,42 @@ def delete_cache_files(
|
82 | 83 | if cache_path.exists():
|
83 | 84 | try:
|
84 | 85 | cache_path.unlink()
|
| 86 | + logger.warning(f"(rank={os.environ.get('RANK')}) Deleted cache file: {cache_path}") |
85 | 87 | except:
|
86 | 88 | pass
|
87 | 89 |
|
88 | 90 | @classmethod
|
89 | 91 | def _load_from_disk(cls, cache_name, retry_limit: int = 0):
|
90 | 92 | cache_path = Path(cls.args.output_dir) / f"{cache_name}.json"
|
91 | 93 | retry_count = 0
|
92 |
| - while retry_count < retry_limit and not cache_path.exists(): |
| 94 | + results = None |
| 95 | + while retry_count < retry_limit and (not cache_path.exists() or results is None): |
93 | 96 | if cache_path.exists():
|
94 | 97 | try:
|
95 | 98 | with cache_path.open("r") as f:
|
96 |
| - return json.load(f) |
| 99 | + results = json.load(f) |
97 | 100 | except Exception as e:
|
98 | 101 | logger.error(
|
99 | 102 | f"Invalidating cache: error loading {cache_name} from disk. {e}"
|
100 | 103 | )
|
101 | 104 | return None
|
102 |
| - retry_count += 1 |
103 |
| - if retry_count < retry_limit: |
104 |
| - logger.warning(f"Cache file {cache_name} does not exist. Retry {retry_count}/{retry_limit}.") |
105 |
| - logger.warning(f"No cache file was found: {cache_path}") |
106 |
| - return None |
| 105 | + else: |
| 106 | + retry_count += 1 |
| 107 | + if retry_count < retry_limit: |
| 108 | + logger.debug(f"Cache file {cache_name} does not exist. Retry {retry_count}/{retry_limit}.") |
| 109 | + time.sleep(1) |
| 110 | + else: |
| 111 | + logger.warning(f"No cache file was found: {cache_path}") |
| 112 | + logger.debug(f"Returning: {type(results)}") |
| 113 | + return results |
107 | 114 |
|
108 | 115 | @classmethod
|
109 | 116 | def _save_to_disk(cls, cache_name, data):
|
110 | 117 | cache_path = Path(cls.args.output_dir) / f"{cache_name}.json"
|
| 118 | + logger.debug(f"(rank={os.environ.get('RANK')}) Saving {cache_name} to disk: {cache_path}") |
111 | 119 | with cache_path.open("w") as f:
|
112 | 120 | json.dump(data, f)
|
| 121 | + logger.debug(f"(rank={os.environ.get('RANK')}) Save complete {cache_name} to disk: {cache_path}") |
113 | 122 |
|
114 | 123 | @classmethod
|
115 | 124 | def set_config_path(cls, config_path: str):
|
@@ -187,10 +196,19 @@ def set_image_files(cls, raw_file_list: list, data_backend_id: str):
|
187 | 196 |
|
188 | 197 | @classmethod
|
189 | 198 | def get_image_files(cls, data_backend_id: str, retry_limit: int = 0):
|
| 199 | + if data_backend_id in cls.all_image_files and cls.all_image_files[data_backend_id] is None: |
| 200 | + # we should probably try to reload it from disk if it failed earlier. |
| 201 | + logger.debug(f"(rank={os.environ.get('RANK')}) Clearing out invalid pre-loaded cache entry for {data_backend_id}") |
| 202 | + del cls.all_image_files[data_backend_id] |
190 | 203 | if data_backend_id not in cls.all_image_files:
|
| 204 | + logger.debug(f"(rank={os.environ.get('RANK')}) Attempting to load from disk: {data_backend_id}") |
191 | 205 | cls.all_image_files[data_backend_id] = cls._load_from_disk(
|
192 | 206 | "all_image_files_{}".format(data_backend_id), retry_limit=retry_limit
|
193 | 207 | )
|
| 208 | + logger.debug(f"(rank={os.environ.get('RANK')}) Completed load from disk: {data_backend_id}: {type(cls.all_image_files[data_backend_id])}") |
| 209 | + else: |
| 210 | + logger.debug(f"()") |
| 211 | + logger.debug(f"(rank={os.environ.get('RANK')}) Returning {type(cls.all_image_files[data_backend_id])} for {data_backend_id}") |
194 | 212 | return cls.all_image_files[data_backend_id]
|
195 | 213 |
|
196 | 214 | @classmethod
|
@@ -571,7 +589,7 @@ def load_aspect_resolution_map(cls, dataloader_resolution: float, retry_limit: i
|
571 | 589 | cls.aspect_resolution_map = {dataloader_resolution: {}}
|
572 | 590 |
|
573 | 591 | cls.aspect_resolution_map[dataloader_resolution] = (
|
574 |
| - cls._load_from_disk(f"aspect_resolution_map-{dataloader_resolution}") or {}, , retry_limit=retry_limit |
| 592 | + cls._load_from_disk(f"aspect_resolution_map-{dataloader_resolution}", retry_limit=retry_limit) or {} |
575 | 593 | )
|
576 | 594 | logger.debug(
|
577 | 595 | f"Aspect resolution map: {cls.aspect_resolution_map[dataloader_resolution]}"
|
|
0 commit comments