6
6
"check_opsim_archive_resource" ,
7
7
"read_archived_sim_metadata" ,
8
8
"make_sim_archive_cli" ,
9
+ "compile_sim_metadata" ,
10
+ "read_sim_metadata_from_hdf" ,
11
+ "verify_compiled_sim_metadata" ,
9
12
"drive_sim" ,
13
+ "compile_sim_archive_metadata_cli" ,
14
+ "find_latest_prenight_sim_for_nights" ,
15
+ "fetch_latest_prenight_sim_for_nights" ,
16
+ "fetch_obsloctap_visits" ,
10
17
]
11
18
12
19
import argparse
@@ -243,7 +250,7 @@ def convert_mjd_to_dayobs(mjd):
243
250
return data_path
244
251
245
252
246
- def transfer_archive_dir (archive_dir , archive_base_uri = "s3://rubin-scheduler-prenight/opsim/" ):
253
+ def transfer_archive_dir (archive_dir , archive_base_uri = "s3://rubin:rubin -scheduler-prenight/opsim/" ):
247
254
"""Transfer the contents of an archive directory to an resource.
248
255
249
256
Parameters
@@ -253,22 +260,25 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre
253
260
transferred.
254
261
archive_base_uri : `str`, optional
255
262
The base URI where the archive files will be transferred to.
256
- Default is "s3://rubin-scheduler-prenight/opsim/".
263
+ Default is "s3://rubin:rubin -scheduler-prenight/opsim/".
257
264
258
265
Returns
259
266
-------
260
267
resource_rpath : `ResourcePath`
261
268
The destination resource.
262
269
"""
263
270
271
+ LOGGER .debug (f"Beginning copy of { archive_dir } to { archive_base_uri } ." )
264
272
metadata_fname = Path (archive_dir ).joinpath ("sim_metadata.yaml" )
265
273
with open (metadata_fname , "r" ) as metadata_io :
266
274
sim_metadata = yaml .safe_load (metadata_io )
275
+ LOGGER .debug (f"Completed read of { archive_dir } ." )
267
276
268
277
insert_date = datetime .datetime .utcnow ().date ().isoformat ()
269
278
insert_date_rpath = ResourcePath (archive_base_uri ).join (insert_date , forceDirectory = True )
270
279
if not insert_date_rpath .exists ():
271
280
insert_date_rpath .mkdir ()
281
+ LOGGER .debug (f"Created { insert_date_rpath } ." )
272
282
273
283
# Number the sims in the insert date dir by
274
284
# looking for all the integer directories, and choosing the next one.
@@ -285,6 +295,7 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre
285
295
new_id = max (found_ids ) + 1 if len (found_ids ) > 0 else 1
286
296
resource_rpath = insert_date_rpath .join (f"{ new_id } " , forceDirectory = True )
287
297
resource_rpath .mkdir ()
298
+ LOGGER .debug (f"Created { resource_rpath } ." )
288
299
289
300
# Include the metadata file itself.
290
301
sim_metadata ["files" ]["metadata" ] = {"name" : "sim_metadata.yaml" }
@@ -293,6 +304,7 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre
293
304
source_fname = Path (archive_dir ).joinpath (file_info ["name" ])
294
305
with open (source_fname , "rb" ) as source_io :
295
306
content = source_io .read ()
307
+ LOGGER .debug (f"Read { source_fname } ." )
296
308
297
309
destination_rpath = resource_rpath .join (file_info ["name" ])
298
310
destination_rpath .write (content )
@@ -315,17 +327,24 @@ def check_opsim_archive_resource(archive_uri):
315
327
validity: `dict`
316
328
A dictionary of files checked, and their validity.
317
329
"""
330
+ LOGGER .debug (f"Starting to check file hashes in opsim sim archive { archive_uri } ." )
318
331
metadata_path = ResourcePath (archive_uri ).join ("sim_metadata.yaml" )
319
332
with metadata_path .open (mode = "r" ) as metadata_io :
320
333
sim_metadata = yaml .safe_load (metadata_io )
334
+ LOGGER .debug (f"Read sim metadata from { metadata_path } .)" )
321
335
322
336
results = {}
323
337
324
338
for file_info in sim_metadata ["files" ].values ():
325
339
resource_path = ResourcePath (archive_uri ).join (file_info ["name" ])
340
+ LOGGER .info (f"Reading { resource_path } ." )
326
341
content = resource_path .read ()
327
342
328
343
results [file_info ["name" ]] = file_info ["md5" ] == hashlib .md5 (content ).hexdigest ()
344
+ if results [file_info ["name" ]]:
345
+ LOGGER .debug (f"{ resource_path } checked and found to match recorded md5." )
346
+ else :
347
+ LOGGER .debug (f"{ resource_path } has an md5 that differs from the recorded md5!" )
329
348
330
349
return results
331
350
@@ -386,11 +405,15 @@ def read_archived_sim_metadata(
386
405
"""
387
406
latest_mjd = int (Time .now ().mjd if latest is None else Time (latest ).mjd )
388
407
earliest_mjd = int (latest_mjd - (num_nights - 1 ))
408
+ LOGGER .debug (
409
+ f"Looking for simulation metadata with MJD between { earliest_mjd } and { latest_mjd } in { base_uri } ."
410
+ )
389
411
390
412
compilation = {}
391
413
compiled_uris_by_date = {}
392
414
max_compiled_date = "1900-01-01"
393
415
if compilation_resource is not None :
416
+ LOGGER .debug (f"Reading metadata cache { compilation_resource } ." )
394
417
try :
395
418
compilation .update (read_sim_metadata_from_hdf (compilation_resource ))
396
419
for uri in compilation :
@@ -399,6 +422,7 @@ def read_archived_sim_metadata(
399
422
compiled_uris_by_date [iso_date ] = []
400
423
compiled_uris_by_date [iso_date ].append (uri )
401
424
max_compiled_date = max (max_compiled_date , iso_date )
425
+ LOGGER .debug (f"Maximum simulation execution date in metadata cache: { max_compiled_date } " )
402
426
except FileNotFoundError :
403
427
LOGGER .warning (f"No metadata cache { compilation_resource } , not using cache." )
404
428
pass
@@ -418,17 +442,25 @@ def read_archived_sim_metadata(
418
442
):
419
443
for found_file in found_files :
420
444
found_resource = ResourcePath (base_dir ).join (found_file )
445
+ LOGGER .debug (f"Found { found_resource } " )
421
446
sim_uri = str (found_resource .dirname ())
422
447
if sim_uri in compilation :
448
+ LOGGER .debug (f"Not reading { found_resource } , already in the read compliation." )
423
449
these_metadata = compilation [sim_uri ]
424
450
else :
451
+ LOGGER .debug (f"Reading { found_resource } (absent from compilation)." )
425
452
these_metadata = yaml .safe_load (found_resource .read ().decode ("utf-8" ))
426
453
these_metadata ["label" ] = _build_archived_sim_label (
427
454
base_uri , found_resource , these_metadata
428
455
)
456
+ LOGGER .debug (f"Read successfully: { found_resource } " )
429
457
if iso_date < max_compiled_date :
430
- print (f"Simulation at { sim_uri } expected but not found in compilation." )
458
+ LOGGER .error (
459
+ f"Simulation at { sim_uri } expected but not found in compilation."
460
+ )
431
461
all_metadata [sim_uri ] = these_metadata
462
+ else :
463
+ LOGGER .debug (f"No simulations found with generation date of { iso_date } " )
432
464
else :
433
465
if iso_date in compiled_uris_by_date :
434
466
for sim_uri in compiled_uris_by_date [iso_date ]:
@@ -438,7 +470,18 @@ def read_archived_sim_metadata(
438
470
if iso_date in compiled_uris_by_date :
439
471
for sim_uri in compiled_uris_by_date [iso_date ]:
440
472
if sim_uri not in all_metadata :
441
- print (f"Simulation at { sim_uri } in compiled metadata but not archive." )
473
+ message = f"Simulation at { sim_uri } in compiled metadata but not archive."
474
+ print (message )
475
+ LOGGER .error (message )
476
+ else :
477
+ LOGGER .debug (
478
+ f"Date { iso_date } not expected to be in the metadata compilation, not checking for it."
479
+ )
480
+
481
+ if len (all_metadata ) == 0 :
482
+ earliest_iso = Time (earliest_mjd , format = "mjd" ).iso [:10 ]
483
+ latest_iso = Time (latest_mjd , format = "mjd" ).iso [:10 ]
484
+ LOGGER .info (f"No simulations run between { earliest_iso } through { latest_iso } found in { base_uri } " )
442
485
443
486
return all_metadata
444
487
@@ -488,7 +531,7 @@ def make_sim_archive_cli(*args):
488
531
parser .add_argument (
489
532
"--archive_base_uri" ,
490
533
type = str ,
491
- default = "s3://rubin-scheduler-prenight/opsim/" ,
534
+ default = "s3://rubin:rubin -scheduler-prenight/opsim/" ,
492
535
help = "Base URI for the archive" ,
493
536
)
494
537
parser .add_argument ("--tags" , type = str , default = [], nargs = "*" , help = "The tags on the simulation." )
@@ -529,8 +572,10 @@ def make_sim_archive_cli(*args):
529
572
label = arg_values .label ,
530
573
capture_env = arg_values .current_env ,
531
574
)
575
+ LOGGER .info (f"Created simulation archived directory: { data_path .name } " )
532
576
533
577
sim_archive_uri = transfer_archive_dir (data_path .name , arg_values .archive_base_uri )
578
+ LOGGER .info (f"Transferred { data_path } to { sim_archive_uri } " )
534
579
535
580
return sim_archive_uri
536
581
@@ -558,6 +603,7 @@ def compile_sim_metadata(
558
603
compilation_fname : `ResourcePath`
559
604
The resource to which the hdf5 file was written.
560
605
"""
606
+ LOGGER .debug ("Starting compile_sim_metadata." )
561
607
562
608
if append :
563
609
sim_metadata = read_archived_sim_metadata (
@@ -637,6 +683,7 @@ def read_sim_metadata_from_hdf(compilation_resource: str | ResourcePath) -> dict
637
683
638
684
with compilation_resource .as_local () as local_compilation_resource :
639
685
compilation_fname : str = local_compilation_resource .ospath
686
+ LOGGER .debug (f"{ compilation_resource } copied to { compilation_fname } ." )
640
687
sim_df = pd .read_hdf (compilation_fname , "simulations" )
641
688
file_df = pd .read_hdf (compilation_fname , "files" )
642
689
sim_runner_kwargs_df = pd .read_hdf (compilation_fname , "kwargs" )
@@ -868,14 +915,17 @@ def drive_sim(
868
915
in_files ["notebook" ] = notebook
869
916
870
917
with TemporaryDirectory () as local_data_dir :
918
+ LOGGER .debug (f"Using temporary directory { local_data_dir } ." )
871
919
# We want to store the state of the scheduler at the start of
872
920
# the sim, so we need to save it now before we run the simulation.
873
921
scheduler_path = Path (local_data_dir ).joinpath ("scheduler.pickle.xz" )
874
922
with lzma .open (scheduler_path , "wb" , format = lzma .FORMAT_XZ ) as pio :
875
923
pickle .dump (scheduler , pio )
876
924
in_files ["scheduler" ] = scheduler_path .as_posix ()
877
925
926
+ LOGGER .debug ("About to call sim_runner." )
878
927
sim_results = sim_runner (observatory , scheduler , ** kwargs )
928
+ LOGGER .debug ("sim_runner complete." )
879
929
880
930
observations = sim_results [2 ]
881
931
reward_df = sim_results [3 ] if scheduler .keep_rewards else None
@@ -924,6 +974,14 @@ def compile_sim_archive_metadata_cli(*args):
924
974
+ "but add new simulations with dates after the last current entry." ,
925
975
)
926
976
977
+ log_file = os .environ .get ("SIM_ARCHIVE_LOG_FILE" , None )
978
+ if log_file is not None :
979
+ logging .basicConfig (
980
+ filename = log_file , format = "%(asctime)s: %(message)s" , datefmt = "%Y-%m-%dT%H:%M:%S%z"
981
+ )
982
+ else :
983
+ logging .basicConfig (level = logging .INFO )
984
+
927
985
arg_values = parser .parse_args () if len (args ) == 0 else parser .parse_args (args )
928
986
archive_uri = arg_values .archive_base_uri
929
987
compilation_uri = arg_values .compilation_uri
@@ -983,6 +1041,7 @@ def find_latest_prenight_sim_for_nights(
983
1041
sim_metadata = read_archived_sim_metadata (
984
1042
archive_uri , num_nights = max_simulation_age , compilation_resource = compilation_uri
985
1043
)
1044
+ LOGGER .debug (f"Total simulations it the last { max_simulation_age } days: { len (sim_metadata )} ." )
986
1045
987
1046
best_sim = None
988
1047
for uri , sim in sim_metadata .items ():
@@ -1010,6 +1069,9 @@ def find_latest_prenight_sim_for_nights(
1010
1069
.join (f"{ best_sim ['date_index' ]} " , forceDirectory = True )
1011
1070
.join (best_sim ["files" ]["observations" ]["name" ])
1012
1071
)
1072
+ LOGGER .info (f"Most recent simulation meeting requested criteria is { best_sim ['uri' ]} ." )
1073
+ else :
1074
+ LOGGER .debug ("No simulations met the requested criteria." )
1013
1075
1014
1076
return best_sim
1015
1077
@@ -1022,7 +1084,7 @@ def fetch_latest_prenight_sim_for_nights(
1022
1084
archive_uri : str = "s3://rubin:rubin-scheduler-prenight/opsim/" ,
1023
1085
compilation_uri : str = "s3://rubin:rubin-scheduler-prenight/opsim/compiled_metadata_cache.h5" ,
1024
1086
** kwargs ,
1025
- ) -> pd .DataFrame :
1087
+ ) -> pd .DataFrame | None :
1026
1088
"""Fetches visit parameters from the latest archived pre-night simulation
1027
1089
with requested tags for a specified day of observing.
1028
1090
@@ -1059,9 +1121,15 @@ def fetch_latest_prenight_sim_for_nights(
1059
1121
sim_metadata = find_latest_prenight_sim_for_nights (
1060
1122
first_day_obs , last_day_obs , tags , max_simulation_age , archive_uri , compilation_uri
1061
1123
)
1062
- visits = get_sim_data (sim_metadata ["opsim_rp" ], ** kwargs )
1124
+ if sim_metadata is None :
1125
+ LOGGER .info ("No simulations meet requested criteria." )
1126
+ result = None
1127
+ else :
1128
+ visits = get_sim_data (sim_metadata ["opsim_rp" ], ** kwargs )
1129
+ LOGGER .debug (f"Loaded { len (visits )} from { sim_metadata ['opsim_rp' ]} " )
1130
+ result = pd .DataFrame (visits )
1063
1131
1064
- return pd . DataFrame ( visits )
1132
+ return result
1065
1133
1066
1134
1067
1135
def fetch_obsloctap_visits (day_obs : str | None = None , nights : int = 2 ) -> pd .DataFrame :
0 commit comments