From f58f2535890e5760aaacc624d4e87f56a98574e5 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sat, 18 Nov 2023 17:07:49 +0100 Subject: [PATCH 1/9] checkout version --- .github/workflows/linter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index a462185..31440e0 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout Code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 From 7bee986d86f767561de7f4981af20e6195aa8170 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sun, 17 Dec 2023 18:14:57 +0100 Subject: [PATCH 2/9] outlier check (preliminary) --- v2dl5/run_lists.py | 51 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/v2dl5/run_lists.py b/v2dl5/run_lists.py index df33778..5b527f0 100644 --- a/v2dl5/run_lists.py +++ b/v2dl5/run_lists.py @@ -218,11 +218,14 @@ def _dqm_report(obs_table): "OBS_ID", "RUNTYPE", "DATACAT", + "N_TELS", + "TELLIST", "DQMSTAT", "WEATHER", "L3RATE", "L3RATESD", "FIRMEAN1", + "FIRCORM1", "FIRSTD1", ].pprint_all() @@ -235,12 +238,28 @@ def _dqm_report(obs_table): _print_min_max(obs_table, epoch_mask, "L3RATESD", f"{epoch} (Hz)") _print_min_max(obs_table, epoch_mask, "FIRMEAN1", f"{epoch} (deg)") _print_min_max(obs_table, epoch_mask, "FIRSTD1", f"{epoch} (deg)") + _print_min_max(obs_table, epoch_mask, "FIRCORM1", f"{epoch} (deg)") _print_outlier(obs_table, epoch_mask, "L3RATE", f"{epoch} (Hz)") _print_outlier(obs_table, epoch_mask, "FIRMEAN1", f"{epoch} (deg)") + _print_outlier(obs_table, epoch_mask, "FIRCORM1", f"{epoch} (deg)") + _print_outlier(obs_table, epoch_mask, "FIRSTD1", f"{epoch} (deg)") -def _print_outlier(obs_table, mask, column, string, sigma=3): +def _reject_outliers(data, m=3.0): + """ + from + stackoverflow.com/questions/11686720/is-there-a-numpy-builtin-to-reject-outliers-from-a-list + + """ + d = np.abs(data - np.median(data)) + mdev = np.median(d) + s = d / mdev if mdev else np.zeros(len(d)) + print("AAAA", mdev, data[s > m]) + return data[s < m] + + +def _print_outlier(obs_table, mask, column, string, sigma=2): """ Print OBS_ID with more than sigma deviation from mean @@ -250,6 +269,9 @@ def _print_outlier(obs_table, mask, column, string, sigma=3): _mean = np.mean(_obs_table_cleaned[column]) _std = np.std(_obs_table_cleaned[column]) + _ff = np.ndarray.flatten(_obs_table_cleaned[column]) + _rr = _reject_outliers(_ff) + print("BBBB", len(_ff), len(_rr)) print(f"Mean {column} for {string}: {_mean:.2f} +- {_std:.2f}") # get list of obs_ids with more than sigma deviation from mean @@ -270,18 +292,21 @@ def _print_min_max(obs_table, mask, column, string): """ _obs_table_cleaned = obs_table[(obs_table[column] > -9998.0) & mask] - min_index = np.argmin(_obs_table_cleaned[column]) - max_index = np.argmax(_obs_table_cleaned[column]) - - print(f"{column} for {string}:") - print( - f" Max for obs_id {_obs_table_cleaned['OBS_ID'][max_index]}: " - f"{_obs_table_cleaned[column][max_index]:.2f}" - ) - print( - f" Min for obs_id {_obs_table_cleaned['OBS_ID'][min_index]}: " - f"{_obs_table_cleaned[column][min_index]:.2f}" - ) + try: + min_index = np.argmin(_obs_table_cleaned[column]) + max_index = np.argmax(_obs_table_cleaned[column]) + + print(f"{column} for {string}:") + print( + f" Max for obs_id {_obs_table_cleaned['OBS_ID'][max_index]}: " + f"{_obs_table_cleaned[column][max_index]:.2f}" + ) + print( + f" Min for obs_id {_obs_table_cleaned['OBS_ID'][min_index]}: " + f"{_obs_table_cleaned[column][min_index]:.2f}" + ) + except ValueError: + _logger.warning(f"Empty list for min/max determination of {column}") def _epoch_masks(obs_table): From 3456fb55b4534371465e766165cd648803678b86 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 15 Apr 2024 12:25:29 +0200 Subject: [PATCH 3/9] add plotting of distributions --- v2dl5/run_lists.py | 84 +++++++++++++++++++++++-------- v2dl5/scripts/generate_runlist.py | 2 +- 2 files changed, 64 insertions(+), 22 deletions(-) diff --git a/v2dl5/run_lists.py b/v2dl5/run_lists.py index 5b527f0..0ddd1e7 100644 --- a/v2dl5/run_lists.py +++ b/v2dl5/run_lists.py @@ -7,6 +7,7 @@ import astropy.table import astropy.units as u +import matplotlib.pyplot as plt import numpy as np from astropy.coordinates import SkyCoord from astropy.io import ascii @@ -21,15 +22,10 @@ def generate_run_list(args_dict, target): """ _logger.info("Generate run list. from %s", args_dict["obs_table"]) - obs_table = _read_observation_table(args_dict["obs_table"]) - obs_table = _apply_selection_cuts(obs_table, args_dict, target) - _logger.info("Selected %d runs.", len(obs_table)) - - _dqm_report(obs_table) - + _dqm_report(obs_table, args_dict["output_dir"]) _write_run_list(obs_table, args_dict["output_dir"]) @@ -44,6 +40,7 @@ def _read_observation_table(obs_table_file_name): """ obs_table = astropy.table.Table.read(obs_table_file_name) + # TODO - check if necessary obs_table["DQMSTAT"].fill_value = "unknown" return obs_table.filled() @@ -73,7 +70,7 @@ def _apply_selection_cuts(obs_table, args_dict, target): def _apply_cut_ntel_min(obs_table, args_dict): """ - Apply mininimum telescope cut cut. + Apply minimum telescope cut cut. """ @@ -206,7 +203,7 @@ def _write_run_list(obs_table, output_dir): obs_table.write(f"{output_dir}/run_list.fits.gz", overwrite=True) -def _dqm_report(obs_table): +def _dqm_report(obs_table, output_dir): """ Print list of selected runs to screen @@ -234,16 +231,17 @@ def _dqm_report(obs_table): for epoch_mask, epoch in zip( [mask_V4, mask_V5, mask_V6, mask_V6_redHV], ["V4", "V5", "V6", "V6_redHV"] ): + print("Outliers for epoch", epoch) _print_min_max(obs_table, epoch_mask, "L3RATE", f"{epoch} (Hz)") _print_min_max(obs_table, epoch_mask, "L3RATESD", f"{epoch} (Hz)") _print_min_max(obs_table, epoch_mask, "FIRMEAN1", f"{epoch} (deg)") _print_min_max(obs_table, epoch_mask, "FIRSTD1", f"{epoch} (deg)") _print_min_max(obs_table, epoch_mask, "FIRCORM1", f"{epoch} (deg)") - _print_outlier(obs_table, epoch_mask, "L3RATE", f"{epoch} (Hz)") - _print_outlier(obs_table, epoch_mask, "FIRMEAN1", f"{epoch} (deg)") - _print_outlier(obs_table, epoch_mask, "FIRCORM1", f"{epoch} (deg)") - _print_outlier(obs_table, epoch_mask, "FIRSTD1", f"{epoch} (deg)") + _print_outlier(obs_table, epoch_mask, "L3RATE", f"{epoch} (Hz)", output_dir) + _print_outlier(obs_table, epoch_mask, "FIRMEAN1", f"{epoch} (deg)", output_dir) + _print_outlier(obs_table, epoch_mask, "FIRCORM1", f"{epoch} (deg)", output_dir) + _print_outlier(obs_table, epoch_mask, "FIRSTD1", f"{epoch} (deg)", output_dir) def _reject_outliers(data, m=3.0): @@ -255,34 +253,78 @@ def _reject_outliers(data, m=3.0): d = np.abs(data - np.median(data)) mdev = np.median(d) s = d / mdev if mdev else np.zeros(len(d)) - print("AAAA", mdev, data[s > m]) - return data[s < m] + return data[s < m], data[s > m], np.median(data), mdev -def _print_outlier(obs_table, mask, column, string, sigma=2): +def _print_outlier(obs_table, mask, column, string, output_dir): """ Print OBS_ID with more than sigma deviation from mean """ + sigma = 2 _obs_table_cleaned = obs_table[(obs_table[column] > -9998.0) & mask] _mean = np.mean(_obs_table_cleaned[column]) _std = np.std(_obs_table_cleaned[column]) _ff = np.ndarray.flatten(_obs_table_cleaned[column]) - _rr = _reject_outliers(_ff) - print("BBBB", len(_ff), len(_rr)) + _rr, _med_rejected, _median, _abs_deviation = _reject_outliers(_ff) print(f"Mean {column} for {string}: {_mean:.2f} +- {_std:.2f}") + print(f"Median {column} for {string}: {_median:.2f} +- {_abs_deviation:.2f}") # get list of obs_ids with more than sigma deviation from mean _outlier_list = [ row["OBS_ID"] for row in _obs_table_cleaned if abs(row[column] - _mean) > sigma * _std ] + _outlier_list_med = [ + row["OBS_ID"] + for row in _obs_table_cleaned + if abs(row[column] - _median) > 3.0 * _abs_deviation + ] + print(f"{column} for {string}:") - print(f" Outliers: {_outlier_list}") - for _outlier in _outlier_list: - _tmp_obs = _obs_table_cleaned[_obs_table_cleaned["OBS_ID"] == _outlier] - _tmp_obs.pprint_all() + print(f" Outliers (mean,std): {_outlier_list}") + print(f" Outliers (median,abs): {_outlier_list_med}") + _outlier_mask = np.in1d( + _obs_table_cleaned["OBS_ID"], list(set(_outlier_list) | set(_outlier_list_med)) + ) + _obs_table_cleaned[_outlier_mask].pprint_all() + + _plot_outliers( + _obs_table_cleaned, + column, + string, + np.in1d(_obs_table_cleaned["OBS_ID"], _outlier_list), + np.in1d(_obs_table_cleaned["OBS_ID"], _outlier_list_med), + _mean, + _std, + _median, + _abs_deviation, + output_dir, + ) + + +def _plot_outliers( + obs_table, column, string, mask_std, mask_med, mean, std, median, abs_deviation, output_dir +): + """ + Plot distribution of column values include outliers. + Indicate mean and std as background color (gray) + + """ + + plt.axvspan(mean - std, mean + std, color="red", alpha=0.15) + plt.axvline(mean, color="red", linestyle="--", linewidth=2) + plt.axvspan(median - abs_deviation, median + abs_deviation, color="green", alpha=0.15) + plt.axvline(median, color="green", linestyle="--", linewidth=2) + plt.hist(obs_table[column], bins=20) + plt.hist(obs_table[column][mask_std], bins=20, color="red") + plt.hist(obs_table[column][mask_med], bins=20, color="green") + plt.title(f"{column} for {string}") + plt.xlabel(column) + plt.ylabel("Number of runs") + plt.savefig(f"{output_dir}/outliers_{column}_{string}.png") + plt.close() def _print_min_max(obs_table, mask, column, string): diff --git a/v2dl5/scripts/generate_runlist.py b/v2dl5/scripts/generate_runlist.py index d44bf63..30fea10 100644 --- a/v2dl5/scripts/generate_runlist.py +++ b/v2dl5/scripts/generate_runlist.py @@ -2,7 +2,7 @@ """ Generate a run list for a given sky region or a given target. -Reads in a yaml file (see example `examples/run_selection.yaml`) +Reads in a configuration yaml file (see example `examples/run_selection.yaml`) with criteria for selecting runs. Generates three output files: From fd4c07fb15424604bcef5753db819bc1877138d8 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Tue, 16 Apr 2024 08:59:25 +0200 Subject: [PATCH 4/9] improved outlier --- examples/run_selection.yml | 4 ++ v2dl5/run_lists.py | 101 ++++++++++++++++++------------ v2dl5/scripts/generate_runlist.py | 9 ++- 3 files changed, 72 insertions(+), 42 deletions(-) diff --git a/examples/run_selection.yml b/examples/run_selection.yml index fe1cb5b..85f5057 100644 --- a/examples/run_selection.yml +++ b/examples/run_selection.yml @@ -22,6 +22,10 @@ dqm: ontime_min: 3 min + # TODO + # l3_rate_min: (epoch, rate) + + atmosphere: weather: ["A", "B", "C"] diff --git a/v2dl5/run_lists.py b/v2dl5/run_lists.py index 0ddd1e7..2a8a024 100644 --- a/v2dl5/run_lists.py +++ b/v2dl5/run_lists.py @@ -5,12 +5,12 @@ import logging +import astropy.io import astropy.table import astropy.units as u import matplotlib.pyplot as plt import numpy as np from astropy.coordinates import SkyCoord -from astropy.io import ascii _logger = logging.getLogger(__name__) @@ -186,11 +186,10 @@ def _write_run_list(obs_table, output_dir): Output directory. """ - _logger.info(f"Write run list to {output_dir}/run_list.txt") column_data = obs_table[np.argsort(obs_table["OBS_ID"])]["OBS_ID"] - ascii.write( + astropy.io.ascii.write( column_data, f"{output_dir}/run_list.txt", overwrite=True, @@ -234,14 +233,16 @@ def _dqm_report(obs_table, output_dir): print("Outliers for epoch", epoch) _print_min_max(obs_table, epoch_mask, "L3RATE", f"{epoch} (Hz)") _print_min_max(obs_table, epoch_mask, "L3RATESD", f"{epoch} (Hz)") - _print_min_max(obs_table, epoch_mask, "FIRMEAN1", f"{epoch} (deg)") - _print_min_max(obs_table, epoch_mask, "FIRSTD1", f"{epoch} (deg)") - _print_min_max(obs_table, epoch_mask, "FIRCORM1", f"{epoch} (deg)") + _print_outlier(obs_table, epoch_mask, "L3RATE", f"{epoch} (Hz)", output_dir, "min") + _print_outlier(obs_table, epoch_mask, "L3RATESD", f"{epoch} (Hz)", output_dir, "max", True) + + _print_min_max(obs_table, [True], "FIRMEAN1", "all (deg)") + _print_min_max(obs_table, [True], "FIRSTD1", "all (deg)") + _print_min_max(obs_table, [True], "FIRCORM1", "all (deg)") - _print_outlier(obs_table, epoch_mask, "L3RATE", f"{epoch} (Hz)", output_dir) - _print_outlier(obs_table, epoch_mask, "FIRMEAN1", f"{epoch} (deg)", output_dir) - _print_outlier(obs_table, epoch_mask, "FIRCORM1", f"{epoch} (deg)", output_dir) - _print_outlier(obs_table, epoch_mask, "FIRSTD1", f"{epoch} (deg)", output_dir) + _print_outlier(obs_table, [True], "FIRMEAN1", "all (deg)", output_dir, "max") + _print_outlier(obs_table, [True], "FIRCORM1", "all (deg)", output_dir, "max") + _print_outlier(obs_table, [True], "FIRSTD1", "all (deg)", output_dir, "max", True) def _reject_outliers(data, m=3.0): @@ -256,46 +257,66 @@ def _reject_outliers(data, m=3.0): return data[s < m], data[s > m], np.median(data), mdev -def _print_outlier(obs_table, mask, column, string, output_dir): +def _outlier_lists(obs_table, string, column, center_measure, outlier_type, sigma=3.0): """ - Print OBS_ID with more than sigma deviation from mean + Return list of outliers for mean and median cuts. """ - sigma = 2 + if center_measure == "Mean": + m = np.mean(obs_table[column]) + s = np.std(obs_table[column]) + elif center_measure == "Median": + _data = np.ndarray.flatten(obs_table[column]) + m = np.median(_data) + s = np.median(np.abs(_data - m)) + print(f"{center_measure} {column} for {string}: {m:.2f} +- {s:.2f}") + if outlier_type == "bounds": + outlier_list = [row["OBS_ID"] for row in obs_table if abs(row[column] - m) > sigma * s] + elif outlier_type == "max": + outlier_list = [row["OBS_ID"] for row in obs_table if row[column] - m > sigma * s] + elif outlier_type == "min": + outlier_list = [row["OBS_ID"] for row in obs_table if m - row[column] > sigma * s] + + return m, s, outlier_list + + +def _print_outlier( + obs_table, mask, column, string, output_dir, outlier_type="bounds", log_axis=False +): + """ + Print OBS_ID with more than sigma deviation from mean + + """ _obs_table_cleaned = obs_table[(obs_table[column] > -9998.0) & mask] + if log_axis: + _obs_table_cleaned[column] = np.log10(_obs_table_cleaned[column]) - _mean = np.mean(_obs_table_cleaned[column]) - _std = np.std(_obs_table_cleaned[column]) - _ff = np.ndarray.flatten(_obs_table_cleaned[column]) - _rr, _med_rejected, _median, _abs_deviation = _reject_outliers(_ff) - print(f"Mean {column} for {string}: {_mean:.2f} +- {_std:.2f}") - print(f"Median {column} for {string}: {_median:.2f} +- {_abs_deviation:.2f}") - - # get list of obs_ids with more than sigma deviation from mean - _outlier_list = [ - row["OBS_ID"] for row in _obs_table_cleaned if abs(row[column] - _mean) > sigma * _std - ] - _outlier_list_med = [ - row["OBS_ID"] - for row in _obs_table_cleaned - if abs(row[column] - _median) > 3.0 * _abs_deviation - ] + _mean, _std, _outlier_list_mean = _outlier_lists( + _obs_table_cleaned, string, column, "Mean", outlier_type, sigma=2.0 + ) + _median, _abs_deviation, _outlier_list_median = _outlier_lists( + _obs_table_cleaned, string, column, "Median", outlier_type, sigma=3.0 + ) print(f"{column} for {string}:") - print(f" Outliers (mean,std): {_outlier_list}") - print(f" Outliers (median,abs): {_outlier_list_med}") - _outlier_mask = np.in1d( - _obs_table_cleaned["OBS_ID"], list(set(_outlier_list) | set(_outlier_list_med)) - ) - _obs_table_cleaned[_outlier_mask].pprint_all() + print(f" Outliers (mean,std): {_outlier_list_mean}") + _outlier_mask_mean = np.in1d(_obs_table_cleaned["OBS_ID"], _outlier_list_mean) + _obs_table_cleaned[_outlier_mask_mean].pprint_all() + print(f" Outliers (median,abs): {_outlier_list_median}") + _outlier_mask_median = np.in1d(_obs_table_cleaned["OBS_ID"], _outlier_list_median) + _obs_table_cleaned[_outlier_mask_median].pprint_all() + + for row in _obs_table_cleaned: + if row["L3RATE"] < 250.0: + print(f"{row['OBS_ID']}: {row['L3RATE']}") _plot_outliers( _obs_table_cleaned, column, string, - np.in1d(_obs_table_cleaned["OBS_ID"], _outlier_list), - np.in1d(_obs_table_cleaned["OBS_ID"], _outlier_list_med), + _outlier_mask_mean, + _outlier_mask_median, _mean, _std, _median, @@ -317,9 +338,9 @@ def _plot_outliers( plt.axvline(mean, color="red", linestyle="--", linewidth=2) plt.axvspan(median - abs_deviation, median + abs_deviation, color="green", alpha=0.15) plt.axvline(median, color="green", linestyle="--", linewidth=2) - plt.hist(obs_table[column], bins=20) - plt.hist(obs_table[column][mask_std], bins=20, color="red") - plt.hist(obs_table[column][mask_med], bins=20, color="green") + hist_bins = plt.hist(obs_table[column], bins=50)[1] + plt.hist(obs_table[column][mask_med], bins=hist_bins, color="green") + plt.hist(obs_table[column][mask_std], bins=hist_bins, color="red") plt.title(f"{column} for {string}") plt.xlabel(column) plt.ylabel("Number of runs") diff --git a/v2dl5/scripts/generate_runlist.py b/v2dl5/scripts/generate_runlist.py index 30fea10..b15403e 100644 --- a/v2dl5/scripts/generate_runlist.py +++ b/v2dl5/scripts/generate_runlist.py @@ -11,6 +11,10 @@ 2. A detailed printout of the observation table for all runs which satisfy the criteria. 3. A detailed printout of the observation table for all runs which do not satisfy the criteria. +Applies a simple outlier detection based on mean, median, standard, and absolute deviation of L3Rate +and FIR1. This should be used as indications for further investigation, not as hard cuts to remove +runs. + Example: .. code-block:: console @@ -24,6 +28,7 @@ import argparse import logging +from pathlib import Path import v2dl5.configuration import v2dl5.run_lists @@ -67,11 +72,11 @@ def main(): """ logging.root.setLevel(logging.INFO) - args_dict = v2dl5.configuration.configuration(args=_parse(), generate_dqm_run_list=True) + output_path = Path(args_dict["output_dir"]) + output_path.mkdir(parents=True, exist_ok=True) sky_regions = v2dl5.sky_regions.SkyRegions(args_dict=args_dict) - v2dl5.run_lists.generate_run_list(args_dict=args_dict, target=sky_regions.target) From 2dc3747b7365e2eb9a83400bd42a8d704b3cb6f4 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 17 Apr 2024 09:39:58 +0200 Subject: [PATCH 5/9] apply l3 min cut --- examples/run_selection.yml | 7 +++++-- v2dl5/run_lists.py | 31 ++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/examples/run_selection.yml b/examples/run_selection.yml index 85f5057..f89b5d3 100644 --- a/examples/run_selection.yml +++ b/examples/run_selection.yml @@ -22,8 +22,11 @@ dqm: ontime_min: 3 min - # TODO - # l3_rate_min: (epoch, rate) + l3_rate_min: + V4: 10 Hz + V5: 10 Hz + V6: 10 Hz + V6_redHV: 10 Hz atmosphere: diff --git a/v2dl5/run_lists.py b/v2dl5/run_lists.py index 2a8a024..b1cc470 100644 --- a/v2dl5/run_lists.py +++ b/v2dl5/run_lists.py @@ -64,10 +64,30 @@ def _apply_selection_cuts(obs_table, args_dict, target): _obs_table = _apply_cut_dqm(_obs_table, args_dict) _obs_table = _apply_cut_ontime_min(_obs_table, args_dict) _obs_table = _apply_cut_ntel_min(_obs_table, args_dict) + _obs_table = _apply_cut_l3rate(_obs_table, args_dict) return _obs_table +def _apply_cut_l3rate(obs_table, args_dict): + """ + Apply epoch and observation mode dependent minimum L3 Rate cut + + """ + + mask_V4, mask_V5, mask_V6, mask_V6_redHV = _epoch_masks(obs_table) + mask = np.ones(len(obs_table), dtype=bool) + for epoch_mask, epoch in zip( + [mask_V4, mask_V5, mask_V6, mask_V6_redHV], ["V4", "V5", "V6", "V6_redHV"] + ): + try: + l3rate_min = u.Quantity(args_dict["dqm"]["l3_rate_min"][epoch]).to(u.Hz) + except KeyError: + l3rate_min = 0.0 * u.Hz + mask = mask & [((obs_table["L3RATE"] > l3rate_min.value) & epoch_mask) | ~epoch_mask] + return obs_table[mask[0]] + + def _apply_cut_ntel_min(obs_table, args_dict): """ Apply minimum telescope cut cut. @@ -307,10 +327,6 @@ def _print_outlier( _outlier_mask_median = np.in1d(_obs_table_cleaned["OBS_ID"], _outlier_list_median) _obs_table_cleaned[_outlier_mask_median].pprint_all() - for row in _obs_table_cleaned: - if row["L3RATE"] < 250.0: - print(f"{row['OBS_ID']}: {row['L3RATE']}") - _plot_outliers( _obs_table_cleaned, column, @@ -383,4 +399,9 @@ def _epoch_masks(obs_table): mask_V6 = [row["OBS_ID"] > 63372 and row["RUNTYPE"] == "observing" for row in obs_table] mask_V6_redHV = [row["OBS_ID"] > 63372 and row["RUNTYPE"] == "obsLowHV" for row in obs_table] - return mask_V4, mask_V5, mask_V6, mask_V6_redHV + return ( + np.array(mask_V4), + np.array(mask_V5), + np.array(mask_V6), + np.array(mask_V6_redHV), + ) From bd346d3f208c3dc675040f16a31e766ff5fade89 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 17 Apr 2024 09:42:04 +0200 Subject: [PATCH 6/9] remove todo --- v2dl5/run_lists.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/v2dl5/run_lists.py b/v2dl5/run_lists.py index b1cc470..0f4c97e 100644 --- a/v2dl5/run_lists.py +++ b/v2dl5/run_lists.py @@ -40,9 +40,7 @@ def _read_observation_table(obs_table_file_name): """ obs_table = astropy.table.Table.read(obs_table_file_name) - # TODO - check if necessary obs_table["DQMSTAT"].fill_value = "unknown" - return obs_table.filled() From 517ece5184e1682c2ebaec094c3dfe8197b17341 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Wed, 17 Apr 2024 10:52:03 +0200 Subject: [PATCH 7/9] add minimum distance to target --- examples/run_selection.yml | 4 ++-- v2dl5/run_lists.py | 28 ++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/examples/run_selection.yml b/examples/run_selection.yml index f89b5d3..9516718 100644 --- a/examples/run_selection.yml +++ b/examples/run_selection.yml @@ -2,7 +2,8 @@ # Configuration for runlist generator observations: - obs_cone_radius: 1.5 deg + obs_cone_radius_min: 0.25 deg + obs_cone_radius_max: 1.5 deg on_region: target: "LS I +61 303" @@ -28,7 +29,6 @@ dqm: V6: 10 Hz V6_redHV: 10 Hz - atmosphere: weather: ["A", "B", "C"] diff --git a/v2dl5/run_lists.py b/v2dl5/run_lists.py index 0f4c97e..138fd86 100644 --- a/v2dl5/run_lists.py +++ b/v2dl5/run_lists.py @@ -177,17 +177,26 @@ def _apply_cut_target(obs_table, args_dict, target): """ try: - obs_cone_radius = u.Quantity(args_dict["observations"]["obs_cone_radius"]) + obs_cone_radius_min = u.Quantity(args_dict["observations"]["obs_cone_radius_min"]) except KeyError: - _logger.error("KeyError: observations.obs_cone_radius") + obs_cone_radius_min = 0.0 * u.deg + try: + obs_cone_radius_max = u.Quantity(args_dict["observations"]["obs_cone_radius_max"]) + except KeyError: + _logger.error("KeyError: observations.obs_cone_radius_mix missing") raise angular_separation = target.separation( SkyCoord(obs_table["RA_PNT"], obs_table["DEC_PNT"], unit=(u.deg, u.deg)) ) - obs_table = obs_table[angular_separation < obs_cone_radius] + obs_table = obs_table[ + (angular_separation > obs_cone_radius_min) & (angular_separation < obs_cone_radius_max) + ] - _logger.info("Selecting %d runs from observation cone around %s", len(obs_table), target) + _logger.info( + f"Selecting {len(obs_table)} runs from observation cone around {target}" + f"(min {obs_cone_radius_min}, max {obs_cone_radius_max})" + ) return obs_table @@ -227,7 +236,18 @@ def _dqm_report(obs_table, output_dir): """ obs_table.sort("OBS_ID") + # print general info + obs_table[ + "OBS_ID", + "RUNTYPE", + "ONTIME", + "RA_PNT", + "DEC_PNT", + "RA_OBJ", + "DEC_OBJ", + ].pprint_all() + # print dqm obs_table[ "OBS_ID", "RUNTYPE", From 739f03fa5715f5edbb62acf2561cd1a3831d7b98 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 18 Apr 2024 09:34:31 +0200 Subject: [PATCH 8/9] add printing of removed runs --- README.md | 15 +++++++++++++++ v2dl5/run_lists.py | 34 +++++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0d6edad..dce4477 100644 --- a/README.md +++ b/README.md @@ -36,3 +36,18 @@ Activate the environment to start the analysis: ```bash conda activate v2dl5 ``` + +## Run list generator + +The tool `runlist.py` allows to generate a list of runs for a given observation time and zenith angle. +It generates a lot of printout which should be used to fine tune the run selection. + +Example: + +```bash + +python v2dl5/scripts/generate_runlist.py \ + --obs_table ../../../VTS/DL3/v490/dl3_pointlike_moderate2tel/obs-index.fits.gz \ + --config examples/run_selection.yml \ + --output_dir my_output_dir +``` diff --git a/v2dl5/run_lists.py b/v2dl5/run_lists.py index 138fd86..9ab7743 100644 --- a/v2dl5/run_lists.py +++ b/v2dl5/run_lists.py @@ -82,7 +82,10 @@ def _apply_cut_l3rate(obs_table, args_dict): l3rate_min = u.Quantity(args_dict["dqm"]["l3_rate_min"][epoch]).to(u.Hz) except KeyError: l3rate_min = 0.0 * u.Hz - mask = mask & [((obs_table["L3RATE"] > l3rate_min.value) & epoch_mask) | ~epoch_mask] + mask = np.array( + mask & [((obs_table["L3RATE"] > l3rate_min.value) & epoch_mask) | ~epoch_mask] + ) + _print_removed_runs(obs_table, mask[0], "L3RATE", f"{epoch} L3Rate > {l3rate_min}") return obs_table[mask[0]] @@ -98,8 +101,8 @@ def _apply_cut_ntel_min(obs_table, args_dict): _logger.error("KeyError: dqm.ntel_min") raise - mask = [row["N_TELS"] >= ntel_min for row in obs_table] - _logger.info(f"Remove {mask.count(False)} runs with ntel < {ntel_min}") + mask = np.array([row["N_TELS"] >= ntel_min for row in obs_table]) + _print_removed_runs(obs_table, mask, "N_TELS", f"NTel >= {ntel_min}") obs_table = obs_table[mask] _logger.info(f"Minimum number of telescopes: {np.min(obs_table['N_TELS'])}") @@ -118,8 +121,8 @@ def _apply_cut_ontime_min(obs_table, args_dict): _logger.error("KeyError: dqm.ontime_min") raise - mask = [row["ONTIME"] > ontime_min.value for row in obs_table] - _logger.info(f"Remove {mask.count(False)} runs with ontime < {ontime_min}") + mask = np.array([row["ONTIME"] > ontime_min.value for row in obs_table]) + _print_removed_runs(obs_table, mask, "ONTIME", f"ontime < {ontime_min} m") obs_table = obs_table[mask] _logger.info(f"Minimum run time: {np.min(obs_table['ONTIME'])} s") @@ -138,8 +141,8 @@ def _apply_cut_dqm(obs_table, args_dict): _logger.error("KeyError: dqm.dqmstat") raise - mask = [row["DQMSTAT"] in dqm_stat for row in obs_table] - _logger.info(f"Remove {mask.count(False)} runs with dqm status not not in {dqm_stat}") + mask = np.array([row["DQMSTAT"] in dqm_stat for row in obs_table]) + _print_removed_runs(obs_table, mask, "DQMSTAT", "dqm status") obs_table = obs_table[mask] _logger.info(f"Selected dqm status {np.unique(obs_table['DQMSTAT'])}") @@ -159,11 +162,11 @@ def _apply_cut_atmosphere(obs_table, args_dict): raise try: - mask = [row["WEATHER"][0] in weather for row in obs_table] + mask = np.array([row["WEATHER"][0] in weather for row in obs_table]) except IndexError: _logger.error("IndexError: weather") raise - _logger.info(f"Remove {mask.count(False)} runs with weather not in {weather}") + _print_removed_runs(obs_table, mask, "WEATHER", "weather") obs_table = obs_table[mask] _logger.info(f"Selected weather conditions {np.unique(obs_table['WEATHER'])}") @@ -423,3 +426,16 @@ def _epoch_masks(obs_table): np.array(mask_V6), np.array(mask_V6_redHV), ) + + +def _print_removed_runs(obs_table, mask, column_name, cut_type): + """ + + Print removed runs + + """ + + _logger.info(f"Remove {len(mask)-mask.sum(~False)} runs failing {cut_type} cut") + _removed_runs = [f"{row['OBS_ID']} ({row[column_name]})" for row in obs_table[~mask]] + _logger.info(f"Removed following run: {_removed_runs}") + _logger.info(f"Keep {mask.sum(~False)} runs after application of {cut_type} cut") From 0bce8a1a585f22782e84bcea7b6d569551131657 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 18 Apr 2024 10:00:22 +0200 Subject: [PATCH 9/9] apply sample averages (start of implementation) --- v2dl5/run_lists.py | 74 ++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/v2dl5/run_lists.py b/v2dl5/run_lists.py index 9ab7743..f782cac 100644 --- a/v2dl5/run_lists.py +++ b/v2dl5/run_lists.py @@ -21,6 +21,8 @@ def generate_run_list(args_dict, target): """ + calculate_averages(args_dict) + _logger.info("Generate run list. from %s", args_dict["obs_table"]) obs_table = _read_observation_table(args_dict["obs_table"]) obs_table = _apply_selection_cuts(obs_table, args_dict, target) @@ -29,6 +31,22 @@ def generate_run_list(args_dict, target): _write_run_list(obs_table, args_dict["output_dir"]) +def calculate_averages(args_dict): + """ + Determine mean / medium for columns relevant for RMS-dependent + selection cuts. All other DQM cuts are applied (except target cut). + + """ + + _tmp_table = _read_observation_table(args_dict["obs_table"]) + _tmp_table = _apply_selection_cuts(_tmp_table, args_dict, None) + _logger.info(f"Runs selected to calculate averages: {len(_tmp_table)}") + + # TODO + # add a class handling averages / outliers plus application of log / lin + # selection cuts? + + def _read_observation_table(obs_table_file_name): """ Read observation table from obs_index file. @@ -55,19 +73,17 @@ def _apply_selection_cuts(obs_table, args_dict, target): """ - _logger.info("Apply selection cuts.") - - _obs_table = _apply_cut_target(obs_table, args_dict, target) - _obs_table = _apply_cut_atmosphere(_obs_table, args_dict) - _obs_table = _apply_cut_dqm(_obs_table, args_dict) - _obs_table = _apply_cut_ontime_min(_obs_table, args_dict) - _obs_table = _apply_cut_ntel_min(_obs_table, args_dict) - _obs_table = _apply_cut_l3rate(_obs_table, args_dict) - - return _obs_table + if target is not None: + obs_table = _apply_cut_target(obs_table, args_dict, target) + obs_table = _apply_cut_atmosphere(obs_table, args_dict, target) + obs_table = _apply_cut_dqm(obs_table, args_dict, target) + obs_table = _apply_cut_ontime_min(obs_table, args_dict, target) + obs_table = _apply_cut_ntel_min(obs_table, args_dict, target) + obs_table = _apply_cut_l3rate(obs_table, args_dict, target) + return obs_table -def _apply_cut_l3rate(obs_table, args_dict): +def _apply_cut_l3rate(obs_table, args_dict, target): """ Apply epoch and observation mode dependent minimum L3 Rate cut @@ -85,11 +101,13 @@ def _apply_cut_l3rate(obs_table, args_dict): mask = np.array( mask & [((obs_table["L3RATE"] > l3rate_min.value) & epoch_mask) | ~epoch_mask] ) - _print_removed_runs(obs_table, mask[0], "L3RATE", f"{epoch} L3Rate > {l3rate_min}") + _print_removed_runs( + obs_table, mask[0], "L3RATE", f"{epoch} L3Rate > {l3rate_min}", target is not None + ) return obs_table[mask[0]] -def _apply_cut_ntel_min(obs_table, args_dict): +def _apply_cut_ntel_min(obs_table, args_dict, target): """ Apply minimum telescope cut cut. @@ -102,14 +120,15 @@ def _apply_cut_ntel_min(obs_table, args_dict): raise mask = np.array([row["N_TELS"] >= ntel_min for row in obs_table]) - _print_removed_runs(obs_table, mask, "N_TELS", f"NTel >= {ntel_min}") + _print_removed_runs(obs_table, mask, "N_TELS", f"NTel >= {ntel_min}", target is not None) obs_table = obs_table[mask] - _logger.info(f"Minimum number of telescopes: {np.min(obs_table['N_TELS'])}") + if target is not None: + _logger.info(f"Minimum number of telescopes: {np.min(obs_table['N_TELS'])}") return obs_table -def _apply_cut_ontime_min(obs_table, args_dict): +def _apply_cut_ontime_min(obs_table, args_dict, target): """ Apply ontime min cut. @@ -122,14 +141,15 @@ def _apply_cut_ontime_min(obs_table, args_dict): raise mask = np.array([row["ONTIME"] > ontime_min.value for row in obs_table]) - _print_removed_runs(obs_table, mask, "ONTIME", f"ontime < {ontime_min} m") + _print_removed_runs(obs_table, mask, "ONTIME", f"ontime < {ontime_min} m", target is not None) obs_table = obs_table[mask] - _logger.info(f"Minimum run time: {np.min(obs_table['ONTIME'])} s") + if target is not None: + _logger.info(f"Minimum run time: {np.min(obs_table['ONTIME'])} s") return obs_table -def _apply_cut_dqm(obs_table, args_dict): +def _apply_cut_dqm(obs_table, args_dict, target): """ Apply dqm cuts @@ -142,14 +162,15 @@ def _apply_cut_dqm(obs_table, args_dict): raise mask = np.array([row["DQMSTAT"] in dqm_stat for row in obs_table]) - _print_removed_runs(obs_table, mask, "DQMSTAT", "dqm status") + _print_removed_runs(obs_table, mask, "DQMSTAT", "dqm status", target is not None) obs_table = obs_table[mask] - _logger.info(f"Selected dqm status {np.unique(obs_table['DQMSTAT'])}") + if target is not None: + _logger.info(f"Selected dqm status {np.unique(obs_table['DQMSTAT'])}") return obs_table -def _apply_cut_atmosphere(obs_table, args_dict): +def _apply_cut_atmosphere(obs_table, args_dict, target): """ Remove all fields in column "WEATHER" which are not in the list of args_dict.atmosphere.weather @@ -166,9 +187,10 @@ def _apply_cut_atmosphere(obs_table, args_dict): except IndexError: _logger.error("IndexError: weather") raise - _print_removed_runs(obs_table, mask, "WEATHER", "weather") + _print_removed_runs(obs_table, mask, "WEATHER", "weather", target is not None) obs_table = obs_table[mask] - _logger.info(f"Selected weather conditions {np.unique(obs_table['WEATHER'])}") + if target is not None: + _logger.info(f"Selected weather conditions {np.unique(obs_table['WEATHER'])}") return obs_table @@ -428,12 +450,14 @@ def _epoch_masks(obs_table): ) -def _print_removed_runs(obs_table, mask, column_name, cut_type): +def _print_removed_runs(obs_table, mask, column_name, cut_type, print=True): """ Print removed runs """ + if not print: + return _logger.info(f"Remove {len(mask)-mask.sum(~False)} runs failing {cut_type} cut") _removed_runs = [f"{row['OBS_ID']} ({row[column_name]})" for row in obs_table[~mask]]