Skip to content

Safely pass Prometheus key deletion errors since not every setup will use it #585

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 70 additions & 16 deletions src/murfey/server/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
Proposal,
)
from PIL import Image
from prometheus_client import Counter, Gauge
from pydantic import BaseModel
from sqlalchemy import func
from sqlalchemy.exc import OperationalError
Expand Down Expand Up @@ -50,7 +51,7 @@
from murfey.server.api.spa import _cryolo_model_path
from murfey.server.gain import Camera, prepare_eer_gain, prepare_gain
from murfey.server.murfey_db import murfey_db
from murfey.util import secure_path
from murfey.util import safe_run, secure_path
from murfey.util.config import MachineConfig, from_file, settings
from murfey.util.db import (
AutoProcProgram,
Expand Down Expand Up @@ -1616,33 +1617,59 @@
sessions_for_visit = db.exec(
select(Session).where(Session.visit == session.visit)
).all()
# Don't remove prometheus metrics if there are other sessions using them
if len(sessions_for_visit) == 1:
# Don't remove prometheus metrics if there are other sessions using them
try:
prom.monitoring_switch.remove(session.visit)
except KeyError:
pass
safe_run(

Check warning on line 1622 in src/murfey/server/api/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/server/api/__init__.py#L1622

Added line #L1622 was not covered by tests
prom.monitoring_switch.remove,
args=(session.visit,),
label="monitoring_switch",
)
rsync_instances = db.exec(
select(RsyncInstance).where(RsyncInstance.session_id == session_id)
).all()
for ri in rsync_instances:
prom.seen_files.remove(ri.source, session.visit)
prom.transferred_files.remove(ri.source, session.visit)
prom.transferred_files_bytes.remove(ri.source, session.visit)
prom.seen_data_files.remove(ri.source, session.visit)
prom.transferred_data_files.remove(ri.source, session.visit)
prom.transferred_data_files_bytes.remove(ri.source, session.visit)
safe_run(

Check warning on line 1631 in src/murfey/server/api/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/server/api/__init__.py#L1631

Added line #L1631 was not covered by tests
prom.seen_files.remove,
args=(ri.source, session.visit),
label="seen_files",
)
safe_run(

Check warning on line 1636 in src/murfey/server/api/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/server/api/__init__.py#L1636

Added line #L1636 was not covered by tests
prom.transferred_files.remove,
args=(ri.source, session.visit),
label="transferred_files",
)
safe_run(

Check warning on line 1641 in src/murfey/server/api/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/server/api/__init__.py#L1641

Added line #L1641 was not covered by tests
prom.transferred_files_bytes.remove,
args=(ri.source, session.visit),
label="transferred_files_bytes",
)
safe_run(

Check warning on line 1646 in src/murfey/server/api/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/server/api/__init__.py#L1646

Added line #L1646 was not covered by tests
prom.seen_data_files.remove,
args=(ri.source, session.visit),
label="seen_data_files",
)
safe_run(

Check warning on line 1651 in src/murfey/server/api/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/server/api/__init__.py#L1651

Added line #L1651 was not covered by tests
prom.transferred_data_files.remove,
args=(ri.source, session.visit),
label="transferred_data_files",
)
safe_run(

Check warning on line 1656 in src/murfey/server/api/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/server/api/__init__.py#L1656

Added line #L1656 was not covered by tests
prom.transferred_data_files_bytes.remove,
args=(ri.source, session.visit),
label="transferred_data_file_bytes",
)
collected_ids = db.exec(
select(DataCollectionGroup, DataCollection, ProcessingJob)
.where(DataCollectionGroup.session_id == session_id)
.where(DataCollection.dcg_id == DataCollectionGroup.id)
.where(ProcessingJob.dc_id == DataCollection.id)
).all()
for c in collected_ids:
try:
prom.preprocessed_movies.remove(c[2].id)
except KeyError:
continue
safe_run(

Check warning on line 1668 in src/murfey/server/api/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/server/api/__init__.py#L1668

Added line #L1668 was not covered by tests
prom.preprocessed_movies.remove,
args=(c[2].id,),
label="preprocessed_movies",
)
db.delete(session)
db.commit()
return
Expand Down Expand Up @@ -1954,3 +1981,30 @@
session.current_gain_ref = new_gain_ref.path
db.add(session)
db.commit()


@router.get("/prometheus/{metric_name}")
def inspect_prometheus_metrics(
    metric_name: str,
):
    """
    Debugging endpoint that reports the values currently held by one of the
    Prometheus counters or gauges defined in the ``prom`` module.

    ``metric_name`` is looked up as an attribute of ``prom``. A ``LookupError``
    is raised when no such attribute exists or when it is not a ``Counter`` or
    ``Gauge``.

    When the metric exposes a ``_metrics`` mapping, the result is a dict keyed
    by enumeration index; each entry maps the metric's label names to the
    label values of one child, plus a ``"value"`` key holding that child's
    current value. Otherwise the result is ``{"value": <current value>}``.
    """

    # Fetch the attribute by name; anything that is not a Counter/Gauge
    # (including a missing attribute, which yields None) is rejected.
    metric: Optional[Counter | Gauge] = getattr(prom, metric_name, None)
    if not isinstance(metric, (Counter, Gauge)):
        raise LookupError("No matching metric was found")

    # No child mapping: report the single value stored on the metric itself.
    # NOTE(review): presumably only labelled metrics carry `_metrics` - confirm
    # against the installed prometheus_client version.
    if not hasattr(metric, "_metrics"):
        return {"value": metric._value.get()}

    # One entry per child: label name -> label value, plus the current value.
    return {
        index: {
            **dict(zip(metric._labelnames, label_values)),
            "value": child._value.get(),
        }
        for index, (label_values, child) in enumerate(metric._metrics.items())
    }

Check warning on line 2010 in src/murfey/server/api/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/server/api/__init__.py#L2009-L2010

Added lines #L2009 - L2010 were not covered by tests
22 changes: 21 additions & 1 deletion src/murfey/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from queue import Queue
from threading import Thread
from typing import Optional
from typing import Any, Callable, Optional
from uuid import uuid4

from werkzeug.utils import secure_filename
Expand Down Expand Up @@ -132,3 +132,23 @@
if "." not in logger_name:
return False
logger_name = logger_name.rsplit(".", maxsplit=1)[0]


def safe_run(
func: Callable,
args: list | tuple = [],
kwargs: dict[str, Any] = {},
label: str = "",
):
"""
A wrapper to encase individual functions in try-except blocks so that a warning
is raised if the function fails, but the process continues as normal otherwise.
"""
try:
return func(*args, **kwargs)
except Exception:
logger.warning(

Check warning on line 150 in src/murfey/util/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/util/__init__.py#L147-L150

Added lines #L147 - L150 were not covered by tests
f"Function {func.__name__!r} failed to run for object {label!r}",
exc_info=True,
)
return None

Check warning on line 154 in src/murfey/util/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/murfey/util/__init__.py#L154

Added line #L154 was not covered by tests