@@ -35,17 +35,15 @@
     "finished": "successful",
 }
 
-WORKERS_MULTIPLIER = float(os.getenv("WORKERS_MULTIPLIER", 1))
 ONE_SECOND = datetime.timedelta(seconds=1)
-HIGH_PRIORITY_USER_UID = os.getenv(
-    "HIGH_PRIORITY_USER_UID", "8d8ee054-6a09-4da8-a5be-d5dff52bbc5f"
-)
-BROKER_PRIORITY_ALGORITHM = os.getenv("BROKER_PRIORITY_ALGORITHM", "legacy")
+ONE_MINUTE = ONE_SECOND * 60
+ONE_HOUR = ONE_MINUTE * 60
+CONFIG = config.BrokerConfig()
 
 
 @cachetools.cached(  # type: ignore
     cache=cachetools.TTLCache(
-        maxsize=1024, ttl=float(os.getenv("GET_NUMBER_OF_WORKERS_CACHE_TIME", 10))
+        maxsize=1024, ttl=CONFIG.get_number_of_workers_cache_time
     ),
     info=True,
 )
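
`config.BrokerConfig` itself is not part of this diff. Below is a minimal sketch of what it plausibly provides, assuming a pydantic-settings `BaseSettings` subclass; the field names and defaults are inferred from the `os.getenv` calls removed in this patch, and `broker_priority_interval_hours` (used further down) is a new setting whose default here is a guess:

# Hypothetical sketch of config.BrokerConfig -- not shown in this diff.
# Field names and defaults are inferred from the removed os.getenv calls;
# pydantic-settings matches environment variables case-insensitively.
from pydantic_settings import BaseSettings

class BrokerConfig(BaseSettings):
    broker_priority_algorithm: str = "legacy"
    broker_priority_interval_hours: int = 24  # new setting; default is a guess
    high_priority_user_uid: str = "8d8ee054-6a09-4da8-a5be-d5dff52bbc5f"
    get_number_of_workers_cache_time: float = 10
    get_tasks_from_scheduler_cache_time: int = 1
    qos_rules_cache_time: int = 10
    sync_database_cache_time: int = 10
    broker_requeue_on_killed_worker_requests: bool = False
    broker_requeue_on_lost_requests: bool = True
    broker_requeue_limit: int = 3
    broker_max_internal_scheduler_tasks: int = 500
    rules_path: str = "/src/rules.qos"
    # The old variable was BROKER_WAIT_TIME, so the real class presumably
    # declares an alias or env prefix for this field.
    wait_time: float = 2.0
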
@@ -58,14 +56,12 @@ def get_number_of_workers(client: distributed.Client) -> int:
 
 
 @cachetools.cached(  # type: ignore
-    cache=cachetools.TTLCache(
-        maxsize=1024, ttl=int(os.getenv("QOS_RULES_CACHE_TIME", 10))
-    ),
+    cache=cachetools.TTLCache(maxsize=1024, ttl=CONFIG.qos_rules_cache_time),
     info=True,
 )
 def get_rules_hash(rules_path: str):
     if rules_path is None or not os.path.exists(rules_path):
-        rules = os.getenv("DEFAULT_RULES", "")
+        rules = ""
     else:
         with open(rules_path) as f:
             rules = f.read()
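
Two things to note here: the `DEFAULT_RULES` environment fallback is dropped outright (hardcoded to an empty string) rather than moved onto `CONFIG`, both here and in `QoSRules.__init__` below; and the tail of `get_rules_hash`, which presumably digests the `rules` string, lies outside this hunk. A hypothetical sketch of such a digest, assuming `hashlib` (the real implementation may differ):

# Hypothetical digest helper -- the actual hashing tail of get_rules_hash
# is not shown in this diff.
import hashlib

def hash_rules_text(rules: str) -> str:
    return hashlib.md5(rules.encode("utf-8")).hexdigest()
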
@@ -74,7 +70,7 @@ def get_rules_hash(rules_path: str):
 
 @cachetools.cached(  # type: ignore
     cache=cachetools.TTLCache(
-        maxsize=1024, ttl=int(os.getenv("GET_TASKS_FROM_SCHEDULER_CACHE_TIME", 1))
+        maxsize=1024, ttl=CONFIG.get_tasks_from_scheduler_cache_time
     ),
     info=True,
 )
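
All three cached getters keep the same `cachetools` memoization; only the TTL source moves from an inline `os.getenv` read (which was already evaluated just once, at import time, since decorator arguments run at module load) to a `CONFIG` attribute. A standalone illustration of the pattern, with a hypothetical function:

# Standalone illustration of the TTL-cache pattern used above (the function
# is hypothetical; info=True exposes hit/miss counters via cache_info()).
import cachetools

@cachetools.cached(
    cache=cachetools.TTLCache(maxsize=1024, ttl=10),
    info=True,
)
def expensive_lookup(key: str) -> str:
    return key.upper()  # stand-in for a scheduler or database call

expensive_lookup("a")  # miss: computed and cached
expensive_lookup("a")  # hit: served from cache until the 10 s TTL expires
print(expensive_lookup.cache_info())  # e.g. CacheInfo(hits=1, misses=1, maxsize=1024, currsize=1)
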
@@ -192,14 +188,12 @@ def reset(self) -> None:
 class QoSRules:
     def __init__(self, number_of_workers) -> None:
         self.environment = Environment.Environment(number_of_workers=number_of_workers)
-        self.rules_path = os.getenv("RULES_PATH", "/src/rules.qos")
+        self.rules_path = CONFIG.rules_path
         if os.path.exists(self.rules_path):
             self.rules = self.rules_path
         else:
             logger.info("rules file not found", rules_path=self.rules_path)
-            parser = QoS.RulesParser(
-                io.StringIO(os.getenv("DEFAULT_RULES", "")), logger=logger
-            )
+            parser = QoS.RulesParser(io.StringIO(""), logger=logger)
             self.rules = QoS.RuleSet()
             parser.parse_rules(self.rules, self.environment, raise_exception=False)
 
@@ -212,9 +206,9 @@ class Broker:
     address: str
     session_maker_read: sa.orm.sessionmaker
     session_maker_write: sa.orm.sessionmaker
-    wait_time: float = float(os.getenv("BROKER_WAIT_TIME", 2))
+    wait_time: float = CONFIG.wait_time
     ttl_cache = cachetools.TTLCache(
-        maxsize=1024, ttl=int(os.getenv("SYNC_DATABASE_CACHE_TIME", 10))
+        maxsize=1024, ttl=CONFIG.sync_database_cache_time
     )
 
     futures: dict[str, distributed.Future] = attrs.field(
@@ -282,7 +276,7 @@ def set_request_error_status(
         request = db.get_request(request_uid, session=session)
         if request.status != "running":
             return None
-        requeue = os.getenv("BROKER_REQUEUE_ON_KILLED_WORKER_REQUESTS", False)
+        requeue = CONFIG.broker_requeue_on_killed_worker_requests
         if error_reason == "KilledWorker":
             worker_restart_events = self.client.get_events("worker-restart-memory")
             # get info on worker and pid of the killed request
@@ -315,9 +309,11 @@ def set_request_error_status(
                         session=session,
                     )
                     requeue = False
-        if requeue and request.request_metadata.get(
-            "resubmit_number", 0
-        ) < os.getenv("BROKER_REQUEUE_LIMIT", 3):
+        if (
+            requeue
+            and request.request_metadata.get("resubmit_number", 0)
+            < CONFIG.broker_requeue_limit
+        ):
             logger.info("worker killed: re-queueing", job_id=request_uid)
             db.requeue_request(request=request, session=session)
             self.queue.add(request_uid, request)
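
Beyond centralizing configuration, the typed config object fixes two latent bugs in the removed code: `os.getenv` returns a string whenever the variable is set, so `os.getenv("BROKER_REQUEUE_ON_KILLED_WORKER_REQUESTS", False)` was truthy even when set to `"False"`, and comparing the integer `resubmit_number` against `os.getenv("BROKER_REQUEUE_LIMIT", 3)` raised `TypeError` whenever the variable was set. A standalone demonstration:

# Demonstration of the latent bugs in the removed os.getenv-based checks
# (standalone snippet, not part of the broker code):
import os

os.environ["BROKER_REQUEUE_ON_KILLED_WORKER_REQUESTS"] = "False"
os.environ["BROKER_REQUEUE_LIMIT"] = "3"

requeue = os.getenv("BROKER_REQUEUE_ON_KILLED_WORKER_REQUESTS", False)
print(bool(requeue))  # True -- any non-empty string is truthy, even "False"

try:
    0 < os.getenv("BROKER_REQUEUE_LIMIT", 3)  # int < str
except TypeError as exc:
    print(exc)  # '<' not supported between instances of 'int' and 'str'
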
@@ -444,10 +440,10 @@ def sync_database(self, session: sa.orm.Session) -> None:
                 )
                 continue
             # FIXME: check if request status has changed
-            if os.getenv(
-                "BROKER_REQUEUE_ON_LOST_REQUESTS", True
-            ) and request.request_metadata.get("resubmit_number", 0) < os.getenv(
-                "BROKER_REQUEUE_LIMIT", 3
+            if (
+                CONFIG.broker_requeue_on_lost_requests
+                and request.request_metadata.get("resubmit_number", 0)
+                < CONFIG.broker_requeue_limit
             ):
                 logger.info(
                     "request not found: re-queueing", job_id={request.request_uid}
@@ -488,7 +484,7 @@ def sync_qos_rules(self, session_write) -> None:
         if tasks_number := len(self.internal_scheduler.queue):
             logger.info("performance", tasks_number=tasks_number)
             for task in list(self.internal_scheduler.queue)[
-                : int(os.getenv("BROKER_MAX_INTERNAL_SCHEDULER_TASKS", 500))
+                : CONFIG.broker_max_internal_scheduler_tasks
             ]:
                 # the internal scheduler is used to asynchronously add qos rules to database
                 # it returns a new qos rule if a new qos rule is added to database
@@ -575,16 +571,18 @@ def submit_requests(
         candidates: Iterable[db.SystemRequest],
     ) -> None:
         """Check the qos rules and submit the requests to the dask scheduler."""
-        if BROKER_PRIORITY_ALGORITHM == "processing_time":
+        if CONFIG.broker_priority_algorithm == "processing_time":
             user_requests: dict[str, list[db.SystemRequest]] = {}
             for request in candidates:
                 user_requests.setdefault(request.user_uid, []).append(request)
             # FIXME: this is a temporary solution to prioritize subrequests from the high priority user
             interval_stop = datetime.datetime.now()
             users_queue = {
-                HIGH_PRIORITY_USER_UID: 0
+                CONFIG.high_priority_user_uid: 0
             } | db.get_users_queue_from_processing_time(
-                interval_stop=interval_stop, session=session_write
+                interval_stop=interval_stop,
+                session=session_write,
+                interval=ONE_HOUR * CONFIG.broker_priority_interval_hours,
             )
             requests_counter = 0
             for user_uid in users_queue:
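
Seeding `users_queue` this way relies on two properties of the dict union operator (Python 3.9+): keys keep the left operand's insertion order, so the high-priority user is iterated first by the `for user_uid in users_queue` loop, and a key present on both sides takes its value from the right operand. A small illustration with hypothetical UIDs:

# Illustration of the dict-union seeding above (hypothetical UIDs):
high_priority_user_uid = "hp-user"
from_processing_time = {"user-b": 7.5, "hp-user": 2.0, "user-a": 1.0}

users_queue = {high_priority_user_uid: 0} | from_processing_time
print(list(users_queue))       # ['hp-user', 'user-b', 'user-a'] -- hp-user stays first
print(users_queue["hp-user"])  # 2.0 -- value from the right operand wins
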
@@ -613,7 +611,7 @@ def submit_requests(
                 if self.qos.can_run(
                     request, session=session_write, scheduler=self.internal_scheduler
                 ):
-                    if requests_counter <= int(number_of_requests * WORKERS_MULTIPLIER):
+                    if requests_counter <= int(number_of_requests):
                         self.submit_request(request, session=session_write)
                         requests_counter += 1
 