@@ -21,27 +21,23 @@ async def test_scheduler_diff_model_names():
21
21
"""
22
22
scheduler = Scheduler ()
23
23
24
- router = Router (
25
- model_list = [
26
- {
27
- "model_name" : "gpt-3.5-turbo" ,
28
- "litellm_params" : {
29
- "model" : "gpt-3.5-turbo" ,
30
- },
31
- },
32
- {"model_name" : "gpt-4" , "litellm_params" : {"model" : "gpt-4" }},
33
- ]
34
- )
35
-
36
- scheduler .update_variables (llm_router = router )
37
-
38
24
item1 = FlowItem (priority = 0 , request_id = "10" , model_name = "gpt-3.5-turbo" )
39
25
item2 = FlowItem (priority = 0 , request_id = "11" , model_name = "gpt-4" )
40
26
await scheduler .add_request (item1 )
41
27
await scheduler .add_request (item2 )
42
28
43
- assert await scheduler .poll (id = "10" , model_name = "gpt-3.5-turbo" ) == True
44
- assert await scheduler .poll (id = "11" , model_name = "gpt-4" ) == True
29
+ assert (
30
+ await scheduler .poll (
31
+ id = "10" , model_name = "gpt-3.5-turbo" , health_deployments = [{"key" : "value" }]
32
+ )
33
+ == True
34
+ )
35
+ assert (
36
+ await scheduler .poll (
37
+ id = "11" , model_name = "gpt-4" , health_deployments = [{"key" : "value" }]
38
+ )
39
+ == True
40
+ )
45
41
46
42
47
43
@pytest .mark .parametrize ("p0, p1" , [(0 , 0 ), (0 , 1 ), (1 , 0 )])
@@ -52,128 +48,45 @@ async def test_scheduler_prioritized_requests(p0, p1):
52
48
"""
53
49
scheduler = Scheduler ()
54
50
55
- router = Router (
56
- model_list = [
57
- {
58
- "model_name" : "gpt-3.5-turbo" ,
59
- "litellm_params" : {
60
- "model" : "gpt-3.5-turbo" ,
61
- },
62
- },
63
- {"model_name" : "gpt-4" , "litellm_params" : {"model" : "gpt-4" }},
64
- ]
65
- )
66
-
67
- scheduler .update_variables (llm_router = router )
68
-
69
51
item1 = FlowItem (priority = p0 , request_id = "10" , model_name = "gpt-3.5-turbo" )
70
52
item2 = FlowItem (priority = p1 , request_id = "11" , model_name = "gpt-3.5-turbo" )
71
53
await scheduler .add_request (item1 )
72
54
await scheduler .add_request (item2 )
73
55
74
56
if p0 == 0 :
75
- assert await scheduler .peek (id = "10" , model_name = "gpt-3.5-turbo" ) == True
76
- assert await scheduler .peek (id = "11" , model_name = "gpt-3.5-turbo" ) == False
57
+ assert (
58
+ await scheduler .peek (
59
+ id = "10" ,
60
+ model_name = "gpt-3.5-turbo" ,
61
+ health_deployments = [{"key" : "value" }],
62
+ )
63
+ == True
64
+ )
65
+ assert (
66
+ await scheduler .peek (
67
+ id = "11" ,
68
+ model_name = "gpt-3.5-turbo" ,
69
+ health_deployments = [{"key" : "value" }],
70
+ )
71
+ == False
72
+ )
77
73
else :
78
- assert await scheduler .peek (id = "11" , model_name = "gpt-3.5-turbo" ) == True
79
- assert await scheduler .peek (id = "10" , model_name = "gpt-3.5-turbo" ) == False
80
-
81
-
82
- @pytest .mark .parametrize ("p0, p1" , [(0 , 1 ), (0 , 0 )]) #
83
- @pytest .mark .asyncio
84
- async def test_aascheduler_prioritized_requests_mock_response (p0 , p1 ):
85
- """
86
- 2 requests for same model group
87
-
88
- if model is at rate limit, ensure the higher priority request gets done first
89
- """
90
- scheduler = Scheduler ()
91
-
92
- router = Router (
93
- model_list = [
94
- {
95
- "model_name" : "gpt-3.5-turbo" ,
96
- "litellm_params" : {
97
- "model" : "gpt-3.5-turbo" ,
98
- "mock_response" : "Hello world this is Macintosh!" ,
99
- "rpm" : 0 ,
100
- },
101
- },
102
- ],
103
- timeout = 10 ,
104
- num_retries = 3 ,
105
- cooldown_time = 5 ,
106
- routing_strategy = "usage-based-routing-v2" ,
107
- )
108
-
109
- scheduler .update_variables (llm_router = router )
110
-
111
- async def _make_prioritized_call (flow_item : FlowItem ):
112
- ## POLL QUEUE
113
- default_timeout = router .timeout
114
- end_time = time .time () + default_timeout
115
- poll_interval = 0.03 # poll every 3ms
116
- curr_time = time .time ()
117
-
118
- make_request = False
119
-
120
- if router is None :
121
- raise Exception ("No llm router value" )
122
-
123
- while curr_time < end_time :
124
- make_request = await scheduler .poll (
125
- id = flow_item .request_id , model_name = flow_item .model_name
74
+ assert (
75
+ await scheduler .peek (
76
+ id = "11" ,
77
+ model_name = "gpt-3.5-turbo" ,
78
+ health_deployments = [{"key" : "value" }],
126
79
)
127
- print (f"make_request={ make_request } , priority={ flow_item .priority } " )
128
- if make_request : ## IF TRUE -> MAKE REQUEST
129
- break
130
- else : ## ELSE -> loop till default_timeout
131
- await asyncio .sleep (poll_interval )
132
- curr_time = time .time ()
133
-
134
- if make_request :
135
- try :
136
- _response = await router .acompletion (
137
- model = flow_item .model_name ,
138
- messages = [{"role" : "user" , "content" : "Hey!" }],
139
- )
140
- except Exception as e :
141
- print ("Received error - {}" .format (str (e )))
142
- return flow_item .priority , flow_item .request_id , time .time ()
143
-
144
- return flow_item .priority , flow_item .request_id , time .time ()
145
-
146
- raise Exception ("didn't make request" )
147
-
148
- tasks = []
149
-
150
- item = FlowItem (
151
- priority = p0 , request_id = str (uuid .uuid4 ()), model_name = "gpt-3.5-turbo"
152
- )
153
- await scheduler .add_request (request = item )
154
- tasks .append (_make_prioritized_call (flow_item = item ))
155
-
156
- item = FlowItem (
157
- priority = p1 , request_id = str (uuid .uuid4 ()), model_name = "gpt-3.5-turbo"
158
- )
159
- await scheduler .add_request (request = item )
160
- tasks .append (_make_prioritized_call (flow_item = item ))
161
-
162
- # Running the tasks and getting responses in order of completion
163
- completed_responses = []
164
- for task in asyncio .as_completed (tasks ):
165
- result = await task
166
- completed_responses .append (result )
167
- print (f"Received response: { result } " )
168
-
169
- print (f"responses: { completed_responses } " )
170
-
171
- assert (
172
- completed_responses [0 ][0 ] == 0
173
- ) # assert higher priority request got done first
174
- assert (
175
- completed_responses [0 ][2 ] < completed_responses [1 ][2 ]
176
- ) # higher priority request tried first
80
+ == True
81
+ )
82
+ assert (
83
+ await scheduler .peek (
84
+ id = "10" ,
85
+ model_name = "gpt-3.5-turbo" ,
86
+ health_deployments = [{"key" : "value" }],
87
+ )
88
+ == False
89
+ )
177
90
178
91
179
92
@pytest .mark .parametrize ("p0, p1" , [(0 , 1 ), (0 , 0 )]) #
0 commit comments