Skip to content

Commit 2245ee1

Browse files
test(test_scheduler.py): fix testing
1 parent 7715267 commit 2245ee1

File tree

1 file changed

+43
-130
lines changed

1 file changed

+43
-130
lines changed

litellm/tests/test_scheduler.py

+43 -130
Original file line number | Diff line number | Diff line change
@@ -21,27 +21,23 @@ async def test_scheduler_diff_model_names():
2121
"""
2222
scheduler = Scheduler()
2323

24-
router = Router(
25-
model_list=[
26-
{
27-
"model_name": "gpt-3.5-turbo",
28-
"litellm_params": {
29-
"model": "gpt-3.5-turbo",
30-
},
31-
},
32-
{"model_name": "gpt-4", "litellm_params": {"model": "gpt-4"}},
33-
]
34-
)
35-
36-
scheduler.update_variables(llm_router=router)
37-
3824
item1 = FlowItem(priority=0, request_id="10", model_name="gpt-3.5-turbo")
3925
item2 = FlowItem(priority=0, request_id="11", model_name="gpt-4")
4026
await scheduler.add_request(item1)
4127
await scheduler.add_request(item2)
4228

43-
assert await scheduler.poll(id="10", model_name="gpt-3.5-turbo") == True
44-
assert await scheduler.poll(id="11", model_name="gpt-4") == True
29+
assert (
30+
await scheduler.poll(
31+
id="10", model_name="gpt-3.5-turbo", health_deployments=[{"key": "value"}]
32+
)
33+
== True
34+
)
35+
assert (
36+
await scheduler.poll(
37+
id="11", model_name="gpt-4", health_deployments=[{"key": "value"}]
38+
)
39+
== True
40+
)
4541

4642

4743
@pytest.mark.parametrize("p0, p1", [(0, 0), (0, 1), (1, 0)])
@@ -52,128 +48,45 @@ async def test_scheduler_prioritized_requests(p0, p1):
5248
"""
5349
scheduler = Scheduler()
5450

55-
router = Router(
56-
model_list=[
57-
{
58-
"model_name": "gpt-3.5-turbo",
59-
"litellm_params": {
60-
"model": "gpt-3.5-turbo",
61-
},
62-
},
63-
{"model_name": "gpt-4", "litellm_params": {"model": "gpt-4"}},
64-
]
65-
)
66-
67-
scheduler.update_variables(llm_router=router)
68-
6951
item1 = FlowItem(priority=p0, request_id="10", model_name="gpt-3.5-turbo")
7052
item2 = FlowItem(priority=p1, request_id="11", model_name="gpt-3.5-turbo")
7153
await scheduler.add_request(item1)
7254
await scheduler.add_request(item2)
7355

7456
if p0 == 0:
75-
assert await scheduler.peek(id="10", model_name="gpt-3.5-turbo") == True
76-
assert await scheduler.peek(id="11", model_name="gpt-3.5-turbo") == False
57+
assert (
58+
await scheduler.peek(
59+
id="10",
60+
model_name="gpt-3.5-turbo",
61+
health_deployments=[{"key": "value"}],
62+
)
63+
== True
64+
)
65+
assert (
66+
await scheduler.peek(
67+
id="11",
68+
model_name="gpt-3.5-turbo",
69+
health_deployments=[{"key": "value"}],
70+
)
71+
== False
72+
)
7773
else:
78-
assert await scheduler.peek(id="11", model_name="gpt-3.5-turbo") == True
79-
assert await scheduler.peek(id="10", model_name="gpt-3.5-turbo") == False
80-
81-
82-
@pytest.mark.parametrize("p0, p1", [(0, 1), (0, 0)]) #
83-
@pytest.mark.asyncio
84-
async def test_aascheduler_prioritized_requests_mock_response(p0, p1):
85-
"""
86-
2 requests for same model group
87-
88-
if model is at rate limit, ensure the higher priority request gets done first
89-
"""
90-
scheduler = Scheduler()
91-
92-
router = Router(
93-
model_list=[
94-
{
95-
"model_name": "gpt-3.5-turbo",
96-
"litellm_params": {
97-
"model": "gpt-3.5-turbo",
98-
"mock_response": "Hello world this is Macintosh!",
99-
"rpm": 0,
100-
},
101-
},
102-
],
103-
timeout=10,
104-
num_retries=3,
105-
cooldown_time=5,
106-
routing_strategy="usage-based-routing-v2",
107-
)
108-
109-
scheduler.update_variables(llm_router=router)
110-
111-
async def _make_prioritized_call(flow_item: FlowItem):
112-
## POLL QUEUE
113-
default_timeout = router.timeout
114-
end_time = time.time() + default_timeout
115-
poll_interval = 0.03 # poll every 3ms
116-
curr_time = time.time()
117-
118-
make_request = False
119-
120-
if router is None:
121-
raise Exception("No llm router value")
122-
123-
while curr_time < end_time:
124-
make_request = await scheduler.poll(
125-
id=flow_item.request_id, model_name=flow_item.model_name
74+
assert (
75+
await scheduler.peek(
76+
id="11",
77+
model_name="gpt-3.5-turbo",
78+
health_deployments=[{"key": "value"}],
12679
)
127-
print(f"make_request={make_request}, priority={flow_item.priority}")
128-
if make_request: ## IF TRUE -> MAKE REQUEST
129-
break
130-
else: ## ELSE -> loop till default_timeout
131-
await asyncio.sleep(poll_interval)
132-
curr_time = time.time()
133-
134-
if make_request:
135-
try:
136-
_response = await router.acompletion(
137-
model=flow_item.model_name,
138-
messages=[{"role": "user", "content": "Hey!"}],
139-
)
140-
except Exception as e:
141-
print("Received error - {}".format(str(e)))
142-
return flow_item.priority, flow_item.request_id, time.time()
143-
144-
return flow_item.priority, flow_item.request_id, time.time()
145-
146-
raise Exception("didn't make request")
147-
148-
tasks = []
149-
150-
item = FlowItem(
151-
priority=p0, request_id=str(uuid.uuid4()), model_name="gpt-3.5-turbo"
152-
)
153-
await scheduler.add_request(request=item)
154-
tasks.append(_make_prioritized_call(flow_item=item))
155-
156-
item = FlowItem(
157-
priority=p1, request_id=str(uuid.uuid4()), model_name="gpt-3.5-turbo"
158-
)
159-
await scheduler.add_request(request=item)
160-
tasks.append(_make_prioritized_call(flow_item=item))
161-
162-
# Running the tasks and getting responses in order of completion
163-
completed_responses = []
164-
for task in asyncio.as_completed(tasks):
165-
result = await task
166-
completed_responses.append(result)
167-
print(f"Received response: {result}")
168-
169-
print(f"responses: {completed_responses}")
170-
171-
assert (
172-
completed_responses[0][0] == 0
173-
) # assert higher priority request got done first
174-
assert (
175-
completed_responses[0][2] < completed_responses[1][2]
176-
) # higher priority request tried first
80+
== True
81+
)
82+
assert (
83+
await scheduler.peek(
84+
id="10",
85+
model_name="gpt-3.5-turbo",
86+
health_deployments=[{"key": "value"}],
87+
)
88+
== False
89+
)
17790

17891

17992
@pytest.mark.parametrize("p0, p1", [(0, 1), (0, 0)]) #

0 commit comments

Comments (0)