This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit cd8938a

fix: add local e2e test (#1965)
Co-authored-by: sangjanai <[email protected]>
1 parent 19eb7ea commit cd8938a

File tree

1 file changed (+284 lines, -0)

engine/e2e-test/local_test.py

Lines changed: 284 additions & 0 deletions
@@ -0,0 +1,284 @@
import requests
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
import json
import platform
import asyncio
import argparse
import websockets

# Define a list of request configurations
model_id = "tinyllama:gguf"


def make_request(config):
    try:
        method = config["method"].lower()
        url = config["url"]
        headers = config.get("headers", {})
        data = json.dumps(config.get("data")) if "data" in config else None

        response = requests.request(method, url, headers=headers, data=data)
        return response.status_code, response.text
    except requests.RequestException as e:
        return None, str(e)


def get_setup_configs(host_port):
    return [
        {
            "method": "PATCH",
            "url": "http://" + host_port + "/v1/configs",
            "headers": {"Content-Type": "application/json"},
            "data": {
                "cors": True,
                "allowed_origins": [
                    "http://localhost:39281",
                    "http://127.0.0.1:39281",
                    "http://0.0.0.0:39281",
                ],
                "proxy_username": "",
                "proxy_password": "",
                "proxy_url": "",
                "verify_proxy_ssl": False,
                "verify_proxy_host_ssl": False,
                "verify_peer_ssl": False,
                "verify_host_ssl": False,
                "no_proxy": "localhost",
                "huggingface_token": "",
            },
        },
        {
            "method": "POST",
            "url": "http://" + host_port + "/v1/engines/llama-cpp/install",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "GET",
            "url": "http://" + host_port + "/v1/engines/llama-cpp",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "GET",
            "url": "http://" + host_port + "/v1/engines/llama-cpp/releases",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "GET",
            "url": "http://" + host_port + "/v1/engines/llama-cpp/releases/latest",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "GET",
            "url": "http://" + host_port + "/v1/engines/llama-cpp/default",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "POST",
            "url": "http://" + host_port + "/v1/models/pull",
            "headers": {"Content-Type": "application/json"},
            "data": {"model": "tinyllama:gguf"},
        },
        {
            "method": "POST",
            "url": "http://" + host_port + "/v1/engines/llama-cpp/load",
            "headers": {"Content-Type": "application/json"},
        },
        {
            "method": "POST",
            "url": "http://" + host_port + "/v1/models/start",
            "headers": {"Content-Type": "application/json"},
            "data": {"model": "tinyllama:gguf"},
        },
        {
            "method": "POST",
            "url": "http://" + host_port + "/v1/chat/completions",
            "headers": {"Content-Type": "application/json"},
            "data": {
                "model": "tinyllama:gguf",
                "stream": True,
                "messages": [{"content": "How are you today?", "role": "user"}],
                "max_tokens": 256,
            },
        },
    ]


def get_teardown_configs(host_port):
    return [
        {
            "method": "POST",
            "url": "http://" + host_port + "/v1/models/stop",
            "headers": {"Content-Type": "application/json"},
            "data": {"model": "tinyllama:gguf"},
        },
        {
            "method": "DELETE",
            "url": "http://" + host_port + "/v1/engines/llama-cpp/load",
            "headers": {"Content-Type": "application/json"},
        },
        {
            "method": "DELETE",
            "url": "http://" + host_port + "/v1/engines/llama-cpp/install",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "DELETE",
            "url": "http://" + host_port + "/v1/models/" + model_id,
            "headers": {"Accept": "application/json"},
        },
    ]

async def setup_env(host_port):
    for config in get_setup_configs(host_port):
        status_code, response_text = make_request(config)
        # Engine install and model pull run asynchronously on the server side,
        # so block on the DownloadSuccess websocket event before continuing.
        if config["method"] == "POST" and (
            "/v1/engines/llama-cpp/install" in config["url"]
            or "/v1/models/pull" in config["url"]
        ):
            await wait_for_websocket_download_success_event(timeout=None)

        if status_code:
            print(
                f"setup_env: {config['url']} Status Code {status_code} - Response {response_text}"
            )
        else:
            print(f"setup_env: {config['url']} Error - {response_text}")


def teardown(host_port):
    for config in get_teardown_configs(host_port):
        status_code, response_text = make_request(config)

        if status_code:
            print(f"teardown: {config['url']} Status Code {status_code}")
        else:
            print(f"teardown: {config['url']} Error - {response_text}")

def get_request_configs(host_port):
    return [
        {
            "method": "GET",
            "url": "http://" + host_port + "/v1/configs",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "GET",
            "url": "http://" + host_port + "/v1/models",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "GET",
            "url": "http://" + host_port + "/v1/hardware",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "GET",
            "url": "http://" + host_port + "/healthz",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "GET",
            "url": "http://" + host_port + "/v1/threads",
            "headers": {"Accept": "application/json"},
        },
        {
            "method": "GET",
            "url": "http://" + host_port + "/v1/threads",
            "headers": {"Accept": "application/json"},
        },
    ]


def worker(host_port, thread_id, num_requests):
    request_configs = get_request_configs(host_port)
    for i in range(num_requests):
        config = request_configs[i % len(request_configs)]
        status_code, response_text = make_request(config)
        if status_code:
            print(
                f"Thread {thread_id}, Request {i+1}: {config['method']} {config['url']} - Status Code {status_code}"
            )
        else:
            print(
                f"Thread {thread_id}, Request {i+1}: {config['method']} {config['url']} - Error - {response_text}"
            )

async def wait_for_websocket_download_success_event(timeout: float = 30):
    # Relies on the module-level host_port set in the __main__ block below.
    if platform.system() == "Windows":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    async with websockets.connect("ws://" + host_port + "/events") as websocket:
        try:
            # Using wait_for instead of timeout context manager
            async def receive_until_success():
                while True:
                    message = await websocket.recv()
                    try:
                        event = json.loads(message)
                        if event.get("type") == "DownloadSuccess":
                            return event
                    except json.JSONDecodeError:
                        continue

            return await asyncio.wait_for(receive_until_success(), timeout)

        except asyncio.TimeoutError:
            raise TimeoutError("Timeout waiting for DownloadSuccess event")

def run_test(host_port, num_threads, requests_per_thread):
    start_time = time.time()

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(worker, host_port, i, requests_per_thread)
            for i in range(num_threads)
        ]

        for future in as_completed(futures):
            future.result()

    end_time = time.time()
    total_requests = num_threads * requests_per_thread
    total_time = end_time - start_time

    print("\nTest completed:")
    print(f"Total threads: {num_threads}")
    print(f"Requests per thread: {requests_per_thread}")
    print(f"Total requests: {total_requests}")
    print(f"Total time: {total_time:.2f} seconds")
    print(f"Requests per second: {total_requests / total_time:.2f}")

def parse_argument():
    parser = argparse.ArgumentParser(description="Local test")
    parser.add_argument("--host", type=str, default="127.0.0.1", help="Server host")
    parser.add_argument("--port", type=int, help="Server port", required=True)
    parser.add_argument(
        "--num_threads", type=int, default=5, help="Number of threads to send requests"
    )
    parser.add_argument(
        "--requests_per_thread",
        type=int,
        default=10,
        help="Number of requests per thread",
    )
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = parse_argument()
    host_port = args.host + ":" + str(args.port)
    print("Server start: " + host_port)
    # Clear any leftover engine/model state from a previous run before setup.
    teardown(host_port)

    asyncio.run(setup_env(host_port))

    run_test(host_port, args.num_threads, args.requests_per_thread)

    teardown(host_port)
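
For reference, a typical local invocation (a sketch, assuming the server under test is already listening on the chosen port; 39281 is used here only because it matches the allowed_origins entries above, and the flag names come from parse_argument) would be:

python engine/e2e-test/local_test.py --port 39281 --num_threads 5 --requests_per_thread 10

The script first calls teardown to clear any previous state, runs the setup requests (config patch, engine install, model pull, model start, one chat completion), fires the concurrent GET workload across the worker threads, and then tears everything down again.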

0 commit comments
