14
14
15
15
import asyncio
16
16
import uuid
17
- from typing import List , Optional , Tuple
17
+ from typing import Iterator , List , Optional , Tuple , Union
18
18
19
+ import requests
19
20
import xoscar as xo
20
21
21
22
from .core .model import ModelActor
22
23
from .core .service import SupervisorActor
23
24
from .isolation import Isolation
24
25
from .model import ModelSpec
26
+ from .model .llm .types import (
27
+ ChatCompletion ,
28
+ ChatCompletionChunk ,
29
+ ChatCompletionMessage ,
30
+ Completion ,
31
+ CompletionChunk ,
32
+ )
25
33
26
34
27
35
class Client :
@@ -44,7 +52,7 @@ def launch_model(
44
52
model_size_in_billions : Optional [int ] = None ,
45
53
model_format : Optional [str ] = None ,
46
54
quantization : Optional [str ] = None ,
47
- ** kwargs
55
+ ** kwargs ,
48
56
) -> str :
49
57
model_uid = self .gen_model_uid ()
50
58
@@ -54,7 +62,7 @@ def launch_model(
54
62
model_size_in_billions = model_size_in_billions ,
55
63
model_format = model_format ,
56
64
quantization = quantization ,
57
- ** kwargs
65
+ ** kwargs ,
58
66
)
59
67
self ._isolation .call (coro )
60
68
@@ -73,6 +81,92 @@ def get_model(self, model_uid: str) -> xo.ActorRefType["ModelActor"]:
73
81
return self ._isolation .call (coro )
74
82
75
83
84
class RESTfulClient:
    """Blocking HTTP client for the model-serving RESTful endpoints.

    Each public method issues one ``requests`` call against ``base_url`` and
    raises ``RuntimeError`` when the server answers with a status other than
    200, instead of handing an error payload back to the caller.
    """

    def __init__(self, base_url):
        # Stored verbatim; a trailing slash is tolerated when URLs are built.
        self.base_url = base_url

    def _url(self, path: str) -> str:
        """Join ``base_url`` and ``path`` with exactly one slash between them."""
        return f"{str(self.base_url).rstrip('/')}/{path.lstrip('/')}"

    @staticmethod
    def _error_detail(response) -> str:
        """Return the ``detail`` field of a JSON error body, else the raw text."""
        try:
            return str(response.json()["detail"])
        except (ValueError, KeyError, TypeError):
            # Body is not JSON, not a mapping, or has no "detail" key.
            return response.text

    @classmethod
    def _raise_for_status(cls, response, action: str) -> None:
        """Raise ``RuntimeError`` describing ``action`` unless the status is 200."""
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to {action}, detail: {cls._error_detail(response)}"
            )

    @classmethod
    def gen_model_uid(cls) -> str:
        """Return a fresh time-based (version 1) UUID as a string."""
        return str(uuid.uuid1())

    def list_models(self) -> List[str]:
        """Return the decoded JSON body of ``GET /v1/models``.

        Raises:
            RuntimeError: if the server does not answer with status 200.
        """
        response = requests.get(self._url("/v1/models"))
        self._raise_for_status(response, "list models")
        return response.json()

    def launch_model(
        self,
        model_name: str,
        model_size_in_billions: Optional[int] = None,
        model_format: Optional[str] = None,
        quantization: Optional[str] = None,
        **kwargs,
    ) -> str:
        """Ask the server to launch a model and return its uid.

        A uid is generated locally and sent with the request; the value that
        is returned is the ``model_uid`` field of the server's reply.

        Args:
            model_name: name of the model to launch.
            model_size_in_billions: optional size selector, sent as-is.
            model_format: optional format selector, sent as-is.
            quantization: optional quantization selector, sent as-is.
            **kwargs: extra options, sent under the ``"kwargs"`` key.

        Raises:
            RuntimeError: if the server does not answer with status 200.
        """
        payload = {
            "model_uid": self.gen_model_uid(),
            "model_name": model_name,
            "model_size_in_billions": model_size_in_billions,
            "model_format": model_format,
            "quantization": quantization,
            "kwargs": kwargs,
        }
        response = requests.post(self._url("/v1/models"), json=payload)
        self._raise_for_status(response, "launch model")
        return response.json()["model_uid"]

    def terminate_model(self, model_uid: str):
        """Send ``DELETE /v1/models/{model_uid}``.

        Raises:
            RuntimeError: if the server does not answer with status 200.
        """
        response = requests.delete(self._url(f"/v1/models/{model_uid}"))
        self._raise_for_status(response, "terminate model")

    def generate(
        self, model_uid: str, prompt: str, **kwargs
    ) -> "Union[Completion, Iterator[CompletionChunk]]":
        """Post a completion request and return the decoded JSON reply.

        ``kwargs`` are merged into the top level of the request body. The
        reply is decoded as a single JSON document; a streamed reply is not
        iterated here.

        Raises:
            RuntimeError: if the server does not answer with status 200.
        """
        request_body = {"model": model_uid, "prompt": prompt, **kwargs}
        response = requests.post(self._url("/v1/completions"), json=request_body)
        self._raise_for_status(response, "generate completion")
        return response.json()

    def chat(
        self,
        model_uid: str,
        prompt: str,
        system_prompt: Optional[str] = None,
        chat_history: "Optional[List[ChatCompletionMessage]]" = None,
        **kwargs,
    ) -> "Union[ChatCompletion, Iterator[ChatCompletionChunk]]":
        """Post a chat-completion request and return the decoded JSON reply.

        NOTE: a ``chat_history`` list supplied by the caller is modified in
        place: the system message is inserted or overwritten when
        ``system_prompt`` is given, and the new user message is appended.

        Args:
            model_uid: uid of the model to query.
            prompt: content of the new user message.
            system_prompt: if given, becomes the content of the leading
                system message (replacing an existing one, else inserted).
            chat_history: earlier messages; a new list is used when ``None``.
            **kwargs: merged into the top level of the request body.

        Raises:
            RuntimeError: if the server does not answer with status 200.
        """
        if chat_history is None:
            chat_history = []

        if system_prompt is not None:
            if chat_history and chat_history[0]["role"] == "system":
                chat_history[0]["content"] = system_prompt
            else:
                chat_history.insert(
                    0, ChatCompletionMessage(role="system", content=system_prompt)
                )

        chat_history.append(ChatCompletionMessage(role="user", content=prompt))
        request_body = {"model": model_uid, "messages": chat_history, **kwargs}
        response = requests.post(
            self._url("/v1/chat/completions"), json=request_body
        )
        self._raise_for_status(response, "generate chat completion")
        return response.json()
168
+
169
+
76
170
class AsyncClient :
77
171
def __init__ (self , supervisor_address : str ):
78
172
self ._supervisor_address = supervisor_address
@@ -96,7 +190,7 @@ async def launch_model(
96
190
model_size_in_billions : Optional [int ] = None ,
97
191
model_format : Optional [str ] = None ,
98
192
quantization : Optional [str ] = None ,
99
- ** kwargs
193
+ ** kwargs ,
100
194
) -> str :
101
195
model_uid = self .gen_model_uid ()
102
196
@@ -107,7 +201,7 @@ async def launch_model(
107
201
model_size_in_billions = model_size_in_billions ,
108
202
model_format = model_format ,
109
203
quantization = quantization ,
110
- ** kwargs
204
+ ** kwargs ,
111
205
)
112
206
return model_uid
113
207
0 commit comments