Skip to content

Commit 347be64

Browse files
authored
fix(llm): special tokens and leading space (#1831)
1 parent 08c4ab1 commit 347be64

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

private_gpt/components/llm/custom/sagemaker.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -243,12 +243,19 @@ def get_stream():
243243
event_stream = resp["Body"]
244244
start_json = b"{"
245245
stop_token = "<|endoftext|>"
246+
first_token = True
246247

247248
for line in LineIterator(event_stream):
248249
if line != b"" and start_json in line:
249250
data = json.loads(line[line.find(start_json) :].decode("utf-8"))
250-
if data["token"]["text"] != stop_token:
251+
special = data["token"]["special"]
252+
stop = data["token"]["text"] == stop_token
253+
if not special and not stop:
251254
delta = data["token"]["text"]
255+
# trim the leading space for the first token if present
256+
if first_token:
257+
delta = delta.lstrip()
258+
first_token = False
252259
text += delta
253260
yield CompletionResponse(delta=delta, text=text, raw=data)
254261

0 commit comments

Comments
 (0)