extensions/openai/typing.py (2 additions, 1 deletion):
@@ -103,10 +103,11 @@ class ChatCompletionRequestParams(BaseModel):
     instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template. If set, will take precedence over everything else.")

     character: str | None = Field(default=None, description="A character defined under text-generation-webui/characters. If not set, the default \"Assistant\" character will be used.")
-    user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1")
     bot_name: str | None = Field(default=None, description="Overwrites the value set by character field.", alias="name2")
     context: str | None = Field(default=None, description="Overwrites the value set by character field.")
     greeting: str | None = Field(default=None, description="Overwrites the value set by character field.")
+    user_name: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".", alias="name1")
+    user_bio: str | None = Field(default=None, description="The user description/personality.")
     chat_template_str: str | None = Field(default=None, description="Jinja2 template for chat.")
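
For context, a minimal client-side sketch of how the new user_bio field could be exercised. It assumes a local text-generation-webui instance with the OpenAI-compatible API enabled on its default port; the host/port, payload values, and the surrounding request plumbing are illustrative, not part of this diff:

```python
import requests

# Illustrative payload: "name1" is the request alias for user_name,
# and "user_bio" is the field added in this diff. "mode" and "character"
# are pre-existing parameters of the same request model.
payload = {
    "mode": "chat",
    "character": "Assistant",
    "name1": "Alice",
    "user_bio": "Alice is a terse, no-nonsense sysadmin.",
    "messages": [{"role": "user", "content": "Introduce yourself."}],
}

# Assumed default local endpoint for the openai extension; adjust if
# your instance is configured differently.
resp = requests.post("http://127.0.0.1:5000/v1/chat/completions", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```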
The matching UI control for user_bio is added in another file (its header was not captured in this excerpt):

+shared.gradio['user_bio'] = gr.Textbox(value=shared.settings['user_bio'], lines=10, label='Description', info='Here you can optionally write a description of yourself.', placeholder='{{user}}\'s personality: ...', elem_classes=['add_scrollbar'])
From a second file shown in the same view (also not named in this excerpt), the model loader UI definitions; the only change here adds precision=0 to the attention_sink_size input:

 shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.')
 shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
 shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 18,17')
 shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
 shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
-shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
+shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, precision=0, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
 shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
 shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
 shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
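
Why precision=0 matters: per Gradio's documented behavior, gr.Number otherwise delivers its value as a float, while the attention sink size is consumed as a token count. A standalone sketch of the effect (a demo under that assumption, not the project's actual wiring):

```python
import gradio as gr

def show_received(n):
    # With precision=0 the component rounds to the nearest integer and
    # passes an int (e.g. 5) to the callback, rather than a float (e.g. 5.0).
    return f"{n!r} ({type(n).__name__})"

with gr.Blocks() as demo:
    sink = gr.Number(label="attention_sink_size", value=5, precision=0)
    out = gr.Textbox(label="value received by the callback")
    sink.change(show_received, sink, out)

demo.launch()
```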