Skip to content

Commit 9e25661

Browse files
author
mint
committed
Merge branch 'oobabooga-main' into unified
2 parents 4aba114 + 1c29e09 commit 9e25661

15 files changed

+139
-97
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,8 +506,9 @@ Optionally, you can use the following command-line flags:
506506

507507
| Flag | Description |
508508
|------------------|-------------|
509-
|`--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. |
510-
|`--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should typically be set to max_seq_len / 2048. |
509+
| `--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. |
510+
| `--rope_freq_base ROPE_FREQ_BASE` | If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63). |
511+
| `--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale. |
511512

512513
#### Gradio
513514

models/config.yaml

Lines changed: 76 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -49,203 +49,203 @@ llama-65b-gptq-3bit:
4949
.*(gr1024|1024g|groupsize1024):
5050
groupsize: 1024
5151
.*(oasst|openassistant-|stablelm-7b-sft-v7-epoch-3):
52-
mode: 'instruct'
52+
mode: 'chat-instruct'
5353
instruction_template: 'Open Assistant'
5454
skip_special_tokens: false
5555
(?!.*galactica)(?!.*reward).*openassistant:
56-
mode: 'instruct'
56+
mode: 'chat-instruct'
5757
instruction_template: 'Open Assistant'
5858
skip_special_tokens: false
5959
(?!.*v0)(?!.*1.1)(?!.*1_1)(?!.*stable)(?!.*chinese).*vicuna:
60-
mode: 'instruct'
60+
mode: 'chat-instruct'
6161
instruction_template: 'Vicuna-v0'
6262
.*vicuna.*v0:
63-
mode: 'instruct'
63+
mode: 'chat-instruct'
6464
instruction_template: 'Vicuna-v0'
6565
.*vicuna.*(1.1|1_1|1.3|1_3):
66-
mode: 'instruct'
66+
mode: 'chat-instruct'
6767
instruction_template: 'Vicuna-v1.1'
6868
.*vicuna.*(1.5|1_5):
69-
mode: 'instruct'
69+
mode: 'chat-instruct'
7070
instruction_template: 'Vicuna-v1.1'
7171
truncation_length: 4096
7272
rms_norm_eps: 5.0e-6
7373
.*stable.*vicuna:
74-
mode: 'instruct'
74+
mode: 'chat-instruct'
7575
instruction_template: 'StableVicuna'
7676
(?!.*chat).*chinese-vicuna:
77-
mode: 'instruct'
77+
mode: 'chat-instruct'
7878
instruction_template: 'Alpaca'
7979
.*chinese-vicuna.*chat:
80-
mode: 'instruct'
80+
mode: 'chat-instruct'
8181
instruction_template: 'Chinese-Vicuna-Chat'
8282
.*alpaca:
83-
mode: 'instruct'
83+
mode: 'chat-instruct'
8484
instruction_template: 'Alpaca'
8585
.*alpaca-native-4bit:
86-
mode: 'instruct'
86+
mode: 'chat-instruct'
8787
instruction_template: 'Alpaca'
8888
wbits: 4
8989
groupsize: 128
9090
.*galactica:
9191
skip_special_tokens: false
9292
.*dolly-v[0-9]-[0-9]*b:
93-
mode: 'instruct'
93+
mode: 'chat-instruct'
9494
instruction_template: 'Alpaca'
9595
skip_special_tokens: false
9696
custom_stopping_strings: '"### End"'
9797
.*koala:
98-
mode: 'instruct'
98+
mode: 'chat-instruct'
9999
instruction_template: 'Koala'
100100
.*chatglm:
101-
mode: 'instruct'
101+
mode: 'chat-instruct'
102102
instruction_template: 'ChatGLM'
103103
.*metharme:
104-
mode: 'instruct'
104+
mode: 'chat-instruct'
105105
instruction_template: 'Metharme'
106106
.*llava:
107-
mode: 'instruct'
107+
mode: 'chat-instruct'
108108
model_type: 'llama'
109109
instruction_template: 'LLaVA'
110110
custom_stopping_strings: '"\n###"'
111111
.*$:
112112
mode: 'chat'
113113
instruction_template: 'None'
114114
.*raven:
115-
mode: 'instruct'
115+
mode: 'chat-instruct'
116116
instruction_template: 'RWKV-Raven'
117117
.*ctx8192:
118118
truncation_length: 8192
119119
.*moss-moon.*sft:
120-
mode: 'instruct'
120+
mode: 'chat-instruct'
121121
instruction_template: 'MOSS'
122122
.*stablelm-tuned:
123-
mode: 'instruct'
123+
mode: 'chat-instruct'
124124
instruction_template: 'StableLM'
125125
truncation_length: 4096
126126
.*stablelm-base:
127127
truncation_length: 4096
128128
.*galactica.*finetuned:
129-
mode: 'instruct'
129+
mode: 'chat-instruct'
130130
instruction_template: 'Galactica Finetuned'
131131
.*galactica.*-v2:
132-
mode: 'instruct'
132+
mode: 'chat-instruct'
133133
instruction_template: 'Galactica v2'
134134
(?!.*finetuned)(?!.*-v2).*galactica:
135-
mode: 'instruct'
135+
mode: 'chat-instruct'
136136
instruction_template: 'Galactica'
137137
.*guanaco:
138-
mode: 'instruct'
138+
mode: 'chat-instruct'
139139
instruction_template: 'Guanaco non-chat'
140140
.*baize:
141-
mode: 'instruct'
141+
mode: 'chat-instruct'
142142
instruction_template: 'Baize'
143143
.*mpt-.*instruct:
144-
mode: 'instruct'
144+
mode: 'chat-instruct'
145145
instruction_template: 'Alpaca'
146146
.*mpt-.*chat:
147-
mode: 'instruct'
147+
mode: 'chat-instruct'
148148
instruction_template: 'MPT-Chat'
149149
(?!.*-flan-)(?!.*-t5-).*lamini-:
150-
mode: 'instruct'
150+
mode: 'chat-instruct'
151151
instruction_template: 'Alpaca'
152152
.*incite.*chat:
153-
mode: 'instruct'
153+
mode: 'chat-instruct'
154154
instruction_template: 'INCITE-Chat'
155155
.*incite.*instruct:
156-
mode: 'instruct'
156+
mode: 'chat-instruct'
157157
instruction_template: 'INCITE-Instruct'
158158
.*wizard.*mega:
159-
mode: 'instruct'
159+
mode: 'chat-instruct'
160160
instruction_template: 'Wizard-Mega'
161161
custom_stopping_strings: '"</s>"'
162162
.*ziya-:
163-
mode: 'instruct'
163+
mode: 'chat-instruct'
164164
instruction_template: 'Ziya'
165165
.*koalpaca:
166-
mode: 'instruct'
166+
mode: 'chat-instruct'
167167
instruction_template: 'KoAlpaca'
168168
.*openbuddy:
169-
mode: 'instruct'
169+
mode: 'chat-instruct'
170170
instruction_template: 'OpenBuddy'
171171
(?!.*chat).*vigogne:
172-
mode: 'instruct'
172+
mode: 'chat-instruct'
173173
instruction_template: 'Vigogne-Instruct'
174174
.*vigogne.*chat:
175-
mode: 'instruct'
175+
mode: 'chat-instruct'
176176
instruction_template: 'Vigogne-Chat'
177177
.*(llama-deus|supercot|llama-natural-instructions|open-llama-0.3t-7b-instruct-dolly-hhrlhf|open-llama-0.3t-7b-open-instruct):
178-
mode: 'instruct'
178+
mode: 'chat-instruct'
179179
instruction_template: 'Alpaca'
180180
.*bactrian:
181-
mode: 'instruct'
181+
mode: 'chat-instruct'
182182
instruction_template: 'Bactrian'
183183
.*(h2ogpt-oig-|h2ogpt-oasst1-|h2ogpt-research-oasst1-):
184-
mode: 'instruct'
184+
mode: 'chat-instruct'
185185
instruction_template: 'H2O-human_bot'
186186
.*h2ogpt-gm-:
187-
mode: 'instruct'
187+
mode: 'chat-instruct'
188188
instruction_template: 'H2O-prompt_answer'
189189
.*manticore:
190-
mode: 'instruct'
190+
mode: 'chat-instruct'
191191
instruction_template: 'Manticore Chat'
192192
.*bluemoonrp-(30|13)b:
193-
mode: 'instruct'
193+
mode: 'chat-instruct'
194194
instruction_template: 'Bluemoon'
195195
truncation_length: 4096
196196
.*Nous-Hermes-13b:
197-
mode: 'instruct'
197+
mode: 'chat-instruct'
198198
instruction_template: 'Alpaca'
199199
.*airoboros:
200-
mode: 'instruct'
200+
mode: 'chat-instruct'
201201
instruction_template: 'Vicuna-v1.1'
202202
.*airoboros.*1.2:
203-
mode: 'instruct'
203+
mode: 'chat-instruct'
204204
instruction_template: 'Airoboros-v1.2'
205205
.*alpa(cino|sta):
206-
mode: 'instruct'
206+
mode: 'chat-instruct'
207207
instruction_template: 'Alpaca'
208208
.*hippogriff:
209-
mode: 'instruct'
209+
mode: 'chat-instruct'
210210
instruction_template: 'Hippogriff'
211211
.*lazarus:
212-
mode: 'instruct'
212+
mode: 'chat-instruct'
213213
instruction_template: 'Alpaca'
214214
.*guanaco-.*(7|13|33|65)b:
215-
mode: 'instruct'
215+
mode: 'chat-instruct'
216216
instruction_template: 'Guanaco'
217217
.*hypermantis:
218-
mode: 'instruct'
218+
mode: 'chat-instruct'
219219
instruction_template: 'Alpaca'
220220
.*open-llama-.*-open-instruct:
221-
mode: 'instruct'
221+
mode: 'chat-instruct'
222222
instruction_template: 'Alpaca'
223223
.*starcoder-gpteacher-code-instruct:
224-
mode: 'instruct'
224+
mode: 'chat-instruct'
225225
instruction_template: 'Alpaca'
226226
.*tulu:
227-
mode: 'instruct'
227+
mode: 'chat-instruct'
228228
instruction_template: 'Tulu'
229229
.*chronos:
230-
mode: 'instruct'
230+
mode: 'chat-instruct'
231231
instruction_template: 'Alpaca'
232232
.*samantha:
233-
mode: 'instruct'
233+
mode: 'chat-instruct'
234234
instruction_template: 'Samantha'
235235
.*wizardcoder:
236-
mode: 'instruct'
236+
mode: 'chat-instruct'
237237
instruction_template: 'Alpaca'
238238
.*starchat-beta:
239-
mode: 'instruct'
239+
mode: 'chat-instruct'
240240
instruction_template: 'Starchat-Beta'
241241
custom_stopping_strings: '"<|end|>"'
242242
.*minotaur:
243-
mode: 'instruct'
243+
mode: 'chat-instruct'
244244
instruction_template: 'Minotaur'
245245
.*minotaur-15b:
246246
truncation_length: 8192
247247
.*orca_mini:
248-
mode: 'instruct'
248+
mode: 'chat-instruct'
249249
instruction_template: 'Orca Mini'
250250
.*landmark:
251251
truncation_length: 8192
@@ -255,51 +255,56 @@ llama-65b-gptq-3bit:
255255
truncation_length: 8192
256256
instruction_template: 'Vicuna-v0'
257257
.*(platypus|gplatty|superplatty):
258-
mode: 'instruct'
258+
mode: 'chat-instruct'
259259
instruction_template: 'Alpaca'
260260
.*longchat:
261-
mode: 'instruct'
261+
mode: 'chat-instruct'
262262
instruction_template: 'Vicuna-v1.1'
263263
.*vicuna-33b:
264-
mode: 'instruct'
264+
mode: 'chat-instruct'
265265
instruction_template: 'Vicuna-v1.1'
266266
.*redmond-hermes-coder:
267-
mode: 'instruct'
267+
mode: 'chat-instruct'
268268
instruction_template: 'Alpaca'
269269
truncation_length: 8192
270270
.*wizardcoder-15b:
271-
mode: 'instruct'
271+
mode: 'chat-instruct'
272272
instruction_template: 'Alpaca'
273273
truncation_length: 8192
274274
.*wizardlm:
275-
mode: 'instruct'
275+
mode: 'chat-instruct'
276276
instruction_template: 'Vicuna-v1.1'
277277
.*godzilla:
278-
mode: 'instruct'
278+
mode: 'chat-instruct'
279279
instruction_template: 'Alpaca'
280280
.*llama-(2|v2):
281281
truncation_length: 4096
282282
rms_norm_eps: 5.0e-6
283283
.*llama-(2|v2).*chat:
284-
mode: 'instruct'
284+
mode: 'chat-instruct'
285285
instruction_template: 'Llama-v2'
286286
.*70b.*ggml.*\.bin:
287287
n_gqa: 8
288288
.*newhope:
289-
mode: 'instruct'
289+
mode: 'chat-instruct'
290290
instruction_template: 'NewHope'
291291
.*stablebeluga2:
292-
mode: 'instruct'
292+
mode: 'chat-instruct'
293293
instruction_template: 'StableBeluga2'
294294
truncation_length: 4096
295295
rms_norm_eps: 5.0e-6
296296
.*openchat:
297-
mode: 'instruct'
297+
mode: 'chat-instruct'
298298
instruction_template: 'OpenChat'
299299
.*falcon.*-instruct:
300-
mode: 'instruct'
300+
mode: 'chat-instruct'
301301
.*(openorca-platypus2):
302-
mode: 'instruct'
302+
mode: 'chat-instruct'
303303
instruction_template: 'OpenOrca-Platypus2'
304304
custom_stopping_strings: '"### Instruction:", "### Response:"'
305305
rms_norm_eps: 5.0e-6
306+
.*codellama:
307+
rope_freq_base: 1000000
308+
.*codellama.*instruct:
309+
mode: 'chat-instruct'
310+
instruction_template: 'Llama-v2'

modules/RoPE.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
def get_alpha_value(alpha, base):
    """
    Resolve the effective alpha_value from the (alpha_value, rope_freq_base) pair.

    A positive rope_freq_base takes precedence: alpha is then derived from it
    via alpha = (base / 10000) ** (63 / 64). Otherwise the given alpha is
    returned unchanged.
    """
    if base <= 0:
        return alpha
    return (base / 10000.) ** (63 / 64.)
9+
10+
11+
def get_rope_freq_base(alpha, base):
    """
    Resolve the effective rope_freq_base from the (alpha_value, rope_freq_base) pair.

    A positive rope_freq_base wins outright; otherwise it is derived from
    alpha via base = 10000 * alpha ** (64 / 63).
    """
    return base if base > 0 else 10000 * alpha ** (64 / 63.)

modules/ctransformers_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def from_pretrained(self, path):
1515

1616
config = AutoConfig.from_pretrained(
1717
str(path),
18-
threads=shared.args.threads,
18+
threads=shared.args.threads if shared.args.threads != 0 else -1,
1919
gpu_layers=shared.args.n_gpu_layers,
2020
batch_size=shared.args.n_batch,
2121
context_length=shared.args.n_ctx,

modules/exllama.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import torch.nn.functional as F
44
from torch import version as torch_version
55

6-
from modules import shared
6+
from modules import RoPE, shared
77
from modules.logging_colors import logger
88
from modules.models import clear_torch_cache
99
from modules.text_generation import get_max_prompt_length
@@ -71,8 +71,8 @@ def from_pretrained(self, path_to_model):
7171
config.set_auto_map(shared.args.gpu_split)
7272
config.gpu_peer_fix = False
7373

74-
if shared.args.alpha_value:
75-
config.alpha_value = shared.args.alpha_value
74+
if shared.args.alpha_value > 1 or shared.args.rope_freq_base > 0:
75+
config.alpha_value = RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)
7676
config.calculate_rotary_embedding_base()
7777

7878
model = ExLlama(config)

0 commit comments

Comments
 (0)