Skip to content

Commit 9e25661

Browse files
author
mint
committed
Merge branch 'oobabooga-main' into unified
2 parents 4aba114 + 1c29e09 commit 9e25661

15 files changed

+139
-97
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,8 +506,9 @@ Optionally, you can use the following command-line flags:
506506

507507
| Flag | Description |
508508
|------------------|-------------|
509-
|`--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. |
510-
|`--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should typically be set to max_seq_len / 2048. |
509+
| `--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. |
510+
| `--rope_freq_base ROPE_FREQ_BASE` | If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63). |
511+
| `--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale. |
511512

512513
#### Gradio
513514

models/config.yaml

Lines changed: 76 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -49,203 +49,203 @@ llama-65b-gptq-3bit:
4949
.*(gr1024|1024g|groupsize1024):
5050
groupsize: 1024
5151
.*(oasst|openassistant-|stablelm-7b-sft-v7-epoch-3):
52-
mode: 'instruct'
52+
mode: 'chat-instruct'
5353
instruction_template: 'Open Assistant'
5454
skip_special_tokens: false
5555
(?!.*galactica)(?!.*reward).*openassistant:
56-
mode: 'instruct'
56+
mode: 'chat-instruct'
5757
instruction_template: 'Open Assistant'
5858
skip_special_tokens: false
5959
(?!.*v0)(?!.*1.1)(?!.*1_1)(?!.*stable)(?!.*chinese).*vicuna:
60-
mode: 'instruct'
60+
mode: 'chat-instruct'
6161
instruction_template: 'Vicuna-v0'
6262
.*vicuna.*v0:
63-
mode: 'instruct'
63+
mode: 'chat-instruct'
6464
instruction_template: 'Vicuna-v0'
6565
.*vicuna.*(1.1|1_1|1.3|1_3):
66-
mode: 'instruct'
66+
mode: 'chat-instruct'
6767
instruction_template: 'Vicuna-v1.1'
6868
.*vicuna.*(1.5|1_5):
69-
mode: 'instruct'
69+
mode: 'chat-instruct'
7070
instruction_template: 'Vicuna-v1.1'
7171
truncation_length: 4096
7272
rms_norm_eps: 5.0e-6
7373
.*stable.*vicuna:
74-
mode: 'instruct'
74+
mode: 'chat-instruct'
7575
instruction_template: 'StableVicuna'
7676
(?!.*chat).*chinese-vicuna:
77-
mode: 'instruct'
77+
mode: 'chat-instruct'
7878
instruction_template: 'Alpaca'
7979
.*chinese-vicuna.*chat:
80-
mode: 'instruct'
80+
mode: 'chat-instruct'
8181
instruction_template: 'Chinese-Vicuna-Chat'
8282
.*alpaca:
83-
mode: 'instruct'
83+
mode: 'chat-instruct'
8484
instruction_template: 'Alpaca'
8585
.*alpaca-native-4bit:
86-
mode: 'instruct'
86+
mode: 'chat-instruct'
8787
instruction_template: 'Alpaca'
8888
wbits: 4
8989
groupsize: 128
9090
.*galactica:
9191
skip_special_tokens: false
9292
.*dolly-v[0-9]-[0-9]*b:
93-
mode: 'instruct'
93+
mode: 'chat-instruct'
9494
instruction_template: 'Alpaca'
9595
skip_special_tokens: false
9696
custom_stopping_strings: '"### End"'
9797
.*koala:
98-
mode: 'instruct'
98+
mode: 'chat-instruct'
9999
instruction_template: 'Koala'
100100
.*chatglm:
101-
mode: 'instruct'
101+
mode: 'chat-instruct'
102102
instruction_template: 'ChatGLM'
103103
.*metharme:
104-
mode: 'instruct'
104+
mode: 'chat-instruct'
105105
instruction_template: 'Metharme'
106106
.*llava:
107-
mode: 'instruct'
107+
mode: 'chat-instruct'
108108
model_type: 'llama'
109109
instruction_template: 'LLaVA'
110110
custom_stopping_strings: '"\n###"'
111111
.*$:
112112
mode: 'chat'
113113
instruction_template: 'None'
114114
.*raven:
115-
mode: 'instruct'
115+
mode: 'chat-instruct'
116116
instruction_template: 'RWKV-Raven'
117117
.*ctx8192:
118118
truncation_length: 8192
119119
.*moss-moon.*sft:
120-
mode: 'instruct'
120+
mode: 'chat-instruct'
121121
instruction_template: 'MOSS'
122122
.*stablelm-tuned:
123-
mode: 'instruct'
123+
mode: 'chat-instruct'
124124
instruction_template: 'StableLM'
125125
truncation_length: 4096
126126
.*stablelm-base:
127127
truncation_length: 4096
128128
.*galactica.*finetuned:
129-
mode: 'instruct'
129+
mode: 'chat-instruct'
130130
instruction_template: 'Galactica Finetuned'
131131
.*galactica.*-v2:
132-
mode: 'instruct'
132+
mode: 'chat-instruct'
133133
instruction_template: 'Galactica v2'
134134
(?!.*finetuned)(?!.*-v2).*galactica:
135-
mode: 'instruct'
135+
mode: 'chat-instruct'
136136
instruction_template: 'Galactica'
137137
.*guanaco:
138-
mode: 'instruct'
138+
mode: 'chat-instruct'
139139
instruction_template: 'Guanaco non-chat'
140140
.*baize:
141-
mode: 'instruct'
141+
mode: 'chat-instruct'
142142
instruction_template: 'Baize'
143143
.*mpt-.*instruct:
144-
mode: 'instruct'
144+
mode: 'chat-instruct'
145145
instruction_template: 'Alpaca'
146146
.*mpt-.*chat:
147-
mode: 'instruct'
147+
mode: 'chat-instruct'
148148
instruction_template: 'MPT-Chat'
149149
(?!.*-flan-)(?!.*-t5-).*lamini-:
150-
mode: 'instruct'
150+
mode: 'chat-instruct'
151151
instruction_template: 'Alpaca'
152152
.*incite.*chat:
153-
mode: 'instruct'
153+
mode: 'chat-instruct'
154154
instruction_template: 'INCITE-Chat'
155155
.*incite.*instruct:
156-
mode: 'instruct'
156+
mode: 'chat-instruct'
157157
instruction_template: 'INCITE-Instruct'
158158
.*wizard.*mega:
159-
mode: 'instruct'
159+
mode: 'chat-instruct'
160160
instruction_template: 'Wizard-Mega'
161161
custom_stopping_strings: '"</s>"'
162162
.*ziya-:
163-
mode: 'instruct'
163+
mode: 'chat-instruct'
164164
instruction_template: 'Ziya'
165165
.*koalpaca:
166-
mode: 'instruct'
166+
mode: 'chat-instruct'
167167
instruction_template: 'KoAlpaca'
168168
.*openbuddy:
169-
mode: 'instruct'
169+
mode: 'chat-instruct'
170170
instruction_template: 'OpenBuddy'
171171
(?!.*chat).*vigogne:
172-
mode: 'instruct'
172+
mode: 'chat-instruct'
173173
instruction_template: 'Vigogne-Instruct'
174174
.*vigogne.*chat:
175-
mode: 'instruct'
175+
mode: 'chat-instruct'
176176
instruction_template: 'Vigogne-Chat'
177177
.*(llama-deus|supercot|llama-natural-instructions|open-llama-0.3t-7b-instruct-dolly-hhrlhf|open-llama-0.3t-7b-open-instruct):
178-
mode: 'instruct'
178+
mode: 'chat-instruct'
179179
instruction_template: 'Alpaca'
180180
.*bactrian:
181-
mode: 'instruct'
181+
mode: 'chat-instruct'
182182
instruction_template: 'Bactrian'
183183
.*(h2ogpt-oig-|h2ogpt-oasst1-|h2ogpt-research-oasst1-):
184-
mode: 'instruct'
184+
mode: 'chat-instruct'
185185
instruction_template: 'H2O-human_bot'
186186
.*h2ogpt-gm-:
187-
mode: 'instruct'
187+
mode: 'chat-instruct'
188188
instruction_template: 'H2O-prompt_answer'
189189
.*manticore:
190-
mode: 'instruct'
190+
mode: 'chat-instruct'
191191
instruction_template: 'Manticore Chat'
192192
.*bluemoonrp-(30|13)b:
193-
mode: 'instruct'
193+
mode: 'chat-instruct'
194194
instruction_template: 'Bluemoon'
195195
truncation_length: 4096
196196
.*Nous-Hermes-13b:
197-
mode: 'instruct'
197+
mode: 'chat-instruct'
198198
instruction_template: 'Alpaca'
199199
.*airoboros:
200-
mode: 'instruct'
200+
mode: 'chat-instruct'
201201
instruction_template: 'Vicuna-v1.1'
202202
.*airoboros.*1.2:
203-
mode: 'instruct'
203+
mode: 'chat-instruct'
204204
instruction_template: 'Airoboros-v1.2'
205205
.*alpa(cino|sta):
206-
mode: 'instruct'
206+
mode: 'chat-instruct'
207207
instruction_template: 'Alpaca'
208208
.*hippogriff:
209-
mode: 'instruct'
209+
mode: 'chat-instruct'
210210
instruction_template: 'Hippogriff'
211211
.*lazarus:
212-
mode: 'instruct'
212+
mode: 'chat-instruct'
213213
instruction_template: 'Alpaca'
214214
.*guanaco-.*(7|13|33|65)b:
215-
mode: 'instruct'
215+
mode: 'chat-instruct'
216216
instruction_template: 'Guanaco'
217217
.*hypermantis:
218-
mode: 'instruct'
218+
mode: 'chat-instruct'
219219
instruction_template: 'Alpaca'
220220
.*open-llama-.*-open-instruct:
221-
mode: 'instruct'
221+
mode: 'chat-instruct'
222222
instruction_template: 'Alpaca'
223223
.*starcoder-gpteacher-code-instruct:
224-
mode: 'instruct'
224+
mode: 'chat-instruct'
225225
instruction_template: 'Alpaca'
226226
.*tulu:
227-
mode: 'instruct'
227+
mode: 'chat-instruct'
228228
instruction_template: 'Tulu'
229229
.*chronos:
230-
mode: 'instruct'
230+
mode: 'chat-instruct'
231231
instruction_template: 'Alpaca'
232232
.*samantha:
233-
mode: 'instruct'
233+
mode: 'chat-instruct'
234234
instruction_template: 'Samantha'
235235
.*wizardcoder:
236-
mode: 'instruct'
236+
mode: 'chat-instruct'
237237
instruction_template: 'Alpaca'
238238
.*starchat-beta:
239-
mode: 'instruct'
239+
mode: 'chat-instruct'
240240
instruction_template: 'Starchat-Beta'
241241
custom_stopping_strings: '"<|end|>"'
242242
.*minotaur:
243-
mode: 'instruct'
243+
mode: 'chat-instruct'
244244
instruction_template: 'Minotaur'
245245
.*minotaur-15b:
246246
truncation_length: 8192
247247
.*orca_mini:
248-
mode: 'instruct'
248+
mode: 'chat-instruct'
249249
instruction_template: 'Orca Mini'
250250
.*landmark:
251251
truncation_length: 8192
@@ -255,51 +255,56 @@ llama-65b-gptq-3bit:
255255
truncation_length: 8192
256256
instruction_template: 'Vicuna-v0'
257257
.*(platypus|gplatty|superplatty):
258-
mode: 'instruct'
258+
mode: 'chat-instruct'
259259
instruction_template: 'Alpaca'
260260
.*longchat:
261-
mode: 'instruct'
261+
mode: 'chat-instruct'
262262
instruction_template: 'Vicuna-v1.1'
263263
.*vicuna-33b:
264-
mode: 'instruct'
264+
mode: 'chat-instruct'
265265
instruction_template: 'Vicuna-v1.1'
266266
.*redmond-hermes-coder:
267-
mode: 'instruct'
267+
mode: 'chat-instruct'
268268
instruction_template: 'Alpaca'
269269
truncation_length: 8192
270270
.*wizardcoder-15b:
271-
mode: 'instruct'
271+
mode: 'chat-instruct'
272272
instruction_template: 'Alpaca'
273273
truncation_length: 8192
274274
.*wizardlm:
275-
mode: 'instruct'
275+
mode: 'chat-instruct'
276276
instruction_template: 'Vicuna-v1.1'
277277
.*godzilla:
278-
mode: 'instruct'
278+
mode: 'chat-instruct'
279279
instruction_template: 'Alpaca'
280280
.*llama-(2|v2):
281281
truncation_length: 4096
282282
rms_norm_eps: 5.0e-6
283283
.*llama-(2|v2).*chat:
284-
mode: 'instruct'
284+
mode: 'chat-instruct'
285285
instruction_template: 'Llama-v2'
286286
.*70b.*ggml.*\.bin:
287287
n_gqa: 8
288288
.*newhope:
289-
mode: 'instruct'
289+
mode: 'chat-instruct'
290290
instruction_template: 'NewHope'
291291
.*stablebeluga2:
292-
mode: 'instruct'
292+
mode: 'chat-instruct'
293293
instruction_template: 'StableBeluga2'
294294
truncation_length: 4096
295295
rms_norm_eps: 5.0e-6
296296
.*openchat:
297-
mode: 'instruct'
297+
mode: 'chat-instruct'
298298
instruction_template: 'OpenChat'
299299
.*falcon.*-instruct:
300-
mode: 'instruct'
300+
mode: 'chat-instruct'
301301
.*(openorca-platypus2):
302-
mode: 'instruct'
302+
mode: 'chat-instruct'
303303
instruction_template: 'OpenOrca-Platypus2'
304304
custom_stopping_strings: '"### Instruction:", "### Response:"'
305305
rms_norm_eps: 5.0e-6
306+
.*codellama:
307+
rope_freq_base: 1000000
308+
.*codellama.*instruct:
309+
mode: 'chat-instruct'
310+
instruction_template: 'Llama-v2'

modules/RoPE.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
def get_alpha_value(alpha, base):
    """
    Resolve the effective alpha_value from the (alpha_value, rope_freq_base) pair.

    A positive rope_freq_base takes precedence: alpha is then derived from it
    via alpha = (base / 10000) ** (63 / 64). Otherwise the given alpha is
    returned unchanged.
    """
    if base <= 0:
        return alpha
    return (base / 10000.) ** (63 / 64.)
9+
10+
11+
def get_rope_freq_base(alpha, base):
    """
    Resolve the effective rope_freq_base from the (alpha_value, rope_freq_base) pair.

    A positive rope_freq_base wins outright; otherwise it is derived from
    alpha via base = 10000 * alpha ** (64 / 63).
    """
    return base if base > 0 else 10000 * alpha ** (64 / 63.)

modules/ctransformers_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def from_pretrained(self, path):
1515

1616
config = AutoConfig.from_pretrained(
1717
str(path),
18-
threads=shared.args.threads,
18+
threads=shared.args.threads if shared.args.threads != 0 else -1,
1919
gpu_layers=shared.args.n_gpu_layers,
2020
batch_size=shared.args.n_batch,
2121
context_length=shared.args.n_ctx,

modules/exllama.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import torch.nn.functional as F
44
from torch import version as torch_version
55

6-
from modules import shared
6+
from modules import RoPE, shared
77
from modules.logging_colors import logger
88
from modules.models import clear_torch_cache
99
from modules.text_generation import get_max_prompt_length
@@ -71,8 +71,8 @@ def from_pretrained(self, path_to_model):
7171
config.set_auto_map(shared.args.gpu_split)
7272
config.gpu_peer_fix = False
7373

74-
if shared.args.alpha_value:
75-
config.alpha_value = shared.args.alpha_value
74+
if shared.args.alpha_value > 1 or shared.args.rope_freq_base > 0:
75+
config.alpha_value = RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)
7676
config.calculate_rotary_embedding_base()
7777

7878
model = ExLlama(config)

0 commit comments

Comments
 (0)