
Commit 255568f

python: various fixes for GPT4All and Embed4All (#2130)
Key changes:

* honor empty system prompt argument
* current_chat_session is now read-only and defaults to None
* deprecate fallback prompt template for unknown models
* fix mistakes from #2086

Signed-off-by: Jared Van Bortel <[email protected]>
1 parent 53f109f commit 255568f
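
Two of the listed changes are visible directly from the Python API: an explicitly empty `system_prompt` passed to `chat_session()` is now honored instead of being replaced by the model's default, and `current_chat_session` defaults to `None` and can no longer be assigned. A minimal sketch of that behavior (the model file name is illustrative):

```python
from gpt4all import GPT4All

model = GPT4All("mistral-7b-instruct-v0.1.Q4_0.gguf")  # illustrative model file
print(model.current_chat_session)  # None - no session is active outside chat_session()

# An empty system prompt is now honored rather than falling back to the model default.
with model.chat_session(system_prompt=""):
    model.generate("Why is grass green?", max_tokens=64)
    print(model.current_chat_session)  # read-only view of the accumulated messages

model.current_chat_session = []  # now fails: the attribute is read-only
```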

File tree

7 files changed: +132 -148 lines changed


gpt4all-backend/llamamodel.cpp

Lines changed: 16 additions & 13 deletions
```diff
@@ -10,6 +10,7 @@
 #include <iomanip>
 #include <iostream>
 #include <map>
+#include <numeric>
 #include <random>
 #include <sstream>
 #include <stdexcept>
```
```diff
@@ -345,7 +346,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     d_ptr->ctx_params.n_threads = d_ptr->n_threads;
     d_ptr->ctx_params.n_threads_batch = d_ptr->n_threads;
 
-    if (m_supportsEmbedding)
+    if (isEmbedding)
         d_ptr->ctx_params.embeddings = true;
 
     d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params);
```
```diff
@@ -612,22 +613,22 @@ struct EmbModelGroup {
     std::vector<const char *> names;
 };
 
-static const EmbModelSpec NOPREFIX_SPEC {nullptr, nullptr};
+static const EmbModelSpec NOPREFIX_SPEC {"", ""};
 static const EmbModelSpec NOMIC_SPEC {"search_document", "search_query", {"clustering", "classification"}};
 static const EmbModelSpec E5_SPEC {"passage", "query"};
 
 static const EmbModelSpec NOMIC_1_5_SPEC {
-    "search_document", "search_query", {"clustering", "classification"}, true, "[768, 512, 384, 256, 128]"
+    "search_document", "search_query", {"clustering", "classification"}, true, "[768, 512, 384, 256, 128]",
 };
 static const EmbModelSpec LLM_EMBEDDER_SPEC {
     "Represent this document for retrieval",
     "Represent this query for retrieving relevant documents",
 };
 static const EmbModelSpec BGE_SPEC {
-    nullptr, "Represent this sentence for searching relevant passages",
+    "", "Represent this sentence for searching relevant passages",
 };
 static const EmbModelSpec E5_MISTRAL_SPEC {
-    nullptr, "Instruct: Given a query, retrieve relevant passages that answer the query\nQuery",
+    "", "Instruct: Given a query, retrieve relevant passages that answer the query\nQuery",
 };
 
 static const EmbModelGroup EMBEDDING_MODEL_SPECS[] {
```
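
These specs define the document and query prefixes the backend prepends for known embedding models; from Python they correspond to the `prefix` argument of `Embed4All.embed`, and the `[768, 512, 384, 256, 128]` list appears to be the supported truncated dimensionalities for nomic-embed-text-v1.5. A minimal usage sketch, assuming the model file is available locally (file name is illustrative):

```python
from gpt4all import Embed4All

embedder = Embed4All("nomic-embed-text-v1.5.f16.gguf")  # illustrative model file

# Documents use the "search_document" prefix and queries use "search_query",
# mirroring NOMIC_1_5_SPEC above; 512 is one of the listed truncation sizes.
doc_vec = embedder.embed("Grass is green because of chlorophyll.",
                         prefix="search_document", dimensionality=512)
query_vec = embedder.embed("Why is grass green?",
                           prefix="search_query", dimensionality=512)
print(len(doc_vec), len(query_vec))  # 512 512
```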
```diff
@@ -738,18 +739,20 @@ void LLamaModel::embedInternal(
     const llama_token bos_token = llama_token_bos(d_ptr->model);
     const llama_token eos_token = llama_token_eos(d_ptr->model);
 
-    assert(shouldAddBOS());
-    bool addEOS = llama_vocab_type(d_ptr->model) == LLAMA_VOCAB_TYPE_WPM;
+    bool useBOS = shouldAddBOS();
+    bool useEOS = llama_vocab_type(d_ptr->model) == LLAMA_VOCAB_TYPE_WPM;
 
     // no EOS, optional BOS
-    auto tokenize = [this, addEOS](std::string text, TokenString &tokens, bool addBOS) {
-        if (!text.empty() && text[0] != ' ')
+    auto tokenize = [this, useBOS, useEOS, eos_token](std::string text, TokenString &tokens, bool wantBOS) {
+        if (!text.empty() && text[0] != ' ') {
             text = ' ' + text; // normalize for SPM - our fork of llama.cpp doesn't add a space prefix
+        }
+        wantBOS &= useBOS;
 
        tokens.resize(text.length()+4);
-        int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), addBOS, false);
-        assert(addEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
-        tokens.resize(n_tokens - addEOS); // erase EOS/SEP
+        int32_t n_tokens = llama_tokenize(d_ptr->model, text.c_str(), text.length(), tokens.data(), tokens.size(), wantBOS, false);
+        assert(useEOS == (eos_token != -1 && tokens[n_tokens - 1] == eos_token));
+        tokens.resize(n_tokens - useEOS); // erase EOS/SEP
     };
 
     // tokenize the texts
```
```diff
@@ -784,7 +787,7 @@ void LLamaModel::embedInternal(
     }
 
     const uint32_t n_batch = llama_n_batch(d_ptr->ctx);
-    const uint32_t max_len = n_batch - (prefixTokens.size() + addEOS); // minus BOS/CLS and EOS/SEP
+    const uint32_t max_len = n_batch - (prefixTokens.size() + useEOS); // minus BOS/CLS and EOS/SEP
     if (chunkOverlap >= max_len) {
         throw std::logic_error("max chunk length of " + std::to_string(max_len) + " is smaller than overlap of " +
             std::to_string(chunkOverlap) + " tokens");
```
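
The guard above requires the chunk overlap to be smaller than the effective chunk length (batch size minus the prefix tokens and the optional EOS/SEP). A small Python illustration of that windowing constraint, not the backend implementation, with made-up numbers:

```python
# Illustration of the chunk-length/overlap relationship checked above; not the backend code.
def chunk_tokens(tokens: list[int], n_batch: int, n_prefix: int, use_eos: bool, overlap: int):
    max_len = n_batch - (n_prefix + int(use_eos))  # room left for text tokens per batch
    if overlap >= max_len:
        raise ValueError(f"max chunk length of {max_len} is smaller than overlap of {overlap} tokens")
    step = max_len - overlap  # consecutive chunks share `overlap` tokens
    return [tokens[i:i + max_len] for i in range(0, len(tokens), step)]

# e.g. n_batch=2048, a 2-token prefix plus EOS leaves max_len=2045 per chunk
chunks = chunk_tokens(list(range(5000)), n_batch=2048, n_prefix=2, use_eos=True, overlap=256)
print([len(c) for c in chunks])  # [2045, 2045, 1422]
```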

gpt4all-bindings/python/docs/gpt4all_python.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -317,10 +317,10 @@ are used instead of model-specific system and prompt templates:
 === "Output"
     ```
     default system template: ''
-    default prompt template: '### Human: \n{0}\n\n### Assistant:\n'
+    default prompt template: '### Human:\n{0}\n\n### Assistant:\n'
 
     session system template: ''
-    session prompt template: '### Human: \n{0}\n\n### Assistant:\n'
+    session prompt template: '### Human:\n{0}\n\n### Assistant:\n'
     ```
 
````
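
The corrected template simply drops the stray space after `### Human:`. The `{0}` placeholder is where the user's prompt is substituted when the template is formatted, e.g.:

```python
template = '### Human:\n{0}\n\n### Assistant:\n'
print(template.format("Why is grass green?"))
# ### Human:
# Why is grass green?
#
# ### Assistant:
```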

gpt4all-bindings/python/gpt4all/_pyllmodel.py

Lines changed: 2 additions & 12 deletions
```diff
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import ctypes
-import logging
 import os
 import platform
 import re
```
```diff
@@ -17,8 +16,6 @@
 else:
     import importlib_resources
 
-logger: logging.Logger = logging.getLogger(__name__)
-
 
 # TODO: provide a config file to make this more robust
 MODEL_LIB_PATH = importlib_resources.files("gpt4all") / "llmodel_DO_NOT_MODIFY" / "build"
```
```diff
@@ -130,7 +127,7 @@ class LLModelGPUDevice(ctypes.Structure):
 llmodel.llmodel_threadCount.argtypes = [ctypes.c_void_p]
 llmodel.llmodel_threadCount.restype = ctypes.c_int32
 
-llmodel.llmodel_set_implementation_search_path(str(MODEL_LIB_PATH).replace("\\", r"\\").encode())
+llmodel.llmodel_set_implementation_search_path(str(MODEL_LIB_PATH).encode())
 
 llmodel.llmodel_available_gpu_devices.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.POINTER(ctypes.c_int32)]
 llmodel.llmodel_available_gpu_devices.restype = ctypes.POINTER(LLModelGPUDevice)
```
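
The removed `.replace("\\", r"\\")` doubled every backslash in the implementation search path, which would mangle Windows paths; the C++ side presumably receives the path verbatim, so no escaping is needed. A quick illustration with a made-up path:

```python
path = r"C:\gpt4all\llmodel_DO_NOT_MODIFY\build"  # hypothetical Windows install location

print(path.replace("\\", r"\\").encode())  # b'C:\\\\gpt4all\\\\...' - doubled separators, not a real path
print(path.encode())                       # b'C:\\gpt4all\\...'     - the path bytes as they should be passed
```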
```diff
@@ -323,7 +320,7 @@ def generate_embeddings(self, text, prefix, dimensionality, do_mean, atlas):
             ctypes.byref(error),
         )
 
-        if embedding_ptr.value is None:
+        if not embedding_ptr:
             msg = "(unknown error)" if error.value is None else error.value.decode()
             raise RuntimeError(f'Failed to generate embeddings: {msg}')
 
```
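
In ctypes, a NULL pointer of any pointer type is falsy, so truth-testing the return value is the reliable failure check here; a `.value` attribute only exists on the simple pointer types such as `c_void_p` and `c_char_p`. A quick demonstration:

```python
import ctypes

null_floats = ctypes.POINTER(ctypes.c_float)()  # what a failed C call returning float* looks like
print(bool(null_floats))        # False - NULL pointers are falsy
print(ctypes.c_void_p().value)  # None  - .value is only defined for simple pointer types
```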

```diff
@@ -372,13 +369,6 @@ def prompt_model(
         self.buffer.clear()
         self.buff_expecting_cont_bytes = 0
 
-        logger.info(
-            "LLModel.prompt_model -- prompt:\n"
-            + "%s\n"
-            + "===/LLModel.prompt_model -- prompt/===",
-            prompt,
-        )
-
         self._set_context(
             n_predict=n_predict,
             top_k=top_k,
```
