
Commit e17e767

feat(Ollama Chat Model Node): Add additional Ollama config parameters & fix vision (#9215)

Signed-off-by: Oleg Ivaniv <[email protected]>
Co-authored-by: Michael Kret <[email protected]>
1 parent 3fbcbce commit e17e767

File tree

3 files changed: +145 / -7 lines changed


packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts

Lines changed: 5 additions & 1 deletion
@@ -22,6 +22,7 @@ import { LLMChain } from 'langchain/chains';
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { HumanMessage } from '@langchain/core/messages';
 import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
+import { ChatOllama } from '@langchain/community/chat_models/ollama';
 import { getTemplateNoticeField } from '../../../utils/sharedFields';
 import {
 	getOptionalOutputParsers,
@@ -81,7 +82,10 @@ async function getImageMessage(
 	)) as BaseLanguageModel;
 	const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;

-	const imageUrl = model instanceof ChatGoogleGenerativeAI ? dataURI : { url: dataURI, detail };
+	const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];
+	const imageUrl = directUriModels.some((i) => model instanceof i)
+		? dataURI
+		: { url: dataURI, detail };

 	return new HumanMessage({
 		content: [
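For orientation, here is a minimal standalone sketch of what the changed branch of getImageMessage now does. The directUriModels check and the HumanMessage shape mirror the diff above; buildImageMessage is an illustrative helper name and the rest is assumed, not the node's actual code.

import { HumanMessage } from '@langchain/core/messages';
import type { BaseLanguageModel } from '@langchain/core/language_models/base';
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
import { ChatOllama } from '@langchain/community/chat_models/ollama';

// Models that expect the bare data URI rather than an { url, detail } object.
const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];

function buildImageMessage(
	model: BaseLanguageModel,
	bufferData: Buffer,
	detail: 'auto' | 'low' | 'high',
) {
	const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;

	// Ollama now joins Gemini in receiving the raw data URI; other chat models
	// (e.g. OpenAI-style) still get an object with url and detail.
	const imageUrl = directUriModels.some((i) => model instanceof i)
		? dataURI
		: { url: dataURI, detail };

	return new HumanMessage({
		content: [{ type: 'image_url', image_url: imageUrl }],
	});
}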

packages/@n8n/nodes-langchain/nodes/llms/LMChatOllama/LmChatOllama.node.ts

Lines changed: 4 additions & 2 deletions
@@ -7,6 +7,7 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';

+import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
 import { ChatOllama } from '@langchain/community/chat_models/ollama';
 import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
@@ -54,12 +55,13 @@ export class LmChatOllama implements INodeType {
 		const credentials = await this.getCredentials('ollamaApi');

 		const modelName = this.getNodeParameter('model', itemIndex) as string;
-		const options = this.getNodeParameter('options', itemIndex, {}) as object;
+		const options = this.getNodeParameter('options', itemIndex, {}) as ChatOllamaInput;

 		const model = new ChatOllama({
+			...options,
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
-			...options,
+			format: options.format === 'default' ? undefined : options.format,
 		});

 		return {
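A rough sketch of the effect of that constructor change outside of n8n. ChatOllama and ChatOllamaInput are the real langchain exports used in the diff; the option values, baseUrl, and model name below are illustrative assumptions.

import { ChatOllama } from '@langchain/community/chat_models/ollama';
import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';

// Hypothetical values standing in for what the node reads from its 'options'
// parameter; 'default' is the UI placeholder value, not a real Ollama format.
const options: ChatOllamaInput = {
	temperature: 0.2,
	numCtx: 4096,
	format: 'default',
};

// Spreading the options first means the credential baseUrl and the selected
// model can no longer be overridden by option values, and the UI's 'default'
// format is translated to undefined so Ollama returns plain text.
const model = new ChatOllama({
	...options,
	baseUrl: 'http://localhost:11434', // comes from credentials in the node
	model: 'llama3', // illustrative model name
	format: options.format === 'default' ? undefined : options.format,
});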

packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts

Lines changed: 136 additions & 4 deletions
@@ -76,16 +76,16 @@ export const ollamaOptions: INodeProperties = {
 		default: 0.7,
 		typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
 		description:
-			'Controls randomness: Lowering results in less random completions. As the temperature approaches zero, the model will become deterministic and repetitive.',
+			'Controls the randomness of the generated text. Lower values make the output more focused and deterministic, while higher values make it more diverse and random.',
 		type: 'number',
 	},
 	{
 		displayName: 'Top K',
 		name: 'topK',
 		default: -1,
-		typeOptions: { maxValue: 1, minValue: -1, numberPrecision: 1 },
+		typeOptions: { maxValue: 100, minValue: -1, numberPrecision: 1 },
 		description:
-			'Used to remove "long tail" low probability responses. Defaults to -1, which disables it.',
+			'Limits the number of highest probability vocabulary tokens to consider at each step. A higher value increases diversity but may reduce coherence. Set to -1 to disable.',
 		type: 'number',
 	},
 	{
@@ -94,8 +94,140 @@ export const ollamaOptions: INodeProperties = {
 		default: 1,
 		typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
 		description:
-			'Controls diversity via nucleus sampling: 0.5 means half of all likelihood-weighted options are considered. We generally recommend altering this or temperature but not both.',
+			'Chooses from the smallest possible set of tokens whose cumulative probability exceeds the probability top_p. Helps generate more human-like text by reducing repetitions.',
 		type: 'number',
 	},
+	{
+		displayName: 'Frequency Penalty',
+		name: 'frequencyPenalty',
+		type: 'number',
+		default: 0.0,
+		typeOptions: { minValue: 0 },
+		description:
+			'Adjusts the penalty for tokens that have already appeared in the generated text. Higher values discourage repetition.',
+	},
+	{
+		displayName: 'Keep Alive',
+		name: 'keepAlive',
+		type: 'string',
+		default: '5m',
+		description:
+			'Specifies the duration to keep the loaded model in memory after use. Useful for frequently used models. Format: 1h30m (1 hour 30 minutes).',
+	},
+	{
+		displayName: 'Low VRAM Mode',
+		name: 'lowVram',
+		type: 'boolean',
+		default: false,
+		description:
+			'Whether to Activate low VRAM mode, which reduces memory usage at the cost of slower generation speed. Useful for GPUs with limited memory.',
+	},
+	{
+		displayName: 'Main GPU ID',
+		name: 'mainGpu',
+		type: 'number',
+		default: 0,
+		description:
+			'Specifies the ID of the GPU to use for the main computation. Only change this if you have multiple GPUs.',
+	},
+	{
+		displayName: 'Context Batch Size',
+		name: 'numBatch',
+		type: 'number',
+		default: 512,
+		description:
+			'Sets the batch size for prompt processing. Larger batch sizes may improve generation speed but increase memory usage.',
+	},
+	{
+		displayName: 'Context Length',
+		name: 'numCtx',
+		type: 'number',
+		default: 2048,
+		description:
+			'The maximum number of tokens to use as context for generating the next token. Smaller values reduce memory usage, while larger values provide more context to the model.',
+	},
+	{
+		displayName: 'Number of GPUs',
+		name: 'numGpu',
+		type: 'number',
+		default: -1,
+		description:
+			'Specifies the number of GPUs to use for parallel processing. Set to -1 for auto-detection.',
+	},
+	{
+		displayName: 'Max Tokens to Generate',
+		name: 'numPredict',
+		type: 'number',
+		default: -1,
+		description:
+			'The maximum number of tokens to generate. Set to -1 for no limit. Be cautious when setting this to a large value, as it can lead to very long outputs.',
+	},
+	{
+		displayName: 'Number of CPU Threads',
+		name: 'numThread',
+		type: 'number',
+		default: 0,
+		description:
+			'Specifies the number of CPU threads to use for processing. Set to 0 for auto-detection.',
+	},
+	{
+		displayName: 'Penalize Newlines',
+		name: 'penalizeNewline',
+		type: 'boolean',
+		default: true,
+		description:
+			'Whether the model will be less likely to generate newline characters, encouraging longer continuous sequences of text',
+	},
+	{
+		displayName: 'Presence Penalty',
+		name: 'presencePenalty',
+		type: 'number',
+		default: 0.0,
+		description:
+			'Adjusts the penalty for tokens based on their presence in the generated text so far. Positive values penalize tokens that have already appeared, encouraging diversity.',
+	},
+	{
+		displayName: 'Repetition Penalty',
+		name: 'repeatPenalty',
+		type: 'number',
+		default: 1.0,
+		description:
+			'Adjusts the penalty factor for repeated tokens. Higher values more strongly discourage repetition. Set to 1.0 to disable repetition penalty.',
+	},
+	{
+		displayName: 'Use Memory Locking',
+		name: 'useMLock',
+		type: 'boolean',
+		default: false,
+		description:
+			'Whether to lock the model in memory to prevent swapping. This can improve performance but requires sufficient available memory.',
+	},
+	{
+		displayName: 'Use Memory Mapping',
+		name: 'useMMap',
+		type: 'boolean',
+		default: true,
+		description:
+			'Whether to use memory mapping for loading the model. This can reduce memory usage but may impact performance. Recommended to keep enabled.',
+	},
+	{
+		displayName: 'Load Vocabulary Only',
+		name: 'vocabOnly',
+		type: 'boolean',
+		default: false,
+		description:
+			'Whether to only load the model vocabulary without the weights. Useful for quickly testing tokenization.',
+	},
+	{
+		displayName: 'Output Format',
+		name: 'format',
+		type: 'options',
+		options: [
+			{ name: 'Default', value: 'default' },
+			{ name: 'JSON', value: 'json' },
+		],
+		default: 'default',
+		description: 'Specifies the format of the API response',
+	},
 	],
 };
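Because each new option name matches a ChatOllamaInput field (numCtx, numPredict, keepAlive, and so on), the values collected by this description can be spread straight into the constructor without renaming. A hedged end-to-end usage sketch follows; the option values, baseUrl, model name, and prompt are illustrative assumptions, not part of the commit.

import { ChatOllama } from '@langchain/community/chat_models/ollama';

// Illustrative values for a few of the options added in this commit; the keys
// are the same camelCase names declared in description.ts above.
const collectedOptions = {
	topK: 40,
	topP: 0.9,
	numCtx: 4096,
	numPredict: 256,
	repeatPenalty: 1.1,
	keepAlive: '10m',
	format: 'json',
};

const model = new ChatOllama({
	...collectedOptions,
	baseUrl: 'http://localhost:11434', // assumed local Ollama instance
	model: 'llava', // hypothetical model name
});

async function main() {
	// With format: 'json', Ollama constrains the response to valid JSON.
	const response = await model.invoke('Return a JSON object with a single "greeting" key.');
	console.log(response.content);
}

void main();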
