Commit ce2fe36

Merge branch 'main' into xsn/llama_snippet
2 parents: d8dafa2 + 7004980

File tree: 8 files changed (+134, −52 lines)


packages/gguf/src/gguf.spec.ts

Lines changed: 17 additions & 11 deletions

@@ -1,4 +1,5 @@
-import { describe, expect, it } from "vitest";
+import { beforeAll, describe, expect, it } from "vitest";
+import type { GGUFParseOutput } from "./gguf";
 import { GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
 import fs from "node:fs";

@@ -12,8 +13,19 @@ const URL_V1 =
 	"https://huggingface.co/tmadge/testing/resolve/66c078028d1ff92d7a9264a1590bc61ba6437933/tinyllamas-stories-260k-f32.gguf";
 const URL_SHARDED_GROK =
 	"https://huggingface.co/Arki05/Grok-1-GGUF/resolve/ecafa8d8eca9b8cd75d11a0d08d3a6199dc5a068/grok-1-IQ3_XS-split-00001-of-00009.gguf";
+const URL_BIG_METADATA = "https://huggingface.co/ngxson/test_gguf_models/resolve/main/gguf_test_big_metadata.gguf";

 describe("gguf", () => {
+	beforeAll(async () => {
+		// download the gguf for "load file" test, save to .cache directory
+		if (!fs.existsSync(".cache")) {
+			fs.mkdirSync(".cache");
+		}
+		const res = await fetch(URL_BIG_METADATA);
+		const arrayBuf = await res.arrayBuffer();
+		fs.writeFileSync(".cache/model.gguf", Buffer.from(arrayBuf));
+	});
+
 	it("should parse a llama2 7b", async () => {
 		const { metadata, tensorInfos } = await gguf(URL_LLAMA);

@@ -228,16 +240,10 @@ describe("gguf", () => {
 	});

 	it("should parse a local file", async () => {
-		// download the file and save to .cache folder
-		if (!fs.existsSync(".cache")) {
-			fs.mkdirSync(".cache");
-		}
-		const res = await fetch(URL_V1);
-		const arrayBuf = await res.arrayBuffer();
-		fs.writeFileSync(".cache/model.gguf", Buffer.from(arrayBuf));
-
-		const { metadata } = await gguf(".cache/model.gguf", { allowLocalFile: true });
-		expect(metadata).toMatchObject({ "general.name": "tinyllamas-stories-260k" });
+		const parsedGguf = await gguf(".cache/model.gguf", { allowLocalFile: true });
+		const { metadata } = parsedGguf as GGUFParseOutput<{ strict: false }>; // custom metadata arch, no need for typing
+		expect(metadata["dummy.1"]).toBeDefined(); // first metadata in the list
+		expect(metadata["dummy.32767"]).toBeDefined(); // last metadata in the list
 	});

 	it("should detect sharded gguf filename", async () => {

packages/tasks/package.json

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 {
 	"name": "@huggingface/tasks",
 	"packageManager": "[email protected]",
-	"version": "0.10.19",
+	"version": "0.10.20",
 	"description": "List of ML tasks for huggingface.co/tasks",
 	"repository": "https://github.com/huggingface/huggingface.js.git",
 	"publishConfig": {

packages/tasks/src/local-apps.ts

Lines changed: 4 additions & 3 deletions

@@ -38,14 +38,14 @@ export type LocalApp = {
 			/**
 			 * If the app supports deeplink, URL to open.
 			 */
-			deeplink: (model: ModelData) => URL;
+			deeplink: (model: ModelData, filepath?: string) => URL;
 	  }
 	| {
 			/**
 			 * And if not (mostly llama.cpp), snippet to copy/paste in your terminal
 			 * Supports the placeholder {{GGUF_FILE}} that will be replaced by the gguf file path or the list of available files.
 			 */
-			snippet: (model: ModelData) => Snippet | Snippet[];
+			snippet: (model: ModelData, filepath?: string) => string | string[] | Snippet | Snippet[];
 	  }
 );

@@ -118,7 +118,8 @@ export const LOCAL_APPS = {
 		docsUrl: "https://lmstudio.ai",
 		mainTask: "text-generation",
 		displayOnModelPage: isGgufModel,
-		deeplink: (model) => new URL(`lmstudio://open_from_hf?model=${model.id}`),
+		deeplink: (model, filepath) =>
+			new URL(`lmstudio://open_from_hf?model=${model.id}` + (filepath ? `&file=${filepath}` : "")),
 	},
 	jan: {
 		prettyLabel: "Jan",

packages/tasks/src/snippets/curl.ts

Lines changed: 19 additions & 1 deletion

@@ -10,6 +10,24 @@ export const snippetBasic = (model: ModelDataMinimal, accessToken: string): stri
 	-H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}"
 `;

+export const snippetTextGeneration = (model: ModelDataMinimal, accessToken: string): string => {
+	if (model.config?.tokenizer_config?.chat_template) {
+		// Conversational model detected, so we display a code snippet that features the Messages API
+		return `curl 'https://api-inference.huggingface.co/models/${model.id}/v1/chat/completions' \\
+-H "Authorization: Bearer ${accessToken || `{API_TOKEN}`}" \\
+-H 'Content-Type: application/json' \\
+-d '{
+	"model": "${model.id}",
+	"messages": [{"role": "user", "content": "What is the capital of France?"}],
+	"max_tokens": 500,
+	"stream": false
+}'
+`;
+	} else {
+		return snippetBasic(model, accessToken);
+	}
+};
+
 export const snippetZeroShotClassification = (model: ModelDataMinimal, accessToken: string): string =>
 	`curl https://api-inference.huggingface.co/models/${model.id} \\
 	-X POST \\

@@ -35,7 +53,7 @@ export const curlSnippets: Partial<Record<PipelineType, (model: ModelDataMinimal
 	translation: snippetBasic,
 	summarization: snippetBasic,
 	"feature-extraction": snippetBasic,
-	"text-generation": snippetBasic,
+	"text-generation": snippetTextGeneration,
 	"text2text-generation": snippetBasic,
 	"fill-mask": snippetBasic,
 	"sentence-similarity": snippetBasic,

packages/tasks/src/snippets/inputs.ts

Lines changed: 25 additions & 25 deletions (re-indentation of the template literals only)

@@ -11,30 +11,30 @@ const inputsSummarization = () =>

 const inputsTableQuestionAnswering = () =>
 	`{
-	"query": "How many stars does the transformers repository have?",
-	"table": {
-		"Repository": ["Transformers", "Datasets", "Tokenizers"],
-		"Stars": ["36542", "4512", "3934"],
-		"Contributors": ["651", "77", "34"],
-		"Programming language": [
-			"Python",
-			"Python",
-			"Rust, Python and NodeJS"
-		]
-	}
-}`;
+		"query": "How many stars does the transformers repository have?",
+		"table": {
+			"Repository": ["Transformers", "Datasets", "Tokenizers"],
+			"Stars": ["36542", "4512", "3934"],
+			"Contributors": ["651", "77", "34"],
+			"Programming language": [
+				"Python",
+				"Python",
+				"Rust, Python and NodeJS"
+			]
+		}
+	}`;

 const inputsVisualQuestionAnswering = () =>
 	`{
-	"image": "cat.png",
-	"question": "What is in this image?"
-}`;
+		"image": "cat.png",
+		"question": "What is in this image?"
+	}`;

 const inputsQuestionAnswering = () =>
 	`{
-	"question": "What is my name?",
-	"context": "My name is Clara and I live in Berkeley."
-}`;
+		"question": "What is my name?",
+		"context": "My name is Clara and I live in Berkeley."
+	}`;

 const inputsTextClassification = () => `"I like you. I love you"`;

@@ -48,13 +48,13 @@ const inputsFillMask = (model: ModelDataMinimal) => `"The answer to the universe

 const inputsSentenceSimilarity = () =>
 	`{
-	"source_sentence": "That is a happy person",
-	"sentences": [
-		"That is a happy dog",
-		"That is a very happy person",
-		"Today is a sunny day"
-	]
-}`;
+		"source_sentence": "That is a happy person",
+		"sentences": [
+			"That is a happy dog",
+			"That is a very happy person",
+			"Today is a sunny day"
+		]
+	}`;

 const inputsFeatureExtraction = () => `"Today is a sunny day and I will get some ice cream."`;

packages/tasks/src/snippets/js.ts

Lines changed: 40 additions & 6 deletions

@@ -7,7 +7,10 @@ export const snippetBasic = (model: ModelDataMinimal, accessToken: string): stri
 	const response = await fetch(
 		"https://api-inference.huggingface.co/models/${model.id}",
 		{
-			headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" },
+			headers: {
+				Authorization: "Bearer ${accessToken || `{API_TOKEN}`}",
+				"Content-Type": "application/json",
+			},
 			method: "POST",
 			body: JSON.stringify(data),
 		}

@@ -20,12 +23,34 @@ query({"inputs": ${getModelInputSnippet(model)}}).then((response) => {
 	console.log(JSON.stringify(response));
 });`;

+export const snippetTextGeneration = (model: ModelDataMinimal, accessToken: string): string => {
+	if (model.config?.tokenizer_config?.chat_template) {
+		// Conversational model detected, so we display a code snippet that features the Messages API
+		return `import { HfInference } from "@huggingface/inference";
+
+const inference = new HfInference("${accessToken || `{API_TOKEN}`}");
+
+for await (const chunk of inference.chatCompletionStream({
+	model: "${model.id}",
+	messages: [{ role: "user", content: "What is the capital of France?" }],
+	max_tokens: 500,
+})) {
+	process.stdout.write(chunk.choices[0]?.delta?.content || "");
+}
+`;
+	} else {
+		return snippetBasic(model, accessToken);
+	}
+};
 export const snippetZeroShotClassification = (model: ModelDataMinimal, accessToken: string): string =>
 	`async function query(data) {
 	const response = await fetch(
 		"https://api-inference.huggingface.co/models/${model.id}",
 		{
-			headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" },
+			headers: {
+				Authorization: "Bearer ${accessToken || `{API_TOKEN}`}",
+				"Content-Type": "application/json",
+			},
 			method: "POST",
 			body: JSON.stringify(data),
 		}

@@ -45,7 +70,10 @@ export const snippetTextToImage = (model: ModelDataMinimal, accessToken: string)
 	const response = await fetch(
 		"https://api-inference.huggingface.co/models/${model.id}",
 		{
-			headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" },
+			headers: {
+				Authorization: "Bearer ${accessToken || `{API_TOKEN}`}",
+				"Content-Type": "application/json",
+			},
 			method: "POST",
 			body: JSON.stringify(data),
 		}

@@ -62,7 +90,10 @@ export const snippetTextToAudio = (model: ModelDataMinimal, accessToken: string)
 	const response = await fetch(
 		"https://api-inference.huggingface.co/models/${model.id}",
 		{
-			headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" },
+			headers: {
+				Authorization: "Bearer ${accessToken || `{API_TOKEN}`}",
+				"Content-Type": "application/json",
+			},
 			method: "POST",
 			body: JSON.stringify(data),
 		}

@@ -99,7 +130,10 @@ export const snippetFile = (model: ModelDataMinimal, accessToken: string): strin
 	const response = await fetch(
 		"https://api-inference.huggingface.co/models/${model.id}",
 		{
-			headers: { Authorization: "Bearer ${accessToken || `{API_TOKEN}`}" },
+			headers: {
+				Authorization: "Bearer ${accessToken || `{API_TOKEN}`}",
+				"Content-Type": "application/json",
+			},
 			method: "POST",
 			body: data,
 		}

@@ -122,7 +156,7 @@ export const jsSnippets: Partial<Record<PipelineType, (model: ModelDataMinimal,
 	translation: snippetBasic,
 	summarization: snippetBasic,
 	"feature-extraction": snippetBasic,
-	"text-generation": snippetBasic,
+	"text-generation": snippetTextGeneration,
 	"text2text-generation": snippetBasic,
 	"fill-mask": snippetBasic,
 	"sentence-similarity": snippetBasic,

packages/tasks/src/snippets/python.ts

Lines changed: 27 additions & 4 deletions

@@ -2,6 +2,22 @@ import type { PipelineType } from "../pipelines.js";
 import { getModelInputSnippet } from "./inputs.js";
 import type { ModelDataMinimal } from "./types.js";

+export const snippetConversational = (model: ModelDataMinimal, accessToken: string): string =>
+	`from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    "${model.id}",
+    token="${accessToken || "{API_TOKEN}"}",
+)
+
+for message in client.chat_completion(
+    messages=[{"role": "user", "content": "What is the capital of France?"}],
+    max_tokens=500,
+    stream=True,
+):
+    print(message.choices[0].delta.content, end="")
+`;
+
 export const snippetZeroShotClassification = (model: ModelDataMinimal): string =>
 	`def query(payload):
 	response = requests.post(API_URL, headers=headers, json=payload)

@@ -107,7 +123,7 @@ output = query({
 	"inputs": ${getModelInputSnippet(model)},
 })`;

-export const pythonSnippets: Partial<Record<PipelineType, (model: ModelDataMinimal) => string>> = {
+export const pythonSnippets: Partial<Record<PipelineType, (model: ModelDataMinimal, accessToken: string) => string>> = {
 	// Same order as in tasks/src/pipelines.ts
 	"text-classification": snippetBasic,
 	"token-classification": snippetBasic,

@@ -138,15 +154,22 @@ export const pythonSnippets: Partial<Record<PipelineType, (model: ModelDataMinim
 };

 export function getPythonInferenceSnippet(model: ModelDataMinimal, accessToken: string): string {
-	const body =
-		model.pipeline_tag && model.pipeline_tag in pythonSnippets ? pythonSnippets[model.pipeline_tag]?.(model) ?? "" : "";
+	if (model.pipeline_tag === "text-generation" && model.config?.tokenizer_config?.chat_template) {
+		// Conversational model detected, so we display a code snippet that features the Messages API
+		return snippetConversational(model, accessToken);
+	} else {
+		const body =
+			model.pipeline_tag && model.pipeline_tag in pythonSnippets
+				? pythonSnippets[model.pipeline_tag]?.(model, accessToken) ?? ""
+				: "";

-	return `import requests
+		return `import requests

 API_URL = "https://api-inference.huggingface.co/models/${model.id}"
 headers = {"Authorization": ${accessToken ? `"Bearer ${accessToken}"` : `f"Bearer {API_TOKEN}"`}}

 ${body}`;
+	}
 }

 export function hasPythonInferenceSnippet(model: ModelDataMinimal): boolean {
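A sketch of the new routing from the caller's perspective: getPythonInferenceSnippet now branches on both the pipeline tag and the chat template before picking a generator. The model data and token below are placeholders:

import { getPythonInferenceSnippet } from "./python";
import type { ModelDataMinimal } from "./types";

const conversational: ModelDataMinimal = {
    id: "some-org/some-instruct-model", // placeholder id
    pipeline_tag: "text-generation",
    config: { tokenizer_config: { chat_template: "{{ messages }}" } }, // dummy template
};

// Emits the huggingface_hub InferenceClient chat_completion snippet; the same
// model without a chat_template would get the classic requests-based snippet.
console.log(getPythonInferenceSnippet(conversational, "hf_xxx"));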

packages/tasks/src/snippets/types.ts

Lines changed: 1 addition & 1 deletion

@@ -5,4 +5,4 @@ import type { ModelData } from "../model-data";
 *
 * Add more fields as needed.
 */
-export type ModelDataMinimal = Pick<ModelData, "id" | "pipeline_tag" | "mask_token" | "library_name">;
+export type ModelDataMinimal = Pick<ModelData, "id" | "pipeline_tag" | "mask_token" | "library_name" | "config">;
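This one-key widening matters because ModelDataMinimal is a Pick over ModelData: without "config" in the union, the model.config?.tokenizer_config?.chat_template checks added in the snippet files would not type-check. A minimal illustration, with dummy field values:

import type { ModelDataMinimal } from "./types";

const minimal: ModelDataMinimal = {
    id: "some-org/some-model", // placeholder id
    pipeline_tag: "text-generation",
    config: { tokenizer_config: { chat_template: "{{ messages }}" } }, // dummy template
};

// Legal only now that "config" is part of the Pick; previously this access
// was a compile-time error on ModelDataMinimal.
const isConversational = Boolean(minimal.config?.tokenizer_config?.chat_template);
console.log(isConversational); // true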
