Skip to content

Commit 6dd4255

Browse files
authored
Tee the primary stream when stream usage is enabled, so that usage can be extracted and included in _meta (#176)
1 parent 3fb0b08 commit 6dd4255

28 files changed

+90
-44
lines changed

.changeset/calm-knives-sin.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@instructor-ai/instructor": minor
3+
---
4+
5+
Add the ability to include usage from streams by teeing the stream when the option is present.

bun.lockb

621 Bytes
Binary file not shown.

docs/concepts/streaming.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ A follow-up meeting is scheduled for January 25th at 3 PM GMT to finalize the ag
6161

6262
const extractionStream = await client.chat.completions.create({
6363
messages: [{ role: "user", content: textBlock }],
64-
model: "gpt-4-turbo",
64+
model: "gpt-4o",
6565
response_model: {
6666
schema: ExtractionValuesSchema,
6767
name: "value extraction"

docs/examples/action_items.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ const extractActionItems = async (data: string): Promise<ActionItems | undefined
6666
"content": `Create the action items for the following transcript: ${data}`,
6767
},
6868
],
69-
model: "gpt-4-turbo",
69+
model: "gpt-4o",
7070
response_model: { schema: ActionItemsSchema },
7171
max_tokens: 1000,
7272
temperature: 0.0,

docs/examples/query_decomposition.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ const createQueryPlan = async (question: string): Promise<QueryPlan | undefined>
6565
"content": `Consider: ${question}\nGenerate the correct query plan.`,
6666
},
6767
],
68-
model: "gpt-4-turbo",
68+
model: "gpt-4o",
6969
response_model: { schema: QueryPlanSchema },
7070
max_tokens: 1000,
7171
temperature: 0.0,

docs/examples/self_correction.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ const question = "What is the meaning of life?"
4444
const context = "According to the devil the meaning of live is to live a life of sin and debauchery."
4545

4646
await instructor.chat.completions.create({
47-
model: "gpt-4",
47+
model: "gpt-4o",
4848
max_retries: 0,
4949
response_model: { schema: QuestionAnswer, name: "Question and Answer" },
5050
messages: [
@@ -82,14 +82,14 @@ const QuestionAnswer = z.object({
8282
question: z.string(),
8383
answer: z.string().superRefine(
8484
LLMValidator(instructor, statement, {
85-
model: "gpt-4"
85+
model: "gpt-4o"
8686
})
8787
)
8888
})
8989

9090
try {
9191
await instructor.chat.completions.create({
92-
model: "gpt-4",
92+
model: "gpt-4o",
9393
max_retries: 0,
9494
response_model: { schema: QuestionAnswer, name: "Question and Answer" },
9595
messages: [
@@ -132,7 +132,7 @@ By adding the `max_retries` parameter, we can retry the request with corrections
132132
```ts
133133
try {
134134
await instructor.chat.completions.create({
135-
model: "gpt-4",
135+
model: "gpt-4o",
136136
max_retries: 2,
137137
response_model: { schema: QuestionAnswer, name: "Question and Answer" },
138138
messages: [

examples/action_items/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ const extractActionItems = async (data: string) => {
4545
content: `Create the action items for the following transcript: ${data}`
4646
}
4747
],
48-
model: "gpt-4-turbo",
48+
model: "gpt-4o",
4949
response_model: { schema: ActionItemsSchema, name: "ActionItems" },
5050
max_tokens: 1000,
5151
temperature: 0.0,

examples/extract_user/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ const client = Instructor({
1919

2020
const user = await client.chat.completions.create({
2121
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
22-
model: "gpt-4",
22+
model: "gpt-4o",
2323
response_model: {
2424
schema: UserSchema,
2525
name: "User"

examples/extract_user/properties.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ const client = Instructor({
2727

2828
const user = await client.chat.completions.create({
2929
messages: [{ role: "user", content: "Happy Potter" }],
30-
model: "gpt-4",
30+
model: "gpt-4o",
3131
response_model: { schema: UserSchema, name: "User" },
3232
max_retries: 3,
3333
seed: 1

examples/extract_user_stream/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ let extraction = {}
5353

5454
const extractionStream = await client.chat.completions.create({
5555
messages: [{ role: "user", content: textBlock }],
56-
model: "gpt-4-turbo",
56+
model: "gpt-4o",
5757
response_model: {
5858
schema: ExtractionValuesSchema,
5959
name: "value extraction"

examples/llm-validator/index.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ const QuestionAnswer = z.object({
1616
question: z.string(),
1717
answer: z.string().superRefine(
1818
LLMValidator(instructor, statement, {
19-
model: "gpt-4-turbo"
19+
model: "gpt-4o"
2020
})
2121
)
2222
})
@@ -25,7 +25,7 @@ const question = "What is the meaning of life?"
2525

2626
const check = async (context: string) => {
2727
return await instructor.chat.completions.create({
28-
model: "gpt-4-turbo",
28+
model: "gpt-4o",
2929
max_retries: 2,
3030
response_model: { schema: QuestionAnswer, name: "Question and Answer" },
3131
messages: [

examples/query_decomposition/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ const createQueryPlan = async (question: string) => {
3838
content: `Consider: ${question}\nGenerate the correct query plan.`
3939
}
4040
],
41-
model: "gpt-4-turbo",
41+
model: "gpt-4o",
4242
response_model: { schema: QueryPlanSchema, name: "Query Plan Decomposition" },
4343
max_tokens: 1000,
4444
temperature: 0.0,

examples/query_expansions/run.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ const runExtraction = async (query: string) => {
7373
{ role: "system", content: systemPrompt },
7474
{ role: "user", content: query }
7575
],
76-
model: "gpt-4",
76+
model: "gpt-4o",
7777
response_model: {
7878
schema: ExtractionValuesSchema,
7979
name: "value_extraction"

examples/query_expansions/run_sync.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ export const runExtractionStream = async (query: string) => {
9595
{ role: "system", content: systemPrompt },
9696
{ role: "user", content: query }
9797
],
98-
model: "gpt-4",
98+
model: "gpt-4o",
9999
response_model: {
100100
schema: SearchQuery,
101101
name: "value_extraction"
@@ -124,7 +124,7 @@ const runExtraction = async (query: string) => {
124124
{ role: "system", content: systemPrompt },
125125
{ role: "user", content: query }
126126
],
127-
model: "gpt-4",
127+
model: "gpt-4o",
128128
response_model: {
129129
schema: Response,
130130
name: "Respond"

examples/resolving-complex-entitities/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ const askAi = async (input: string) => {
5959
content: input
6060
}
6161
],
62-
model: "gpt-4",
62+
model: "gpt-4o",
6363
response_model: { schema: DocumentExtractionSchema, name: "Document Extraction" },
6464
max_retries: 3,
6565
seed: 1

package.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
},
5252
"homepage": "https://github.com/instructor-ai/instructor-js#readme",
5353
"dependencies": {
54-
"zod-stream": "1.0.2",
54+
"zod-stream": "1.0.3",
5555
"zod-validation-error": "^2.1.0"
5656
},
5757
"peerDependencies": {
@@ -76,6 +76,7 @@
7676
"eslint-plugin-prettier": "^5.1.2",
7777
"husky": "^8.0.3",
7878
"llm-polyglot": "1.0.0",
79+
"openai": "latest",
7980
"prettier": "latest",
8081
"ts-inference-check": "^0.3.0",
8182
"tsup": "^8.0.1",

src/constants/providers.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ export const PROVIDER_SUPPORTED_MODES_BY_MODEL = {
103103
[PROVIDERS.OAI]: {
104104
[MODE.FUNCTIONS]: ["*"],
105105
[MODE.TOOLS]: ["*"],
106-
[MODE.JSON]: ["gpt-3.5-turbo-1106", "gpt-4-turbo", "gpt-4-0125-preview", "gpt-4-turbo-preview"],
106+
[MODE.JSON]: ["*"],
107107
[MODE.MD_JSON]: ["*"]
108108
},
109109
[PROVIDERS.TOGETHER]: {

src/instructor.ts

+38-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import {
99
ReturnTypeBasedOnParams
1010
} from "@/types"
1111
import OpenAI from "openai"
12+
import { Stream } from "openai/streaming.mjs"
1213
import { z, ZodError } from "zod"
1314
import ZodStream, { OAIResponseParser, OAIStream, withResponseModel, type Mode } from "zod-stream"
1415
import { fromZodError } from "zod-validation-error"
@@ -266,10 +267,10 @@ class Instructor<C extends GenericClient | OpenAI> {
266267
return makeCompletionCallWithRetries()
267268
}
268269

269-
private async chatCompletionStream<T extends z.AnyZodObject>(
270+
private async *chatCompletionStream<T extends z.AnyZodObject>(
270271
{ max_retries, response_model, ...params }: ChatCompletionCreateParamsWithModel<T>,
271272
requestOptions?: ClientTypeChatCompletionRequestOptions<C>
272-
): Promise<AsyncGenerator<Partial<T> & { _meta?: CompletionMeta }, void, unknown>> {
273+
): AsyncGenerator<Partial<T> & { _meta?: CompletionMeta }, void, unknown> {
273274
if (max_retries) {
274275
this.log("warn", "max_retries is not supported for streaming completions")
275276
}
@@ -293,7 +294,16 @@ class Instructor<C extends GenericClient | OpenAI> {
293294
debug: this.debug ?? false
294295
})
295296

296-
return streamClient.create({
297+
async function checkForUsage(reader: Stream<OpenAI.ChatCompletionChunk>) {
298+
for await (const chunk of reader) {
299+
if ("usage" in chunk) {
300+
streamUsage = chunk.usage as CompletionMeta["usage"]
301+
}
302+
}
303+
}
304+
305+
let streamUsage: CompletionMeta["usage"] | undefined
306+
const structuredStream = await streamClient.create({
297307
completionPromise: async () => {
298308
if (this.client.chat?.completions?.create) {
299309
const completion = await this.client.chat.completions.create(
@@ -306,6 +316,21 @@ class Instructor<C extends GenericClient | OpenAI> {
306316

307317
this.log("debug", "raw stream completion response: ", completion)
308318

319+
if (
320+
this.provider === "OAI" &&
321+
completionParams?.stream &&
322+
"stream_options" in completionParams &&
323+
completion instanceof Stream
324+
) {
325+
const [completion1, completion2] = completion.tee()
326+
327+
checkForUsage(completion1)
328+
329+
return OAIStream({
330+
res: completion2
331+
})
332+
}
333+
309334
return OAIStream({
310335
res: completion as unknown as AsyncIterable<OpenAI.ChatCompletionChunk>
311336
})
@@ -315,6 +340,16 @@ class Instructor<C extends GenericClient | OpenAI> {
315340
},
316341
response_model
317342
})
343+
344+
for await (const chunk of structuredStream) {
345+
yield {
346+
...chunk,
347+
_meta: {
348+
usage: streamUsage ?? undefined,
349+
...(chunk?._meta ?? {})
350+
}
351+
}
352+
}
318353
}
319354

320355
private isChatCompletionCreateParamsWithModel<T extends z.AnyZodObject>(

src/types/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ export type ReturnTypeBasedOnParams<C, P> =
8888
response_model: ResponseModel<infer T>
8989
}
9090
) ?
91-
Promise<AsyncGenerator<Partial<z.infer<T>> & { _meta?: CompletionMeta }, void, unknown>>
91+
AsyncGenerator<Partial<z.infer<T>> & { _meta?: CompletionMeta }, void, unknown>
9292
: P extends { response_model: ResponseModel<infer T> } ?
9393
Promise<z.infer<T> & { _meta?: CompletionMeta }>
9494
: C extends OpenAI ?

tests/extract.test.ts

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ async function extractUser() {
2121

2222
const user = await client.chat.completions.create({
2323
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
24-
model: "gpt-4-turbo",
24+
model: "gpt-4o",
2525
response_model: { schema: UserSchema, name: "User" },
2626
seed: 1
2727
})
@@ -49,7 +49,7 @@ async function extractUserValidated() {
4949

5050
const user = await client.chat.completions.create({
5151
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
52-
model: "gpt-4",
52+
model: "gpt-4o",
5353
response_model: { schema: UserSchema, name: "User" },
5454
max_retries: 3,
5555
seed: 1
@@ -82,7 +82,7 @@ async function extractUserMany() {
8282

8383
const user = await client.chat.completions.create({
8484
messages: [{ role: "user", content: "Jason is 30 years old, Sarah is 12" }],
85-
model: "gpt-4-turbo",
85+
model: "gpt-4o",
8686
response_model: { schema: UsersSchema, name: "Users" },
8787
max_retries: 3,
8888
seed: 1

tests/functions.test.ts

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ async function extractUser() {
2121

2222
const user = await client.chat.completions.create({
2323
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
24-
model: "gpt-4-turbo",
24+
model: "gpt-4o",
2525
response_model: { schema: UserSchema, name: "User" },
2626
seed: 1
2727
})
@@ -52,7 +52,7 @@ async function extractUserValidated() {
5252

5353
const user = await client.chat.completions.create({
5454
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
55-
model: "gpt-4-turbo",
55+
model: "gpt-4o",
5656
response_model: { schema: UserSchema, name: "User" },
5757
max_retries: 3,
5858
seed: 1
@@ -85,7 +85,7 @@ async function extractUserMany() {
8585

8686
const user = await client.chat.completions.create({
8787
messages: [{ role: "user", content: "Jason is 30 years old, Sarah is 12" }],
88-
model: "gpt-4-turbo",
88+
model: "gpt-4o",
8989
response_model: { schema: UsersSchema, name: "Users" },
9090
max_retries: 3,
9191
seed: 1

tests/inference.test.ts

+6-6
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ describe("Inference Checking", () => {
3333
test("no response_model, no stream", async () => {
3434
const user = await client.chat.completions.create({
3535
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
36-
model: "gpt-4-turbo",
36+
model: "gpt-4o",
3737
seed: 1,
3838
stream: false
3939
})
@@ -44,7 +44,7 @@ describe("Inference Checking", () => {
4444
test("no response_model, stream", async () => {
4545
const userStream = await client.chat.completions.create({
4646
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
47-
model: "gpt-4-turbo",
47+
model: "gpt-4o",
4848
seed: 1,
4949
stream: true
5050
})
@@ -57,7 +57,7 @@ describe("Inference Checking", () => {
5757
test("response_model, no stream", async () => {
5858
const user = await client.chat.completions.create({
5959
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
60-
model: "gpt-4-turbo",
60+
model: "gpt-4o",
6161
response_model: { schema: UserSchema, name: "User" },
6262
seed: 1,
6363
stream: false
@@ -71,7 +71,7 @@ describe("Inference Checking", () => {
7171
test("response_model, stream", async () => {
7272
const userStream = await client.chat.completions.create({
7373
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
74-
model: "gpt-4-turbo",
74+
model: "gpt-4o",
7575
response_model: { schema: UserSchema, name: "User" },
7676
seed: 1,
7777
stream: true
@@ -94,7 +94,7 @@ describe("Inference Checking", () => {
9494
test("response_model, stream, max_retries", async () => {
9595
const userStream = await client.chat.completions.create({
9696
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
97-
model: "gpt-4-turbo",
97+
model: "gpt-4o",
9898
response_model: { schema: UserSchema, name: "User" },
9999
seed: 1,
100100
stream: true,
@@ -118,7 +118,7 @@ describe("Inference Checking", () => {
118118
test("response_model, no stream, max_retries", async () => {
119119
const user = await client.chat.completions.create({
120120
messages: [{ role: "user", content: "Jason Liu is 30 years old" }],
121-
model: "gpt-4-turbo",
121+
model: "gpt-4o",
122122
response_model: { schema: UserSchema, name: "User" },
123123
seed: 1,
124124
max_retries: 3

tests/maybe.test.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ async function maybeExtractUser(content: string) {
2424

2525
const user = await client.chat.completions.create({
2626
messages: [{ role: "user", content: "Extract " + content }],
27-
model: "gpt-4",
27+
model: "gpt-4o",
2828
response_model: { schema: MaybeUserSchema, name: "User" },
2929
max_retries: 3,
3030
seed: 1

0 commit comments

Comments
 (0)