Skip to content

Commit 6942d65

Browse files
authored
allow request option pass through (#164)
1 parent 224a0d1 commit 6942d65

22 files changed

+268
-96
lines changed

.changeset/curly-ants-tie.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@instructor-ai/instructor": minor
3+
---
4+
5+
Adds request-option pass-through to completion calls, improves handling of non-validation errors, and no longer retries when the failure is not specifically a validation error.

docs/concepts/streaming.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ A follow-up meeting is scheduled for January 25th at 3 PM GMT to finalize the ag
6161

6262
const extractionStream = await client.chat.completions.create({
6363
messages: [{ role: "user", content: textBlock }],
64-
model: "gpt-4-1106-preview",
64+
model: "gpt-4-turbo",
6565
response_model: {
6666
schema: ExtractionValuesSchema,
6767
name: "value extraction"

docs/examples/action_items.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ const extractActionItems = async (data: string): Promise<ActionItems | undefined
6666
"content": `Create the action items for the following transcript: ${data}`,
6767
},
6868
],
69-
model: "gpt-4-1106-preview",
69+
model: "gpt-4-turbo",
7070
response_model: { schema: ActionItemsSchema },
7171
max_tokens: 1000,
7272
temperature: 0.0,

docs/examples/query_decomposition.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ const createQueryPlan = async (question: string): Promise<QueryPlan | undefined>
6565
"content": `Consider: ${question}\nGenerate the correct query plan.`,
6666
},
6767
],
68-
model: "gpt-4-1106-preview",
68+
model: "gpt-4-turbo",
6969
response_model: { schema: QueryPlanSchema },
7070
max_tokens: 1000,
7171
temperature: 0.0,

examples/action_items/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ const extractActionItems = async (data: string) => {
4545
content: `Create the action items for the following transcript: ${data}`
4646
}
4747
],
48-
model: "gpt-4-1106-preview",
48+
model: "gpt-4-turbo",
4949
response_model: { schema: ActionItemsSchema, name: "ActionItems" },
5050
max_tokens: 1000,
5151
temperature: 0.0,

examples/extract_user_stream/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ let extraction = {}
5353

5454
const extractionStream = await client.chat.completions.create({
5555
messages: [{ role: "user", content: textBlock }],
56-
model: "gpt-4-1106-preview",
56+
model: "gpt-4-turbo",
5757
response_model: {
5858
schema: ExtractionValuesSchema,
5959
name: "value extraction"

examples/llm-validator/index.ts

+3-4
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@ const openAi = new OpenAI({ apiKey: process.env.OPENAI_API_KEY ?? "" })
77

88
const instructor = Instructor({
99
client: openAi,
10-
mode: "TOOLS",
11-
debug: true
10+
mode: "TOOLS"
1211
})
1312

1413
const statement = "Do not say questionable things"
@@ -17,7 +16,7 @@ const QuestionAnswer = z.object({
1716
question: z.string(),
1817
answer: z.string().superRefine(
1918
LLMValidator(instructor, statement, {
20-
model: "gpt-4"
19+
model: "gpt-4-turbo"
2120
})
2221
)
2322
})
@@ -26,7 +25,7 @@ const question = "What is the meaning of life?"
2625

2726
const check = async (context: string) => {
2827
return await instructor.chat.completions.create({
29-
model: "gpt-3.5-turbo",
28+
model: "gpt-4-turbo",
3029
max_retries: 2,
3130
response_model: { schema: QuestionAnswer, name: "Question and Answer" },
3231
messages: [

examples/query_decomposition/index.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ const createQueryPlan = async (question: string) => {
3838
content: `Consider: ${question}\nGenerate the correct query plan.`
3939
}
4040
],
41-
model: "gpt-4-1106-preview",
41+
model: "gpt-4-turbo",
4242
response_model: { schema: QueryPlanSchema, name: "Query Plan Decomposition" },
4343
max_tokens: 1000,
4444
temperature: 0.0,

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@instructor-ai/instructor",
3-
"version": "1.1.2",
3+
"version": "1.1.1",
44
"description": "structured outputs for llms",
55
"publishConfig": {
66
"access": "public"

src/constants/providers.ts

+17-9
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { omit } from "@/lib"
22
import OpenAI from "openai"
33
import { z } from "zod"
4-
import { MODE, withResponseModel, type Mode } from "zod-stream"
4+
import { withResponseModel, MODE as ZMODE, type Mode } from "zod-stream"
55

6+
export const MODE = ZMODE
67
export const PROVIDERS = {
78
OAI: "OAI",
89
ANYSCALE: "ANYSCALE",
@@ -11,7 +12,6 @@ export const PROVIDERS = {
1112
GROQ: "GROQ",
1213
OTHER: "OTHER"
1314
} as const
14-
1515
export type Provider = keyof typeof PROVIDERS
1616

1717
export const PROVIDER_SUPPORTED_MODES: {
@@ -34,6 +34,19 @@ export const NON_OAI_PROVIDER_URLS = {
3434
} as const
3535

3636
export const PROVIDER_PARAMS_TRANSFORMERS = {
37+
[PROVIDERS.GROQ]: {
38+
[MODE.TOOLS]: function groqToolsParamsTransformer<
39+
T extends z.AnyZodObject,
40+
P extends OpenAI.ChatCompletionCreateParams
41+
>(params: ReturnType<typeof withResponseModel<T, "TOOLS", P>>) {
42+
if (params.tools.some(tool => tool) && params.stream) {
43+
console.warn("Streaming may not be supported when using tools in Groq, try MD_JSON instead")
44+
return params
45+
}
46+
47+
return params
48+
}
49+
},
3750
[PROVIDERS.ANYSCALE]: {
3851
[MODE.JSON_SCHEMA]: function removeAdditionalPropertiesKeyJSONSchema<
3952
T extends z.AnyZodObject,
@@ -90,12 +103,7 @@ export const PROVIDER_SUPPORTED_MODES_BY_MODEL = {
90103
[PROVIDERS.OAI]: {
91104
[MODE.FUNCTIONS]: ["*"],
92105
[MODE.TOOLS]: ["*"],
93-
[MODE.JSON]: [
94-
"gpt-3.5-turbo-1106",
95-
"gpt-4-1106-preview",
96-
"gpt-4-0125-preview",
97-
"gpt-4-turbo-preview"
98-
],
106+
[MODE.JSON]: ["gpt-3.5-turbo-1106", "gpt-4-turbo", "gpt-4-0125-preview", "gpt-4-turbo-preview"],
99107
[MODE.MD_JSON]: ["*"]
100108
},
101109
[PROVIDERS.TOGETHER]: {
@@ -124,7 +132,7 @@ export const PROVIDER_SUPPORTED_MODES_BY_MODEL = {
124132
[MODE.TOOLS]: ["*"]
125133
},
126134
[PROVIDERS.GROQ]: {
127-
[MODE.TOOLS]: ["llama2-70b-4096", "mixtral-8x7b-32768", "gemma-7b-it"],
135+
[MODE.TOOLS]: ["mixtral-8x7b-32768", "gemma-7b-it"],
128136
[MODE.MD_JSON]: ["*"]
129137
}
130138
}

src/dsl/validator.ts

+1-7
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,9 @@ export const LLMValidator = <C extends GenericClient | OpenAI>(
4444
}
4545
}
4646

47-
export const moderationValidator = <C extends GenericClient | OpenAI>(
48-
client: InstructorClient<C>
49-
) => {
47+
export const moderationValidator = (client: InstructorClient<OpenAI>) => {
5048
return async (value: string, ctx: z.RefinementCtx) => {
5149
try {
52-
if (!(client instanceof OpenAI)) {
53-
throw new Error("ModerationValidator only supports OpenAI clients")
54-
}
55-
5650
const response = await client.moderations.create({ input: value })
5751
const flaggedResults = response.results.filter(result => result.flagged)
5852

src/instructor.ts

+60-29
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import {
22
ChatCompletionCreateParamsWithModel,
3+
ClientTypeChatCompletionRequestOptions,
34
GenericChatCompletion,
45
GenericClient,
56
InstructorConfig,
@@ -8,7 +9,7 @@ import {
89
ReturnTypeBasedOnParams
910
} from "@/types"
1011
import OpenAI from "openai"
11-
import { z } from "zod"
12+
import { z, ZodError } from "zod"
1213
import ZodStream, { OAIResponseParser, OAIStream, withResponseModel, type Mode } from "zod-stream"
1314
import { fromZodError } from "zod-validation-error"
1415

@@ -102,11 +103,14 @@ class Instructor<C extends GenericClient | OpenAI> {
102103
}
103104
}
104105

105-
private async chatCompletionStandard<T extends z.AnyZodObject>({
106-
max_retries = MAX_RETRIES_DEFAULT,
107-
response_model,
108-
...params
109-
}: ChatCompletionCreateParamsWithModel<T>): Promise<z.infer<T>> {
106+
private async chatCompletionStandard<T extends z.AnyZodObject>(
107+
{
108+
max_retries = MAX_RETRIES_DEFAULT,
109+
response_model,
110+
...params
111+
}: ChatCompletionCreateParamsWithModel<T>,
112+
requestOptions?: ClientTypeChatCompletionRequestOptions<C>
113+
): Promise<z.infer<T>> {
110114
let attempts = 0
111115
let validationIssues = ""
112116
let lastMessage: OpenAI.ChatCompletionMessageParam | null = null
@@ -147,13 +151,17 @@ class Instructor<C extends GenericClient | OpenAI> {
147151

148152
try {
149153
if (this.client.chat?.completions?.create) {
150-
const result = await this.client.chat.completions.create({
151-
...resolvedParams,
152-
stream: false
153-
})
154+
const result = await this.client.chat.completions.create(
155+
{
156+
...resolvedParams,
157+
stream: false
158+
},
159+
requestOptions
160+
)
161+
154162
completion = result as GenericChatCompletion<typeof result>
155163
} else {
156-
throw new Error("Unsupported client type")
164+
throw new Error("Unsupported client type -- no completion method found.")
157165
}
158166
this.log("debug", "raw standard completion response: ", completion)
159167
} catch (error) {
@@ -176,7 +184,17 @@ class Instructor<C extends GenericClient | OpenAI> {
176184
const data = JSON.parse(parsedCompletion) as z.infer<T> & { _meta?: CompletionMeta }
177185
return { ...data, _meta: { usage: completion?.usage ?? undefined } }
178186
} catch (error) {
179-
this.log("error", "failed to parse completion", parsedCompletion, this.mode)
187+
this.log(
188+
"error",
189+
"failed to parse completion",
190+
parsedCompletion,
191+
this.mode,
192+
"attempt: ",
193+
attempts,
194+
"max attempts: ",
195+
max_retries
196+
)
197+
180198
throw error
181199
}
182200
}
@@ -202,26 +220,38 @@ class Instructor<C extends GenericClient | OpenAI> {
202220
throw new Error("Validation failed.")
203221
}
204222
}
223+
205224
return validation.data
206225
} catch (error) {
226+
if (!(error instanceof ZodError)) {
227+
throw error
228+
}
229+
207230
if (attempts < max_retries) {
208231
this.log(
209232
"debug",
210233
`response model: ${response_model.name} - Retrying, attempt: `,
211234
attempts
212235
)
236+
213237
this.log(
214238
"warn",
215239
`response model: ${response_model.name} - Validation issues: `,
216-
validationIssues
240+
validationIssues,
241+
" - Attempt: ",
242+
attempts,
243+
" - Max attempts: ",
244+
max_retries
217245
)
246+
218247
attempts++
219248
return await makeCompletionCallWithRetries()
220249
} else {
221250
this.log(
222251
"debug",
223252
`response model: ${response_model.name} - Max attempts reached: ${attempts}`
224253
)
254+
225255
this.log(
226256
"error",
227257
`response model: ${response_model.name} - Validation issues: `,
@@ -236,13 +266,10 @@ class Instructor<C extends GenericClient | OpenAI> {
236266
return makeCompletionCallWithRetries()
237267
}
238268

239-
private async chatCompletionStream<T extends z.AnyZodObject>({
240-
max_retries,
241-
response_model,
242-
...params
243-
}: ChatCompletionCreateParamsWithModel<T>): Promise<
244-
AsyncGenerator<Partial<T> & { _meta?: CompletionMeta }, void, unknown>
245-
> {
269+
private async chatCompletionStream<T extends z.AnyZodObject>(
270+
{ max_retries, response_model, ...params }: ChatCompletionCreateParamsWithModel<T>,
271+
requestOptions?: ClientTypeChatCompletionRequestOptions<C>
272+
): Promise<AsyncGenerator<Partial<T> & { _meta?: CompletionMeta }, void, unknown>> {
246273
if (max_retries) {
247274
this.log("warn", "max_retries is not supported for streaming completions")
248275
}
@@ -269,10 +296,13 @@ class Instructor<C extends GenericClient | OpenAI> {
269296
return streamClient.create({
270297
completionPromise: async () => {
271298
if (this.client.chat?.completions?.create) {
272-
const completion = await this.client.chat.completions.create({
273-
...completionParams,
274-
stream: true
275-
})
299+
const completion = await this.client.chat.completions.create(
300+
{
301+
...completionParams,
302+
stream: true
303+
},
304+
requestOptions
305+
)
276306

277307
this.log("debug", "raw stream completion response: ", completion)
278308

@@ -306,18 +336,19 @@ class Instructor<C extends GenericClient | OpenAI> {
306336
P extends T extends z.AnyZodObject ? ChatCompletionCreateParamsWithModel<T>
307337
: ClientTypeChatCompletionParams<OpenAILikeClient<C>> & { response_model: never }
308338
>(
309-
params: P
339+
params: P,
340+
requestOptions?: ClientTypeChatCompletionRequestOptions<C>
310341
): Promise<ReturnTypeBasedOnParams<typeof this.client, P>> => {
311342
this.validateModelModeSupport(params)
312343

313344
if (this.isChatCompletionCreateParamsWithModel(params)) {
314345
if (params.stream) {
315-
return this.chatCompletionStream(params) as ReturnTypeBasedOnParams<
346+
return this.chatCompletionStream(params, requestOptions) as ReturnTypeBasedOnParams<
316347
typeof this.client,
317348
P & { stream: true }
318349
>
319350
} else {
320-
return this.chatCompletionStandard(params) as ReturnTypeBasedOnParams<
351+
return this.chatCompletionStandard(params, requestOptions) as ReturnTypeBasedOnParams<
321352
typeof this.client,
322353
P
323354
>
@@ -326,8 +357,8 @@ class Instructor<C extends GenericClient | OpenAI> {
326357
if (this.client.chat?.completions?.create) {
327358
const result =
328359
this.isStandardStream(params) ?
329-
await this.client.chat.completions.create(params)
330-
: await this.client.chat.completions.create(params)
360+
await this.client.chat.completions.create(params, requestOptions)
361+
: await this.client.chat.completions.create(params, requestOptions)
331362

332363
return result as unknown as ReturnTypeBasedOnParams<OpenAILikeClient<C>, P>
333364
} else {

src/types/index.ts

+7-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ export type GenericCreateParams<M = unknown> = Omit<
1818
[key: string]: unknown
1919
}
2020

21+
export type GenericRequestOptions = Partial<OpenAI.RequestOptions> & {
22+
[key: string]: unknown
23+
}
24+
2125
export type GenericChatCompletion<T = unknown> = Partial<OpenAI.Chat.Completions.ChatCompletion> & {
2226
[key: string]: unknown
2327
choices?: T
@@ -43,15 +47,16 @@ export type GenericClient = {
4347
export type ClientTypeChatCompletionParams<C> =
4448
C extends OpenAI ? OpenAI.ChatCompletionCreateParams : GenericCreateParams
4549

50+
export type ClientTypeChatCompletionRequestOptions<C> =
51+
C extends OpenAI ? OpenAI.RequestOptions : GenericRequestOptions
52+
4653
export type ClientType<C> =
4754
C extends OpenAI ? "openai"
4855
: C extends GenericClient ? "generic"
4956
: never
5057

5158
export type OpenAILikeClient<C> = C extends OpenAI ? OpenAI : C & GenericClient
52-
5359
export type SupportedInstructorClient = GenericClient | OpenAI
54-
5560
export type LogLevel = "debug" | "info" | "warn" | "error"
5661

5762
export type CompletionMeta = Partial<ZCompletionMeta> & {

0 commit comments

Comments (0)