|
| 1 | +import os |
| 2 | +from presidio_analyzer import AnalyzerEngine, PatternRecognizer |
| 3 | +from presidio_anonymizer import AnonymizerEngine |
| 4 | +from presidio_anonymizer.entities import OperatorConfig |
| 5 | +import json |
| 6 | +from pprint import pprint |
| 7 | +from openai import OpenAI |
| 8 | + |
| 9 | +client = OpenAI(api_key=os.getenv("OPENAI_KEY")) |
| 10 | + |
| 11 | +""" |
| 12 | +
|
| 13 | +1. Remove duplicates |
| 14 | +2. Make PI removal an optional flag |
| 15 | +
|
| 16 | +""" |
| 17 | + |
| 18 | + |
def get_bash_history():
    """Return the 10 most recent lines of the user's bash history.

    Returns:
        list[str]: Up to the last 10 history lines (each line keeps its
        trailing newline, as produced by ``readlines``). An empty list
        is returned when no history file exists.
    """
    history_file = os.path.expanduser("~/.bash_history")
    try:
        with open(history_file, "r") as file:
            history = file.readlines()
    except FileNotFoundError:
        # Return an empty list rather than a message string: callers
        # (e.g. personalize -> remove_duplicates) expect a list of lines,
        # and a string here would get deduped character-by-character.
        return []
    # history[:-10] would return everything EXCEPT the 10 most recent
    # commands; [-10:] keeps exactly the recent ones we want as context.
    return history[-10:]
| 28 | + |
| 29 | + |
def anonymize_bash_history(operations):
    """Scrub detected PII from the history text using Presidio.

    Args:
        operations: Raw bash-history text (a single string).

    Returns:
        str: The same text with recognized PII entities replaced by
        Presidio's default anonymization operators.
    """
    findings = AnalyzerEngine().analyze(text=operations, language="en")
    redacted = AnonymizerEngine().anonymize(
        text=operations, analyzer_results=findings
    )
    return redacted.text
| 38 | + |
| 39 | + |
def remove_duplicates(operations: list[str]) -> list[str]:
    """Drop repeated entries while preserving first-seen order.

    ``list(set(...))`` would dedupe but scramble the command order
    nondeterministically, which degrades the chronological context fed
    to the LLM. ``dict.fromkeys`` keeps insertion order (guaranteed
    since Python 3.7).

    Args:
        operations: History lines, possibly containing duplicates.

    Returns:
        list[str]: Unique entries in their original order.
    """
    return list(dict.fromkeys(operations))
| 42 | + |
| 43 | + |
def stringify_bash_history(operations: list[str]):
    """Join the history entries into one newline-separated string."""
    separator = "\n"
    return separator.join(operations)
| 46 | + |
| 47 | + |
def synthesize_bash_history(desired_operation, gorila_history, history, model="gpt-4"):
    """Summarize the user's bash/tool history into prompt context.

    Asks the chat model to distill recent shell activity and prior
    API-tool interactions into a short paragraph of context relevant to
    the current query (or the literal 'No relevant context found').

    Args:
        desired_operation: The user's current query.
        gorila_history: Previous operations with the API-calling tool.
            (Name keeps the original spelling so existing keyword
            callers are not broken.)
        history: Newline-joined bash-history string.
        model: Chat model name; defaults to "gpt-4", which was
            previously hard-coded.

    Returns:
        str: The model's synthesized context paragraph.
    """
    SYSTEM_PROMPT = """
You are an assistant for a developer who wants to find the right API call for a specific task.
The developer has bash history that contains the command they used to perform a task.
Synthesize their bash history to provide the API call prediction model with extra context about the task.
Use the previous bash history as well as their query to provide the model with a short paragraph of possible relevant context.
There is a chance that their query has nothing to do with the bash history, so in that case, return 'No relevant context found'.
"""
    USER_PROMPT = f"""
The user's bash history is:
{history}

The user's previous operations with the API calling tool are:
{gorila_history}

The query of the user is:
{desired_operation}

Use this information to provide the model with a short paragraph of possible relevant context.
"""

    # `client` is the module-level OpenAI client configured from
    # the OPENAI_KEY environment variable.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": USER_PROMPT},
        ],
    )
    return response.choices[0].message.content
| 77 | + |
| 78 | + |
def personalize(query, gorilla_history, pi_removal=True):
    """Build a personalized context summary for *query*.

    Pulls recent bash history, removes duplicate entries, optionally
    strips personally identifiable information, and asks the LLM to
    synthesize context relevant to the query.

    Args:
        query: The user's current request.
        gorilla_history: Previous operations with the API-calling tool.
        pi_removal: When True (default), anonymize the history text
            before sending it to the LLM.

    Returns:
        str: The synthesized context paragraph.
    """
    recent_commands = remove_duplicates(get_bash_history())
    context_text = stringify_bash_history(recent_commands)
    if pi_removal:
        context_text = anonymize_bash_history(context_text)
    return synthesize_bash_history(query, gorilla_history, context_text)
0 commit comments