|
| 1 | +import os |
| 2 | +from presidio_analyzer import AnalyzerEngine, PatternRecognizer |
| 3 | +from presidio_anonymizer import AnonymizerEngine |
| 4 | +from presidio_anonymizer.entities import OperatorConfig |
| 5 | +import json |
| 6 | +from pprint import pprint |
| 7 | +from openai import OpenAI |
| 8 | + |
| 9 | +client = OpenAI(api_key=os.getenv("OPENAI_KEY")) |
| 10 | + |
| 11 | +""" |
| 12 | +
|
| 13 | +1. Remove duplicates |
| 14 | +2. Make PI removal an optional flag |
| 15 | +
|
| 16 | +""" |
| 17 | + |
| 18 | + |
def get_bash_history():
    """Return the 10 most recent lines of the user's bash history.

    Returns:
        list[str]: Up to the last 10 history lines (each line keeps its
        trailing newline, as produced by ``readlines``). An empty list
        is returned when no history file exists.
    """
    history_file = os.path.expanduser("~/.bash_history")
    try:
        with open(history_file, "r") as file:
            history = file.readlines()
    except FileNotFoundError:
        # Return an empty list rather than a message string: callers
        # (e.g. personalize -> remove_duplicates) expect a list of lines,
        # and a string here would get deduped character-by-character.
        return []
    # history[:-10] would return everything EXCEPT the 10 most recent
    # commands; [-10:] keeps exactly the recent ones we want as context.
    return history[-10:]
| 28 | + |
| 29 | + |
def anonymize_bash_history(operations):
    """Scrub detected PII from the history text using Presidio.

    Args:
        operations: Raw bash-history text (a single string).

    Returns:
        str: The same text with recognized PII entities replaced by
        Presidio's default anonymization operators.
    """
    findings = AnalyzerEngine().analyze(text=operations, language="en")
    redacted = AnonymizerEngine().anonymize(
        text=operations, analyzer_results=findings
    )
    return redacted.text
| 38 | + |
| 39 | + |
def remove_duplicates(operations: list[str]) -> list[str]:
    """Drop repeated entries while preserving first-seen order.

    ``list(set(...))`` would dedupe but scramble the command order
    nondeterministically, which degrades the chronological context fed
    to the LLM. ``dict.fromkeys`` keeps insertion order (guaranteed
    since Python 3.7).

    Args:
        operations: History lines, possibly containing duplicates.

    Returns:
        list[str]: Unique entries in their original order.
    """
    return list(dict.fromkeys(operations))
| 42 | + |
| 43 | + |
def stringify_bash_history(operations: list[str]):
    """Join the history entries into one newline-separated string."""
    separator = "\n"
    return separator.join(operations)
| 46 | + |
| 47 | + |
def synthesize_bash_history(desired_operation, gorila_history, history, model="gpt-4"):
    """Summarize the user's bash/tool history into prompt context.

    Asks the chat model to distill recent shell activity and prior
    API-tool interactions into a short paragraph of context relevant to
    the current query (or the literal 'No relevant context found').

    Args:
        desired_operation: The user's current query.
        gorila_history: Previous operations with the API-calling tool.
            (Name keeps the original spelling so existing keyword
            callers are not broken.)
        history: Newline-joined bash-history string.
        model: Chat model name; defaults to "gpt-4", which was
            previously hard-coded.

    Returns:
        str: The model's synthesized context paragraph.
    """
    SYSTEM_PROMPT = """
You are an assistant for a developer who wants to find the right API call for a specific task.
The developer has bash history that contains the command they used to perform a task.
Synthesize their bash history to provide the API call prediction model with extra context about the task.
Use the previous bash history as well as their query to provide the model with a short paragraph of possible relevant context.
There is a chance that their query has nothing to do with the bash history, so in that case, return 'No relevant context found'.
"""
    USER_PROMPT = f"""
The user's bash history is:
{history}

The user's previous operations with the API calling tool are:
{gorila_history}

The query of the user is:
{desired_operation}

Use this information to provide the model with a short paragraph of possible relevant context.
"""

    # `client` is the module-level OpenAI client configured from
    # the OPENAI_KEY environment variable.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": USER_PROMPT},
        ],
    )
    return response.choices[0].message.content
| 77 | + |
| 78 | + |
def personalize(query, gorilla_history, pi_removal=True):
    """Build a personalized context summary for *query*.

    Pulls recent bash history, removes duplicate entries, optionally
    strips personally identifiable information, and asks the LLM to
    synthesize context relevant to the query.

    Args:
        query: The user's current request.
        gorilla_history: Previous operations with the API-calling tool.
        pi_removal: When True (default), anonymize the history text
            before sending it to the LLM.

    Returns:
        str: The synthesized context paragraph.
    """
    recent_commands = remove_duplicates(get_bash_history())
    context_text = stringify_bash_history(recent_commands)
    if pi_removal:
        context_text = anonymize_bash_history(context_text)
    return synthesize_bash_history(query, gorilla_history, context_text)
0 commit comments