Skip to content

Commit 773d5a6

Browse files
committed
added streaming
1 parent d93b315 commit 773d5a6

File tree

3 files changed

+132
-197
lines changed

3 files changed

+132
-197
lines changed

app/app.py

Lines changed: 12 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,11 @@
1-
from flask import Flask, request, jsonify, render_template
2-
import subprocess
3-
import os
1+
from flask import Flask, request, jsonify, render_template, Response, stream_with_context
42
from rag_system import rag_system
53

64
app = Flask(__name__)
75

86
@app.route('/', methods=['GET', 'POST'])
97
def index():
10-
if request.method == 'POST':
11-
query = request.form.get('query')
12-
if not query:
13-
return render_template('index.html', query=None, response="No query provided")
14-
15-
try:
16-
response = rag_system.answer_query(query)
17-
return render_template('index.html', query=query, response=response)
18-
except Exception as e:
19-
print(f"Error in /ask endpoint: {e}")
20-
return render_template('index.html', query=query, response="Internal Server Error")
21-
return render_template('index.html', query=None, response=None)
8+
return render_template('index.html')
229

2310
@app.route('/ask', methods=['POST'])
2411
def ask():
@@ -27,24 +14,15 @@ def ask():
2714
if not query:
2815
return jsonify({"error": "No query provided"}), 400
2916

30-
try:
31-
response = rag_system.answer_query(query)
32-
return jsonify({"response": response})
33-
except Exception as e:
34-
print(f"Error in /ask endpoint: {e}")
35-
return jsonify({"error": "Internal Server Error"}), 500
36-
37-
38-
# # New endpoint for triggering the rebuild
39-
# def run_get_knowledge_base_script():
40-
# """ Function to run the get_knowledge_base.py script from the parent directory """
41-
# try:
42-
# subprocess.run(['python', 'get_knowledge_base.py'], check=True)
17+
def generate():
18+
try:
19+
for token in rag_system.answer_query_stream(query):
20+
yield token
21+
except Exception as e:
22+
print(f"Error in /ask endpoint: {e}")
23+
yield "Internal Server Error"
4324

44-
# except subprocess.CalledProcessError as e:
45-
# print(f"Error running get_knowledge_base.py: {e}")
46-
# except Exception as e:
47-
# print(f"An error occurred: {e}")
25+
return Response(stream_with_context(generate()), content_type='text/plain')
4826

4927
@app.route('/trigger-rebuild', methods=['POST'])
5028
def trigger_rebuild():
@@ -73,6 +51,6 @@ def trigger_rebuild():
7351
except Exception as e:
7452
print(f"Error in /trigger-rebuild endpoint: {e}")
7553
return jsonify({"error": "Internal Server Error"}), 500
76-
54+
7755
if __name__ == '__main__':
78-
app.run(host='0.0.0.0', port=5000)
56+
app.run(host='0.0.0.0', port=5000)

app/rag_system.py

Lines changed: 36 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import numpy as np
66
from sklearn.metrics.pairwise import cosine_similarity
77

8-
# Ensure you have set the OPENAI_API_KEY in your environment variables
98
openai.api_key = os.getenv("OPENAI_API_KEY")
109

1110
class RAGSystem:
@@ -14,43 +13,23 @@ def __init__(self, knowledge_base_path='knowledge_base.json'):
1413
self.knowledge_base = self.load_knowledge_base()
1514
self.model = SentenceTransformer('all-MiniLM-L6-v2')
1615
self.doc_embeddings = self.embed_knowledge_base()
17-
self.conversation_history = [] # To store the conversation history
16+
self.conversation_history = []
1817

1918
def load_knowledge_base(self):
20-
"""
21-
Load the knowledge base from a JSON file.
22-
"""
2319
with open(self.knowledge_base_path, 'r') as kb_file:
2420
return json.load(kb_file)
2521

2622
def embed_knowledge_base(self):
27-
"""
28-
Embed the knowledge base using the SentenceTransformer model.
29-
Combines 'about' and 'text' fields for each document for embedding.
30-
"""
3123
docs = [f'{doc["about"]}. {doc["text"]}' for doc in self.knowledge_base]
3224
return self.model.encode(docs, convert_to_tensor=True)
3325

3426
def normalize_query(self, query):
35-
"""
36-
Normalize the query by converting it to lowercase and stripping whitespace.
37-
"""
3827
return query.lower().strip()
3928

4029
def retrieve(self, query, similarity_threshold=0.7, high_match_threshold=0.8, max_docs=5):
41-
"""
42-
Retrieve relevant documents from the knowledge base using cosine similarity.
43-
"""
4430
normalized_query = self.normalize_query(query)
45-
print(f"Retrieving context for query: '{normalized_query}'")
46-
47-
# Query embedding
4831
query_embedding = self.model.encode([normalized_query], convert_to_tensor=True)
49-
50-
# Calculate similarities
5132
similarities = cosine_similarity(query_embedding, self.doc_embeddings)[0]
52-
53-
# Initialize relevance scores
5433
relevance_scores = []
5534

5635
for i, doc in enumerate(self.knowledge_base):
@@ -72,77 +51,58 @@ def retrieve(self, query, similarity_threshold=0.7, high_match_threshold=0.8, ma
7251
max_index = np.argmax(similarities)
7352
retrieved_docs.append(f'{self.knowledge_base[max_index]["about"]}. {self.knowledge_base[max_index]["text"]}')
7453

75-
context = "\n\n".join(retrieved_docs)
76-
print("Retrieved Context:\n", context)
54+
return "\n\n".join(retrieved_docs)
55+
56+
def answer_query_stream(self, query):
57+
try:
58+
normalized_query = self.normalize_query(query)
59+
context = self.retrieve(normalized_query)
60+
61+
self.conversation_history.append({"role": "user", "content": query})
7762

78-
return context
63+
messages = [{"role": "system", "content": "You are a helpful assistant."}]
64+
messages.extend(self.conversation_history)
7965

80-
def generate_response(self, query, context):
81-
"""
82-
Generate a response from OpenAI using conversation history and knowledge base context.
83-
"""
84-
# Add user query to conversation history
85-
self.conversation_history.append({"role": "user", "content": query})
86-
87-
# Include conversation history in the prompt
88-
messages = [{"role": "system", "content": "You are a helpful assistant."}]
89-
messages.extend(self.conversation_history) # Include past conversation history
90-
91-
# Prepare the context from the knowledge base
92-
system_message = {
93-
"role": "system",
94-
"content": (
95-
"You are a dedicated assistant designed to provide positive answers about Defang. "
96-
"When the user says 'you', 'your', or any pronoun, interpret it as referring to Defang with context of Defang. "
97-
"If the user's question involves comparisons with or references to other services, you may use external knowledge. "
98-
"However, if the question is strictly about Defang, you must ignore all external knowledge and only utilize the given context. "
99-
"Context: " + context
100-
)
101-
}
66+
system_message = {
67+
"role": "system",
68+
"content": (
69+
"You are a dedicated assistant designed to provide positive answers about Defang. "
70+
"When the user says 'you', 'your', or any pronoun, interpret it as referring to Defang with context of Defang. "
71+
"If the user's question involves comparisons with or references to other services, you may use external knowledge. "
72+
"However, if the question is strictly about Defang, you must ignore all external knowledge and only utilize the given context. "
73+
"Context: " + context
74+
)
75+
}
10276

103-
messages.append(system_message)
77+
messages.append(system_message)
10478

105-
try:
106-
response = openai.ChatCompletion.create(
79+
stream = openai.ChatCompletion.create(
10780
model="gpt-4-turbo",
10881
messages=messages,
10982
temperature=0.5,
11083
max_tokens=2048,
11184
top_p=1,
11285
frequency_penalty=0,
113-
presence_penalty=0
86+
presence_penalty=0,
87+
stream=True
11488
)
11589

116-
generated_response = response['choices'][0]['message']['content'].strip()
117-
118-
# Add the bot's response to the conversation history
119-
self.conversation_history.append({"role": "assistant", "content": generated_response})
120-
121-
print("Generated Response:\n", generated_response)
122-
return generated_response
90+
collected_messages = []
91+
for chunk in stream:
92+
if chunk['choices'][0]['finish_reason'] is not None:
93+
break
94+
content = chunk['choices'][0]['delta'].get('content', '')
95+
collected_messages.append(content)
96+
yield content
12397

124-
except openai.error.OpenAIError as e:
125-
print(f"Error generating response from OpenAI: {e}")
126-
return "An error occurred while generating the response."
98+
full_response = ''.join(collected_messages).strip()
99+
self.conversation_history.append({"role": "assistant", "content": full_response})
127100

128-
def answer_query(self, query):
129-
"""
130-
Answer the user query, leveraging knowledge base context and conversation history.
131-
"""
132-
try:
133-
normalized_query = self.normalize_query(query)
134-
context = self.retrieve(normalized_query)
135-
response = self.generate_response(normalized_query, context)
136-
return response
137101
except Exception as e:
138-
print(f"Error in answer_query: {e}")
139-
return "An error occurred while generating the response."
102+
print(f"Error in answer_query_stream: {e}")
103+
yield "An error occurred while generating the response."
140104

141105
def clear_conversation_history(self):
142-
"""
143-
Clear the stored conversation history.
144-
This can be called to reset the conversation for a new session.
145-
"""
146106
self.conversation_history = []
147107
print("Conversation history cleared.")
148108

0 commit comments

Comments (0)