konveyor · JonahSussman · Jul 24, 2024 · Jun 6, 2024 · Jul 10, 2024 · Jul 10, 2024
diff --git a/.trunk/configs/bandit.yaml b/.trunk/configs/bandit.yaml
@@ -1,3 +1,5 @@
 assert_used:
   skips:
     - "*/kai-service/tests/**.py"
+    - "**/test_*.py"
+    - "**/*_test.py"
diff --git a/example/run_demo.py b/example/run_demo.py
@@ -6,15 +6,20 @@
 import sys
 import time
 import traceback
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from dataclasses import asdict, dataclass
+from concurrent.futures import Future, ThreadPoolExecutor, as_completed
+from pathlib import Path
 
 import requests
 
+from kai.models.report_types import ExtendedIncident
+from kai.routes.get_incident_solutions_for_file import (
+    PostGetIncidentSolutionsForFileParams,
+)
+
 # Ensure that we have 'kai' in our import path
 sys.path.append("../../kai")
 from kai.kai_logging import formatter
-from kai.report import Report
+from kai.models.report import Report
 
 KAI_LOG = logging.getLogger(__name__)
 
@@ -27,74 +32,22 @@
 # 2) Limit to specific rulesets/violations we are interested in
 
 
-@dataclass
-class KaiIncident:
-    violation_name: str
-    ruleset_name: str
-    analysis_message: str
-    line_number: int | None = None
-    incident_variables: dict | None = None
-    incident_snip: str | None = None
-
-    @staticmethod
-    def from_incident(incident) -> "KaiIncident":
-        return KaiIncident(
-            incident["violation_name"],
-            incident["ruleset_name"],
-            incident["message"],
-            incident["lineNumber"],  # this may be empty in the report
-            incident["variables"],
-            "",  # We don't plan to use 'incident_snip'
-        )
-
-
-@dataclass
-class KaiRequestParams:
-    application_name: str
-    file_name: str
-    file_contents: str
-    incidents: list[KaiIncident]
-    include_llm_results: bool = True
-
-    @staticmethod
-    def from_incidents(
-        app_name, file_path, file_contents, incidents
-    ) -> "KaiRequestParams":
-        kai_incidents = []
-        for incident in incidents:
-            kai_incidents.append(KaiIncident.from_incident(incident))
-        return KaiRequestParams(app_name, file_path, file_contents, kai_incidents)
-
-    def to_json(self):
-        return json.dumps(asdict(self))
-
-
-def collect_parameters(file_path, violations) -> KaiRequestParams:
-    with open(f"{SAMPLE_APP_DIR}/{file_path}", "r") as f:
-        file_contents = f.read()
-
-    params = KaiRequestParams.from_incidents(
-        APP_NAME, file_path, file_contents, violations
-    )
-    return params
-
-
-def _generate_fix(params: KaiRequestParams):
+def _generate_fix(params: PostGetIncidentSolutionsForFileParams):
     headers = {"Content-type": "application/json", "Accept": "text/plain"}
     response = requests.post(
         ###
         # If we are sending only one incident, we can use this endpoint
         # f"{SERVER_URL}/get_incident_solution",
         ###
         f"{SERVER_URL}/get_incident_solutions_for_file",
-        data=params.to_json(),
+        data=params.model_dump_json(),
         headers=headers,
         timeout=3600,
     )
     return response
 
 
-def generate_fix(params: KaiRequestParams):
+def generate_fix(params: PostGetIncidentSolutionsForFileParams):
     retries_left = 6
     for i in range(retries_left):
         try:
@@ -117,22 +70,27 @@ def generate_fix(params: KaiRequestParams):
     )
 
 
-def parse_response(response):
+def parse_response(response: requests.Response):
     try:
         return response.json()
     except Exception as e:
         KAI_LOG.error(f"Failed to parse response with error: {e}")
         KAI_LOG.error(f"Response: {response}")
         sys.exit(1)
+
     ## TODO:  Below is rough guess at error handling, need to confirm
     # if "error" in response_json:
     #    print(f"Error: {response_json['error']}")
     #    return ""
-    # TODO: When we are batching incidents we get back a parse result so we dont need below
-    # return pydantic_models.parse_file_solution_content(response_json["updated_file"])
+
+    # TODO: When we are batching incidents we get back a parse result, so we
+    # don't need the line below:
+    # return pydantic_models.parse_file_solution_content(response_json["updated_file"])
 
 
-def write_to_disk(file_path, updated_file_contents):
+def write_to_disk(file_path: Path, updated_file_contents: dict):
+    file_path = str(file_path)  # Temporary fix for Path object
+
     # We expect that we are overwriting the file, so all directories should exist
     intended_file_path = f"{SAMPLE_APP_DIR}/{file_path}"
     if not os.path.exists(intended_file_path):
@@ -205,36 +163,54 @@ def write_to_disk(file_path, updated_file_contents):
             sys.exit(1)
 
 
-def process_file(file_path, violations, num_impacted_files, count):
+def process_file(
+    file_path: Path,
+    incidents: list[ExtendedIncident],
+    num_impacted_files: int,
+    count: int,
+):
     start = time.time()
     KAI_LOG.info(
-        f"File #{count} of {num_impacted_files} - Processing {file_path} which has {len(violations)} violations"
+        f"File #{count} of {num_impacted_files} - Processing {file_path} which has {len(incidents)} incidents."
+    )
+
+    with open(f"{SAMPLE_APP_DIR}/{str(file_path)}", "r") as f:
+        file_contents = f.read()
+
+    params = PostGetIncidentSolutionsForFileParams(
+        file_name=str(file_path),
+        file_contents=file_contents,
+        application_name=APP_NAME,
+        incidents=incidents,
+        include_llm_results=True,
     )
 
-    params = collect_parameters(file_path, violations)
     response = generate_fix(params)
     KAI_LOG.info(f"Response StatusCode: {response.status_code} for {file_path}\n")
-    updated_file_contents = parse_response(response)
+
+    updated_file_contents: dict = parse_response(response)
     if os.getenv("WRITE_TO_DISK", "").lower() not in ("false", "0", "no"):
         write_to_disk(file_path, updated_file_contents)
+
     end = time.time()
-    return f"{end-start}s to process {file_path} with {len(violations)} violations"
+    return f"{end-start}s to process {file_path} with {len(incidents)} violations"
 
 
-def run_demo(report):
+def run_demo(report: Report):
     impacted_files = report.get_impacted_files()
     num_impacted_files = len(impacted_files)
     remaining_files = num_impacted_files
-    total_violations = sum(len(violations) for violations in impacted_files.values())
-    print(f"{num_impacted_files} files with a total of {total_violations} violations.")
+
+    total_incidents = sum(len(incidents) for incidents in impacted_files.values())
+    print(f"{num_impacted_files} files with a total of {total_incidents} incidents.")
 
     max_workers = int(os.environ.get("KAI_MAX_WORKERS", 8))
     KAI_LOG.info(f"Running in parallel with {max_workers} workers")
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        futures = []
-        for count, (file_path, violations) in enumerate(impacted_files.items(), 1):
+        futures: list[Future[str]] = []
+        for count, (file_path, incidents) in enumerate(impacted_files.items(), 1):
             future = executor.submit(
-                process_file, file_path, violations, num_impacted_files, count
+                process_file, file_path, incidents, num_impacted_files, count
             )
             futures.append(future)
 
@@ -245,6 +221,8 @@ def run_demo(report):
             except Exception as exc:
                 KAI_LOG.error(f"Generated an exception: {exc}")
                 KAI_LOG.error(traceback.format_exc())
+                exit(1)
+
             remaining_files -= 1
             KAI_LOG.info(
                 f"{remaining_files} files remaining from total of {num_impacted_files}"
@@ -258,8 +236,10 @@ def run_demo(report):
     KAI_LOG.setLevel("DEBUG")
 
     start = time.time()
+
     coolstore_analysis_dir = "./analysis/coolstore/output.yaml"
-    r = Report.load_report_from_file(coolstore_analysis_dir)
-    run_demo(r)
+    report = Report.load_report_from_file(coolstore_analysis_dir)
+    run_demo(report)
+
     end = time.time()
     KAI_LOG.info(f"Total time to process '{coolstore_analysis_dir}' was {end-start}s")
diff --git a/kai/config.toml b/kai/config.toml
@@ -4,28 +4,32 @@ log_dir = "$pwd/logs"
 demo_mode = false
 trace_enabled = true
 
-# **Postgresql incident store**
-# [incident_store]
-# provider = "postgresql"
+solution_consumers = ["diff_only", "llm_summary"]
 
+# **Postgresql incident store**
+# ```
 # [incident_store.args]
+# provider = "postgresql"
 # host = "127.0.0.1"
 # database = "kai"
 # user = "kai"
 # password = "dog8code"
+# ```
 
 # **In-memory sqlite incident store**
 # ```
-# [incident_store]
-# provider = "sqlite"
-#
 # [incident_store.args]
+# provider = "sqlite"
 # connection_string = "sqlite:///:memory:"
+# ```
 
 [incident_store]
-provider = "postgresql"
+solution_detectors = "naive"
+solution_producers = "text_only"
 
 [incident_store.args]
+provider = "postgresql"
+
 host = "127.0.0.1"
 database = "kai"
 user = "kai"

diff --git a/kai/constants.py b/kai/constants.py
@@ -1,4 +1,5 @@
 import os
+import pathlib
 
 """
 This file exists because we need to define some constants - specifically file
@@ -18,3 +19,6 @@
 PATH_TEMPLATES = os.path.join(PATH_DATA, "templates")
 
 PATH_LOCAL_REPO = os.path.join(PATH_GIT_ROOT, "samples/sample_repos")
+
+PATH_TESTS = os.path.join(PATH_GIT_ROOT, "tests")
+PATH_TEST_DATA = pathlib.Path(os.path.join(PATH_GIT_ROOT, "tests/test_data"))
diff --git a/kai/data/benchmarks/templates/main.jinja b/kai/data/benchmarks/templates/main.jinja
@@ -34,7 +34,7 @@ Source file contents:
 {% for incident in incidents %}
 ### incident {{ loop.index0 }}
 incident to fix: "{{ incident.analysis_message }}"
-Line number: {{ incident.analysis_line_number }}
+Line number: {{ incident.line_number }}
 {% if incident.solved_example_diff is defined %}
 Solved example:
 ```diff

diff --git a/kai/data/templates/main.jinja b/kai/data/templates/main.jinja
@@ -33,21 +33,10 @@ Source file contents:
 
 {% for incident in incidents %}
 ### incident {{ loop.index0 }}
-incident to fix: "{{ incident.analysis_message }}"
-Line number: {{ incident.analysis_line_number }}
-{% if incident.solved_example_diff is defined %}
-Solved example:
-```diff
-{{ incident.solved_example_diff }}
-```{# {% elif incident.solved_example_file is defined %}
-Solved example before changes:
-```{{ incident.src_file_language }}
-{{ incident.solved_example_file.before }}
-```
-Solved example after changes:
-```{{ incident.src_file_language }}
-{{ incident.solved_example_file.after }}
-``` #}
+incident to fix: "{{ incident.message }}"
+Line number: {{ incident.line_number }}
+{% if incident.solution_str is defined %}
+{{ incident.solution_str }}
 {% endif %}
 {% endfor %}
 

diff --git a/kai/data/templates/solution_handling/before_and_after.jinja b/kai/data/templates/solution_handling/before_and_after.jinja
@@ -0,0 +1,9 @@
+Solution before changes:
+```
+{{ solution.original_code }}
+```
+
+Solution after changes:
+```
+{{ solution.updated_code }}
+```
diff --git a/kai/data/templates/solution_handling/diff_only.jinja b/kai/data/templates/solution_handling/diff_only.jinja
@@ -0,0 +1,3 @@
+Solution diff:
+```diff
+{{ solution.file_diff }}
diff --git a/kai/data/templates/solution_handling/generation.jinja b/kai/data/templates/solution_handling/generation.jinja
@@ -0,0 +1,42 @@
+{% if model_provider.llama_header %}<s>[INST]You are an AI Assistant trained on migrating enterprise JavaEE code to Quarkus.<<SYS>>{% endif %}
+
+I will give you an example of a JavaEE file that has been migrated to Quarkus.
+
+You will need to reason through the changes required to update the JavaEE file to Quarkus.
+
+You will then provide a step-by-step explanation of the changes required so that someone could recreate it in a similar situation.
+
+# Input information
+
+## Input File
+
+File name: "{{ src_file_name }}"
+Source file contents:
+```{{ src_file_language }}
+{{ src_file_contents }}
+```
+
+## Incident
+
+Incident that was fixed: "{{ incident.analysis_message }}"
+Line number: {{ incident.line_number }}
+
+## Solution
+
+File name: "{{ sln_file_name }}"
+Source file contents:
+```{{ sln_file_language }}
+{{ sln_file_contents }}
+```
+
+# Output Instructions
+Structure your output in Markdown format such as:
+
+## Reasoning
+Write the step by step reasoning in this markdown section. If you are unsure of a step or reasoning, clearly state you are unsure and why.
+
+## Additional Information (optional)
+
+If you have any additional details or steps that need to be performed, put it here.
+
+{% if model_provider.llama_header %}[/INST]{% endif %}
diff --git a/kai/data/templates/solution_handling/llm_summary.jinja b/kai/data/templates/solution_handling/llm_summary.jinja
@@ -0,0 +1,3 @@
+Summary of changes for solution:
+
+{{ solution.llm_summary }}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		Summary of changes for solution:

		{{ solution.llm_summary }}