Commit 36b57ff

Athena: Implement verification tests for basic approach of module_text_llm (#113)
1 parent 867683f commit 36b57ff

5 files changed: +220 -11 lines changed


athena/scripts/test_modules.py

Lines changed: 45 additions & 11 deletions
@@ -1,9 +1,15 @@
 import subprocess
 import os
 import sys
+import argparse


 def main():
+    parser = argparse.ArgumentParser(description='Run tests for Athena modules')
+    parser.add_argument('--include-real', action='store_true',
+                        help='Include real tests in addition to mock tests')
+    args = parser.parse_args()
+
     modules = [
         "docs",
         "log_viewer",
@@ -43,19 +49,47 @@ def main():
         print(f"Using Python path: {python_path}")

         try:
-            # Install pytest in the virtual environment
-            print(f"Installing pytest for {module}...")
-            subprocess.run([pip_path, "install", "pytest"], check=True, capture_output=True, text=True)
+            # Install pytest and pytest-asyncio in the virtual environment
+            print(f"Installing pytest and pytest-asyncio for {module}...")
+            subprocess.run([pip_path, "install", "pytest", "pytest-asyncio"], check=True, capture_output=True, text=True)

-            # Run pytest using the module's virtual environment
-            result = subprocess.run([python_path, "-m", "pytest", test_dir], capture_output=True, text=True)
-            if result.returncode != 0:
-                print(f"Tests failed for {module}:")
-                print(result.stdout)
-                print(result.stderr)
-                success = False
+            # Run pytest using the module's virtual environment, only running tests from mock directories
+            mock_test_dir = os.path.join(test_dir, "mock")
+            if os.path.exists(mock_test_dir):
+                print(f"\nRunning mock tests for {module}...")
+                result = subprocess.run([python_path, "-m", "pytest", mock_test_dir, "-v"], check=False)
+                if result.returncode != 0:
+                    print(f"\nMock tests failed for {module}")
+                    success = False
+                else:
+                    print(f"\nMock tests passed for {module}")
             else:
-                print(f"Tests passed for {module}")
+                print(f"No mock tests found for {module}, skipping...")
+
+            # Run real tests if requested and if module has real tests
+            if args.include_real:
+                real_test_dir = os.path.join(test_dir, "real")
+                if os.path.exists(real_test_dir):
+                    # Change to the module directory for real tests
+                    module_dir = os.path.join(os.getcwd(), module)
+                    if os.path.exists(module_dir):
+                        original_dir = os.getcwd()
+                        os.chdir(module_dir)
+                        print(f"\nRunning real tests from {module_dir}...")
+                        # Run pytest with the real test directory as the test path
+                        result = subprocess.run([python_path, "-m", "pytest", '../../../'+real_test_dir, "-v"], check=False)
+                        if result.returncode != 0:
+                            print(f"\nReal tests failed for {module}")
+                            success = False
+                        else:
+                            print(f"\nReal tests passed for {module}")
+                        # Change back to original directory
+                        os.chdir(original_dir)
+                    else:
+                        print(f"Module directory not found for {module}, skipping real tests...")
+                else:
+                    print(f"No real tests found for {module}, skipping...")
+
         except Exception as e:
             print(f"Error running tests for {module}: {str(e)}")
             success = False
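
Note: with this change, the script runs only each module's mock test directory by default; real tests run only when the new flag is passed, e.g. python scripts/test_modules.py --include-real. The exact working directory is an assumption here: the script resolves module paths via os.getcwd(), so it is presumably invoked from the athena directory.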
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+import os
+import pytest
+import nltk
+import asyncio
+from module_text_llm.basic_approach import BasicApproachConfig
+from llm_core.models.openai import OpenAIModelConfig
+
+
+@pytest.fixture(scope="session", autouse=True)
+def setup_environment():
+
+    nltk.download('punkt', quiet=True)
+    nltk.download('punkt_tab', quiet=True)
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Create an instance of the default event loop for each test case."""
+    loop = asyncio.get_event_loop_policy().new_event_loop()
+    yield loop
+    loop.close()
+
+@pytest.fixture
+def real_config():
+    """Create a real configuration for testing with Azure OpenAI."""
+    return BasicApproachConfig(
+        max_input_tokens=5000,
+        model=OpenAIModelConfig(
+            model_name="azure_openai_gpt-4o",
+            get_model=lambda: None  # This will be set by the module
+        ),
+        type="basic"
+    )
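
For orientation, a minimal consumer of these fixtures might look like the hypothetical test below. This is only a sketch, not part of the commit, and it assumes pytest-asyncio is installed (which the updated test_modules.py now installs alongside pytest).

# Hypothetical smoke test (not part of this commit) exercising the real_config fixture.
import pytest

@pytest.mark.asyncio
async def test_real_config_smoke(real_config):
    # These values come straight from the fixture definition above.
    assert real_config.type == "basic"
    assert real_config.max_input_tokens == 5000
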
Lines changed: 143 additions & 0 deletions
@@ -0,0 +1,143 @@
+import pytest
+from athena.text import Exercise, Submission, Feedback
+from athena.schemas.exercise_type import ExerciseType
+
+@pytest.mark.asyncio
+async def test_generate_suggestions_algorithm_explanation(real_config):
+    """Test feedback generation for explaining an algorithm."""
+    exercise = Exercise(
+        id=1,
+        title="Algorithm Explanation Exercise",
+        type=ExerciseType.text,
+        max_points=10,
+        bonus_points=2,
+        grading_instructions="Explain the algorithm clearly, including its time complexity and space complexity.",
+        problem_statement="Explain how the binary search algorithm works. Include its time complexity and when it should be used.",
+        example_solution="Binary search is an efficient algorithm for finding an element in a sorted array. It works by repeatedly dividing the search interval in half. If the value of the search key is less than the item in the middle of the interval, narrow the interval to the lower half. Otherwise, narrow it to the upper half. The time complexity is O(log n) because we divide the search space in half each time. Space complexity is O(1) as we only use a constant amount of extra space.",
+        grading_criteria=[]
+    )
+
+    submission = Submission(
+        id=1,
+        exerciseId=exercise.id,
+        text="Binary search is when you look for something in a sorted list by checking the middle element. If it's not there, you look in the left or right half. It's pretty fast."
+    )
+
+    feedbacks = await real_config.generate_suggestions(
+        exercise=exercise,
+        submission=submission,
+        config=real_config,
+        debug=False,
+        is_graded=True
+    )
+
+    for feedback in feedbacks:
+        print(feedback.description)
+        print("--------------------------------")
+
+    assert isinstance(feedbacks, list)
+    assert len(feedbacks) > 0, "Expected feedback about algorithm explanation"
+
+    # Combine all feedback for analysis
+    all_feedback = " ".join(f.description.lower() for f in feedbacks)
+
+    # Technical Accuracy Checks - must include at least half of the terms
+    required_complexity_terms = ["time complexity", "o(log n)", "space complexity", "o(1)"]
+    found_complexity = [term for term in required_complexity_terms if term in all_feedback]
+    min_required = len(required_complexity_terms) // 2
+    assert len(found_complexity) >= min_required, f"Feedback must include at least {min_required} complexity terms. Found: {', '.join(found_complexity)}"
+
+    required_algorithm_terms = ["sorted", "interval", "element"]
+    found_algorithm = [term for term in required_algorithm_terms if term in all_feedback]
+    min_required = len(required_algorithm_terms) // 2
+    assert len(found_algorithm) >= min_required, f"Feedback must include at least {min_required} algorithm terms. Found: {', '.join(found_algorithm)}"
+
+@pytest.mark.asyncio
+async def test_generate_suggestions_code_documentation(real_config):
+    """Test feedback generation for code documentation exercise."""
+    exercise = Exercise(
+        id=2,
+        title="Code Documentation Exercise",
+        type=ExerciseType.text,
+        max_points=10,
+        bonus_points=2,
+        grading_instructions="Document the code's purpose, parameters, return values, and any important notes about usage.",
+        problem_statement="Write documentation for a function that calculates the factorial of a number. Include its purpose, parameters, return value, and any edge cases to consider.",
+        example_solution="This function calculates the factorial of a non-negative integer n. Parameters: n (int) - The number to calculate factorial for. Returns: int - The factorial of n. Note: This function will raise a ValueError if n is negative. The factorial of 0 is defined as 1.",
+        grading_criteria=[]
+    )
+
+    submission = Submission(
+        id=2,
+        exerciseId=exercise.id,
+        text="This function finds the factorial. It takes a number and multiplies it by all numbers below it until it reaches 1."
+    )
+
+    feedbacks = await real_config.generate_suggestions(
+        exercise=exercise,
+        submission=submission,
+        config=real_config,
+        debug=False,
+        is_graded=True
+    )
+
+    for feedback in feedbacks:
+        print(feedback.description)
+        print("--------------------------------")
+
+    assert isinstance(feedbacks, list)
+    assert len(feedbacks) > 0, "Expected feedback about documentation"
+
+    # Combine all feedback for analysis
+    all_feedback = " ".join(f.description.lower() for f in feedbacks)
+
+    # Documentation Requirements - must include at least half of the terms
+    required_doc_terms = ["parameter", "return", "edge case", "negative", "0"]
+    found_doc = [term for term in required_doc_terms if term in all_feedback]
+    min_required = len(required_doc_terms) // 2
+    assert len(found_doc) >= min_required, f"Feedback must include at least {min_required} documentation terms. Found: {', '.join(found_doc)}"
+
+@pytest.mark.asyncio
+async def test_generate_suggestions_design_pattern(real_config):
+    """Test feedback generation for explaining a design pattern."""
+    exercise = Exercise(
+        id=3,
+        title="Design Pattern Explanation Exercise",
+        type=ExerciseType.text,
+        max_points=10,
+        bonus_points=2,
+        grading_instructions="Explain the design pattern, its use cases, advantages, and disadvantages.",
+        problem_statement="Explain the Singleton design pattern. Include when it should be used and its potential drawbacks.",
+        example_solution="The Singleton pattern ensures a class has only one instance and provides a global point of access to it. It's useful when exactly one object is needed to coordinate actions across the system. Advantages include controlled access to the sole instance and reduced namespace pollution. Disadvantages include potential violation of the Single Responsibility Principle and difficulty in unit testing due to global state.",
+        grading_criteria=[]
+    )
+
+    submission = Submission(
+        id=3,
+        exerciseId=exercise.id,
+        text="Singleton is when you make sure there's only one copy of something in your program. It's good for saving memory."
+    )
+
+    feedbacks = await real_config.generate_suggestions(
+        exercise=exercise,
+        submission=submission,
+        config=real_config,
+        debug=False,
+        is_graded=True
+    )
+
+    for feedback in feedbacks:
+        print(feedback.description)
+        print("--------------------------------")
+
+    assert isinstance(feedbacks, list)
+    assert len(feedbacks) > 0, "Expected feedback about design pattern explanation"
+
+    # Combine all feedback for analysis
+    all_feedback = " ".join(f.description.lower() for f in feedbacks)
+
+    # Design Pattern Requirements - must include at least half of the terms
+    required_pattern_terms = ["instance", "advantage", "drawback", "use"]
+    found_pattern = [term for term in required_pattern_terms if term in all_feedback]
+    min_required = len(required_pattern_terms) // 2
+    assert len(found_pattern) >= min_required, f"Feedback must include at least {min_required} design pattern terms. Found: {', '.join(found_pattern)}"
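
All three tests repeat the same "at least half of the required terms" check against the combined feedback text. A possible refactoring, shown only as a hypothetical sketch and not part of this commit, would pull that pattern into a small helper:

# Hypothetical helper (not part of this commit) factoring out the repeated keyword check.
def assert_min_terms(all_feedback, required_terms, label):
    found = [term for term in required_terms if term in all_feedback]
    min_required = len(required_terms) // 2
    assert len(found) >= min_required, (
        f"Feedback must include at least {min_required} {label} terms. "
        f"Found: {', '.join(found)}"
    )

Each test would then call, for example, assert_min_terms(all_feedback, required_complexity_terms, "complexity").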

0 commit comments
