|
| 1 | +import pytest |
| 2 | +from athena.text import Exercise, Submission, Feedback |
| 3 | +from athena.schemas.exercise_type import ExerciseType |
| 4 | + |
@pytest.mark.asyncio
async def test_generate_suggestions_algorithm_explanation(real_config):
    """Verify feedback vocabulary for a shallow binary-search explanation.

    Builds a text exercise about binary search together with a deliberately
    vague submission, asks ``real_config.generate_suggestions`` for graded
    feedback, and checks that the combined, lower-cased feedback text
    mentions enough complexity-related and algorithm-related terms.

    NOTE(review): ``real_config`` is a fixture defined outside this file;
    presumably it is backed by a real model, so results may vary between
    runs - confirm before tightening the thresholds.
    """
    binary_search_exercise = Exercise(
        id=1,
        title="Algorithm Explanation Exercise",
        type=ExerciseType.text,
        max_points=10,
        bonus_points=2,
        grading_instructions="Explain the algorithm clearly, including its time complexity and space complexity.",
        problem_statement="Explain how the binary search algorithm works. Include its time complexity and when it should be used.",
        example_solution="Binary search is an efficient algorithm for finding an element in a sorted array. It works by repeatedly dividing the search interval in half. If the value of the search key is less than the item in the middle of the interval, narrow the interval to the lower half. Otherwise, narrow it to the upper half. The time complexity is O(log n) because we divide the search space in half each time. Space complexity is O(1) as we only use a constant amount of extra space.",
        grading_criteria=[],
    )
    vague_answer = Submission(
        id=1,
        exerciseId=binary_search_exercise.id,
        text="Binary search is when you look for something in a sorted list by checking the middle element. If it's not there, you look in the left or right half. It's pretty fast.",
    )

    feedbacks = await real_config.generate_suggestions(
        exercise=binary_search_exercise,
        submission=vague_answer,
        config=real_config,
        debug=False,
        is_graded=True,
    )

    # Dump every description so a failing run shows what was actually produced.
    separator = "--------------------------------"
    for item in feedbacks:
        print(item.description)
        print(separator)

    assert isinstance(feedbacks, list)
    assert feedbacks, "Expected feedback about algorithm explanation"

    # All term checks below are plain substring tests on this lower-cased text.
    lowered_descriptions = [item.description.lower() for item in feedbacks]
    combined_text = " ".join(lowered_descriptions)

    def terms_present(candidates):
        """Return the candidates that occur in the combined feedback text."""
        return [candidate for candidate in candidates if candidate in combined_text]

    # Complexity vocabulary: 4 candidates, floor(4 / 2) = 2 must appear.
    complexity_terms = ["time complexity", "o(log n)", "space complexity", "o(1)"]
    complexity_hits = terms_present(complexity_terms)
    threshold = len(complexity_terms) // 2
    assert len(complexity_hits) >= threshold, f"Feedback must include at least {threshold} complexity terms. Found: {', '.join(complexity_hits)}"

    # Algorithm vocabulary: 3 candidates, floor(3 / 2) = 1 must appear
    # (integer division rounds the "half" down).
    algorithm_terms = ["sorted", "interval", "element"]
    algorithm_hits = terms_present(algorithm_terms)
    threshold = len(algorithm_terms) // 2
    assert len(algorithm_hits) >= threshold, f"Feedback must include at least {threshold} algorithm terms. Found: {', '.join(algorithm_hits)}"
| 54 | + |
@pytest.mark.asyncio
async def test_generate_suggestions_code_documentation(real_config):
    """Verify feedback vocabulary for an incomplete factorial docstring.

    Builds a text exercise asking for documentation of a factorial function
    together with a submission that omits parameters, return value and edge
    cases, asks ``real_config.generate_suggestions`` for graded feedback, and
    checks that the combined, lower-cased feedback text mentions enough
    documentation-related terms.

    NOTE(review): ``real_config`` is a fixture defined outside this file;
    presumably it is backed by a real model, so results may vary between
    runs - confirm before tightening the threshold.
    """
    documentation_exercise = Exercise(
        id=2,
        title="Code Documentation Exercise",
        type=ExerciseType.text,
        max_points=10,
        bonus_points=2,
        grading_instructions="Document the code's purpose, parameters, return values, and any important notes about usage.",
        problem_statement="Write documentation for a function that calculates the factorial of a number. Include its purpose, parameters, return value, and any edge cases to consider.",
        example_solution="This function calculates the factorial of a non-negative integer n. Parameters: n (int) - The number to calculate factorial for. Returns: int - The factorial of n. Note: This function will raise a ValueError if n is negative. The factorial of 0 is defined as 1.",
        grading_criteria=[],
    )
    sparse_answer = Submission(
        id=2,
        exerciseId=documentation_exercise.id,
        text="This function finds the factorial. It takes a number and multiplies it by all numbers below it until it reaches 1.",
    )

    feedbacks = await real_config.generate_suggestions(
        exercise=documentation_exercise,
        submission=sparse_answer,
        config=real_config,
        debug=False,
        is_graded=True,
    )

    # Dump every description so a failing run shows what was actually produced.
    separator = "--------------------------------"
    for item in feedbacks:
        print(item.description)
        print(separator)

    assert isinstance(feedbacks, list)
    assert feedbacks, "Expected feedback about documentation"

    # The term check below is a plain substring test on this lower-cased text.
    lowered_descriptions = [item.description.lower() for item in feedbacks]
    combined_text = " ".join(lowered_descriptions)

    # Documentation vocabulary: 5 candidates, floor(5 / 2) = 2 must appear
    # (integer division rounds the "half" down).
    # NOTE(review): "0" is matched as a bare substring, so any zero digit in
    # the feedback (e.g. inside "10") counts as a hit.
    documentation_terms = ["parameter", "return", "edge case", "negative", "0"]
    documentation_hits = [
        candidate for candidate in documentation_terms if candidate in combined_text
    ]
    threshold = len(documentation_terms) // 2
    assert len(documentation_hits) >= threshold, f"Feedback must include at least {threshold} documentation terms. Found: {', '.join(documentation_hits)}"
| 99 | + |
@pytest.mark.asyncio
async def test_generate_suggestions_design_pattern(real_config):
    """Verify feedback vocabulary for a superficial Singleton explanation.

    Builds a text exercise about the Singleton pattern together with a
    submission that skips use cases and drawbacks, asks
    ``real_config.generate_suggestions`` for graded feedback, and checks that
    the combined, lower-cased feedback text mentions enough pattern-related
    terms.

    NOTE(review): ``real_config`` is a fixture defined outside this file;
    presumably it is backed by a real model, so results may vary between
    runs - confirm before tightening the threshold.
    """
    singleton_exercise = Exercise(
        id=3,
        title="Design Pattern Explanation Exercise",
        type=ExerciseType.text,
        max_points=10,
        bonus_points=2,
        grading_instructions="Explain the design pattern, its use cases, advantages, and disadvantages.",
        problem_statement="Explain the Singleton design pattern. Include when it should be used and its potential drawbacks.",
        example_solution="The Singleton pattern ensures a class has only one instance and provides a global point of access to it. It's useful when exactly one object is needed to coordinate actions across the system. Advantages include controlled access to the sole instance and reduced namespace pollution. Disadvantages include potential violation of the Single Responsibility Principle and difficulty in unit testing due to global state.",
        grading_criteria=[],
    )
    shallow_answer = Submission(
        id=3,
        exerciseId=singleton_exercise.id,
        text="Singleton is when you make sure there's only one copy of something in your program. It's good for saving memory.",
    )

    feedbacks = await real_config.generate_suggestions(
        exercise=singleton_exercise,
        submission=shallow_answer,
        config=real_config,
        debug=False,
        is_graded=True,
    )

    # Dump every description so a failing run shows what was actually produced.
    separator = "--------------------------------"
    for item in feedbacks:
        print(item.description)
        print(separator)

    assert isinstance(feedbacks, list)
    assert feedbacks, "Expected feedback about design pattern explanation"

    # The term check below is a plain substring test on this lower-cased text.
    lowered_descriptions = [item.description.lower() for item in feedbacks]
    combined_text = " ".join(lowered_descriptions)

    # Pattern vocabulary: 4 candidates, floor(4 / 2) = 2 must appear.
    # NOTE(review): "use" is matched as a bare substring, so words that merely
    # contain it (e.g. "because", "used") also count as a hit.
    pattern_terms = ["instance", "advantage", "drawback", "use"]
    pattern_hits = [
        candidate for candidate in pattern_terms if candidate in combined_text
    ]
    threshold = len(pattern_terms) // 2
    assert len(pattern_hits) >= threshold, f"Feedback must include at least {threshold} design pattern terms. Found: {', '.join(pattern_hits)}"
0 commit comments