Athena: Add an LLM-as-profiler approach for text exercise feedback generation #105
Open
ahmetsenturk wants to merge 17 commits into main from athena/feature/llm-as-profiler
Commits (17)
3d12a59  init llm-as-profiler approach (ahmetsenturk)
9ee2e47  Merge branch 'main' into athena/feature/llm-as-profiler (ahmetsenturk)
1181686  update the profiler prompts (ahmetsenturk)
1d3679d  move to SOLO taxonomy from Bloom's (ahmetsenturk)
fc3443c  Merge branch 'main' into athena/feature/llm-as-profiler (ahmetsenturk)
1b5e784  update the profiler approach (ahmetsenturk)
7ee4210  update feedback generating prompt (ahmetsenturk)
2680a38  add suggestion actions to profile schema (ahmetsenturk)
89e0741  Merge branch 'main' into athena/feature/llm-as-profiler (ahmetsenturk)
539fafe  fix linting (ahmetsenturk)
a6647ea  update the profile output schema (ahmetsenturk)
5e42977  Merge branch 'main' into athena/feature/llm-as-profiler (ahmetsenturk)
df8f82b  Merge branch 'main' into athena/feature/llm-as-profiler (ahmetsenturk)
007475a  address coderabbit comments (ahmetsenturk)
12fbb3b  Merge branch 'main' into athena/feature/llm-as-profiler (ahmetsenturk)
e569530  Merge branch 'main' into athena/feature/llm-as-profiler (ahmetsenturk)
e435223  Merge branch 'main' into athena/feature/llm-as-profiler (ahmetsenturk)
athena/modules/text/module_text_llm/module_text_llm/llm_as_profiler/__init__.py (new file, 19 additions, 0 deletions)

```python
from pydantic import Field
from typing import Literal

from athena.text import Exercise, Submission
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.llm_as_profiler.prompt_generate_feedback import GenerateSuggestionsPrompt
from module_text_llm.llm_as_profiler.prompt_profiler import ProfilerPrompt
from module_text_llm.llm_as_profiler.generate_suggestions import generate_suggestions
from athena.schemas.learner_profile import LearnerProfile


class LLMAsProfilerConfig(ApproachConfig):
    type: Literal['llm_as_profiler'] = 'llm_as_profiler'
    profiler_prompt: ProfilerPrompt = Field(default=ProfilerPrompt())
    generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())

    # Note: learner_profile is accepted here but not yet forwarded to generate_suggestions.
    async def generate_suggestions(self, exercise: Exercise, submission: Submission, config, *, debug: bool,
                                   is_graded: bool, learner_profile: LearnerProfile = None):
        return await generate_suggestions(exercise, submission, config, debug=debug, is_graded=is_graded)
```
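Note: the `type: Literal['llm_as_profiler']` tag follows the usual pydantic discriminated-union pattern, which lets a caller select an approach purely from serialized config. Below is a minimal, self-contained sketch of that pattern, assuming pydantic v1.9+ discriminated unions; the `BasicConfig`/`ProfilerConfig`/`Wrapper` names are hypothetical illustrations, not part of this PR.

```python
from typing import Literal, Union

from pydantic import BaseModel, Field


# Hypothetical stand-in classes illustrating the pattern; not part of this PR.
class BasicConfig(BaseModel):
    type: Literal['basic'] = 'basic'


class ProfilerConfig(BaseModel):
    type: Literal['llm_as_profiler'] = 'llm_as_profiler'


class Wrapper(BaseModel):
    # pydantic dispatches on the literal `type` tag when parsing
    approach: Union[BasicConfig, ProfilerConfig] = Field(discriminator='type')


cfg = Wrapper.parse_obj({"approach": {"type": "llm_as_profiler"}})
assert isinstance(cfg.approach, ProfilerConfig)
```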
athena/modules/text/module_text_llm/module_text_llm/llm_as_profiler/generate_suggestions.py (new file, 127 additions, 0 deletions)

```python
from typing import List

from athena import emit_meta
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from llm_core.utils.llm_utils import (
    get_chat_prompt_with_formatting_instructions,
    check_prompt_length_and_omit_features_if_necessary,
    num_tokens_from_prompt,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, \
    format_grading_instructions
from module_text_llm.llm_as_profiler.prompt_profiler import SubmissionCompetencyProfile
from module_text_llm.llm_as_profiler.prompt_generate_feedback import AssessmentModel


async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool,
                               is_graded: bool) -> List[Feedback]:
    model = config.model.get_model()  # type: ignore[attr-defined]

    prompt_input = {
        "grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
        "problem_statement": exercise.problem_statement or "No problem statement.",
        "example_solution": exercise.example_solution,
        "submission": add_sentence_numbers(submission.text)
    }

    chat_prompt = get_chat_prompt_with_formatting_instructions(
        model=model,
        system_message=config.profiler_prompt.system_message,
        human_message=config.profiler_prompt.human_message,
        pydantic_object=SubmissionCompetencyProfile
    )

    # Check if the prompt is too long and omit features if necessary (in order of importance)
    omittable_features = ["example_solution", "problem_statement", "grading_instructions"]
    prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
        prompt=chat_prompt,
        prompt_input=prompt_input,
        max_input_tokens=config.max_input_tokens,
        omittable_features=omittable_features,
        debug=debug
    )

    # Skip if the prompt is too long
    if not should_run:
        logger.warning("Input too long. Skipping.")
        if debug:
            emit_meta("prompt", chat_prompt.format(**prompt_input))
            emit_meta("error",
                      f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}")
        return []

    # First stage: profile the competencies demonstrated in the submission
    initial_result: SubmissionCompetencyProfile = await predict_and_parse(
        model=model,
        chat_prompt=chat_prompt,
        prompt_input=prompt_input,
        pydantic_object=SubmissionCompetencyProfile,
        tags=[
            f"exercise-{exercise.id}",
            f"submission-{submission.id}",
        ],
        use_function_calling=True
    )

    # Second stage: generate feedback, conditioned on the competency profile
    second_prompt_input = {
        "max_points": exercise.max_points,
        "competency_analysis": initial_result.dict() if initial_result is not None else None,
        "submission": add_sentence_numbers(submission.text),
        "grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
        "problem_statement": exercise.problem_statement or "No problem statement.",
        "example_solution": exercise.example_solution
    }

    second_chat_prompt = get_chat_prompt_with_formatting_instructions(
        model=model,
        system_message=config.generate_suggestions_prompt.second_system_message,
        human_message=config.generate_suggestions_prompt.answer_message,
        pydantic_object=AssessmentModel)

    result: AssessmentModel = await predict_and_parse(
        model=model,
        chat_prompt=second_chat_prompt,
        prompt_input=second_prompt_input,
        pydantic_object=AssessmentModel,
        tags=[
            f"exercise-{exercise.id}",
            f"submission-{submission.id}",
        ],
        use_function_calling=True
    )

    if debug:
        emit_meta("generate_suggestions", {
            "prompt": second_chat_prompt.format(**second_prompt_input),
            "result": result.dict() if result is not None else None
        })

    if result is None:
        return []

    grading_instruction_ids = set(
        grading_instruction.id
        for criterion in exercise.grading_criteria or []
        for grading_instruction in criterion.structured_grading_instructions
    )

    feedbacks = []
    for feedback in result.feedbacks:
        index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end,
                                                                 submission.text)
        # Only keep grading instruction IDs that actually exist in the exercise rubric
        grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
        feedbacks.append(Feedback(
            exercise_id=exercise.id,
            submission_id=submission.id,
            title=feedback.title,
            description=feedback.description,
            index_start=index_start,
            index_end=index_end,
            credits=feedback.credits,
            is_graded=is_graded,
            structured_grading_instruction_id=grading_instruction_id,
            meta={}
        ))

    return feedbacks
```
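The character offsets stored on each `Feedback` come from `add_sentence_numbers` and `get_index_range_from_line_range`, whose implementations live in `module_text_llm.helpers.utils` and are outside this diff. The following is a hedged, self-contained approximation of what such helpers typically do (hypothetical function bodies, shown only to make the anchoring logic concrete):

```python
# Illustrative approximations only; the real helpers are in
# module_text_llm.helpers.utils and may differ in detail.

def add_sentence_numbers_sketch(text: str) -> str:
    """Prefix each line with '<number>: ' so the LLM can cite line ranges."""
    return "\n".join(f"{i}: {line}" for i, line in enumerate(text.splitlines()))


def index_range_from_line_range_sketch(line_start, line_end, text):
    """Map a 0-based line range back to character offsets in the original text."""
    if line_start is None or line_end is None:
        return 0, 0  # unreferenced (general) feedback
    lines = text.splitlines(keepends=True)
    start = sum(len(line) for line in lines[:line_start])
    end = sum(len(line) for line in lines[:line_end + 1])
    return start, end


text = "First sentence.\nSecond sentence."
print(add_sentence_numbers_sketch(text))
print(index_range_from_line_range_sketch(1, 1, text))  # offsets covering line 1
```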
.../modules/text/module_text_llm/module_text_llm/llm_as_profiler/prompt_generate_feedback.py (new file, 125 additions, 0 deletions)

```python
from pydantic import BaseModel, Field
from enum import Enum
from typing import List, Optional

system_message = """
You are a grading assistant at a university. Your task is to assess student submissions for text-based exercises and provide constructive, respectful, and helpful feedback without revealing the correct solution.

You will receive:
- A problem statement
- A sample solution (for internal reference only)
- Grading instructions
- The student's submission (with line numbers)
- The maximum score
- A structured analysis of the competencies required to solve the task, and how the student performed on each one

Instructions:
1. Read the problem statement to understand what the student was asked to do.
2. Use the sample solution only to understand the intended reasoning and structure.
3. Review the grading instructions to identify how responses are evaluated.
4. Review the structured competency analysis. Each item includes:
   - A competency the student was expected to demonstrate
   - The associated cognitive level (e.g., Understand, Apply, Analyze)
   - An evaluation of how well the student demonstrated it (Correct, Partially Correct, Attempted Incorrectly, Not Attempted)
   - Optional evidence with line numbers
5. Follow the steps below to generate each point of feedback:
   - Write a short title summarizing the feedback
   - Include line_start and line_end if the feedback refers to a specific part of the answer
   - Include credits (points awarded or deducted)
   - Suggest the action the student should take based on the diagnosis (Review Concept, Improve Explanation, Extend Thinking)
     - Review Concept: for conceptual misunderstandings; suggest revisiting foundational material. Tell the student to go over the subject/topic without explaining or revealing the answer.
     - Improve Explanation: for partially correct answers; suggest elaborating or clarifying to strengthen the answer. Tell the student what to improve, but do not reveal the solution.
     - Extend Thinking: for fully or mostly correct answers; deepen insight or explore related ideas. Provide a clear, actionable follow-up question or pointers to look into further.
   - Write a clear explanation directly addressed to the student, matching the suggested action
   - Assign credits gained or lost for this competency, aligned with the grading instructions (if available)
   - Include grading_instruction_id if related to a rubric item
   - Ensure feedback adds value beyond what the student already wrote - avoid simply agreeing or repeating.

You may also provide general feedback that does not refer to any specific line. In that case, set line_start and line_end to null, and credits to 0.

Guidelines:
- Do not, no matter what, reveal the solution
- Do not exceed the maximum total score: {max_points}
- Do not copy text from the student's answer, rubric, or solution
- Do not repeat the student's sentences
- Do not include metadata or extra commentary
- Do not limit the feedback to structured competency analysis items
- Cover all the grading instructions and questions

<Inputs>

Structured Competency Analysis:
{competency_analysis}

Max Score:
{max_points}

Problem Statement:
{problem_statement}

Sample Solution:
{example_solution}

Grading Instructions:
{grading_instructions}
"""


human_message = """\
Student\'s submission to grade (with sentence numbers <number>: <sentence>):
\"\"\"
{submission}
\"\"\"\
"""


# Input Prompt

class GenerateSuggestionsPrompt(BaseModel):
    """A prompt model for generating structured feedback on student submissions.

    This class provides the system and human messages used to instruct an LLM to analyze
    student submissions and generate constructive feedback based on competency analysis,
    grading instructions, and problem requirements.
    """
    second_system_message: str = Field(default=system_message,
                                       description="Message for priming AI behavior and instructing it what to do.")
    answer_message: str = Field(default=human_message,
                                description="Message from a human. The input on which the AI is supposed to act.")


# Output Object

class SuggestedAction(str, Enum):
    REVIEW_CONCEPT = "Review Concept"  # Conceptual misunderstanding; revisit foundational material
    IMPROVE_EXPLANATION = "Improve Explanation"  # Partially correct; elaborate or clarify to strengthen understanding
    EXTEND_THINKING = "Extend Thinking"  # Fully or mostly correct; deepen insight or explore related ideas


class FeedbackModel(BaseModel):
    title: str = Field(
        description="A very short label summarizing the issue or focus of the feedback (e.g., 'Missing Concept', 'Strong Start')."
    )
    description: str = Field(
        description="Student-facing feedback message that explains the issue or suggestion in a constructive and clear way."
    )
    suggested_action: SuggestedAction = Field(
        description="Suggested action for the student as a next step."
    )
    line_start: Optional[int] = Field(
        description="Referenced starting line number from the student's submission, or empty if unreferenced"
    )
    line_end: Optional[int] = Field(
        description="Referenced ending line number from the student's submission, or empty if unreferenced"
    )
    credits: float = Field(
        default=0.0,
        description="The number of points awarded or deducted for this feedback item."
    )
    grading_instruction_id: Optional[int] = Field(
        description="The ID of the grading instruction or rubric item related to this feedback, if applicable."
    )


class AssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""
    feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")
```
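For reference, `predict_and_parse` is expected to return output that validates against `AssessmentModel`. Below is a minimal sketch of that validation step, assuming the model definitions above are in scope and pydantic v1 (consistent with the `.dict()` calls elsewhere in this PR); the payload values are invented for illustration.

```python
# Minimal sketch: validating a hypothetical LLM function-calling payload
# against the AssessmentModel schema defined above (pydantic v1 style).
payload = {
    "feedbacks": [
        {
            "title": "Missing Concept",
            "description": "Revisit how the key concept from the lecture applies here.",
            "suggested_action": "Review Concept",
            "line_start": 2,
            "line_end": 4,
            "credits": -1.0,
            "grading_instruction_id": None,
        }
    ]
}

assessment = AssessmentModel.parse_obj(payload)
for fb in assessment.feedbacks:
    # Enum values round-trip from their string form
    print(fb.title, fb.suggested_action.value, fb.credits)
```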