Athena: Add an LLM-as-profiler approach for text exercise feedback generation #105

Open · wants to merge 17 commits into base: main
@@ -7,9 +7,10 @@
from module_text_llm.basic_approach import BasicApproachConfig
from module_text_llm.divide_and_conquer import DivideAndConquerConfig
from module_text_llm.cot_learner_profile import COTLearnerProfileConfig
from module_text_llm.llm_as_profiler import LLMAsProfilerConfig

ApproachConfigUnion = Union[BasicApproachConfig, ChainOfThoughtConfig, DivideAndConquerConfig, SelfConsistencyConfig, COTLearnerProfileConfig]

ApproachConfigUnion = Union[BasicApproachConfig, ChainOfThoughtConfig, DivideAndConquerConfig, SelfConsistencyConfig, COTLearnerProfileConfig, LLMAsProfilerConfig]
@config_schema_provider
class Configuration(BaseModel):
debug: bool = Field(default=False, description="Enable debug mode.")
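For readers skimming the diff: the `type: Literal[...]` field on each approach config is what lets Pydantic resolve the correct member of `ApproachConfigUnion` from a request payload. Below is a minimal, self-contained sketch of that mechanism, not part of the PR; the two classes are simplified stand-ins for the module's real configs, and Pydantic v1 is assumed, matching the `.dict()` calls later in this diff.

from typing import Literal, Union
from pydantic import BaseModel, parse_obj_as

class BasicApproachConfig(BaseModel):
    type: Literal["basic"] = "basic"

class LLMAsProfilerConfig(BaseModel):
    type: Literal["llm_as_profiler"] = "llm_as_profiler"

ApproachConfigUnion = Union[BasicApproachConfig, LLMAsProfilerConfig]

# The `type` value in the payload decides which union member validates successfully.
config = parse_obj_as(ApproachConfigUnion, {"type": "llm_as_profiler"})
assert isinstance(config, LLMAsProfilerConfig)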
@@ -0,0 +1,19 @@
from pydantic import Field
from typing import Literal, Optional

from athena.text import Exercise, Submission
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.llm_as_profiler.prompt_generate_feedback import GenerateSuggestionsPrompt
from module_text_llm.llm_as_profiler.prompt_profiler import ProfilerPrompt
from module_text_llm.llm_as_profiler.generate_suggestions import generate_suggestions
from athena.schemas.learner_profile import LearnerProfile


class LLMAsProfilerConfig(ApproachConfig):
type: Literal['llm_as_profiler'] = 'llm_as_profiler'
profiler_prompt: ProfilerPrompt = Field(default=ProfilerPrompt())
generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())

async def generate_suggestions(self, exercise: Exercise, submission: Submission, config, *, debug: bool,
is_graded: bool, learner_profile: Optional[LearnerProfile] = None):
# learner_profile is accepted for interface parity with other approaches but is not forwarded by this approach yet.
return await generate_suggestions(exercise, submission, config, debug=debug, is_graded=is_graded)
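For context, a rough sketch of how this config could be invoked. The dispatcher that actually calls it lives elsewhere in Athena, so `exercise` and `submission` below are assumed to be already-loaded Athena objects and the call is illustrative only.

import asyncio

async def run_llm_as_profiler(exercise, submission):
    # Mirrors how a dispatcher would call the approach config defined above.
    config = LLMAsProfilerConfig()
    # learner_profile is accepted by the method but not forwarded by this approach yet.
    return await config.generate_suggestions(
        exercise, submission, config, debug=False, is_graded=True
    )

# feedbacks = asyncio.run(run_llm_as_profiler(exercise, submission))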
@@ -0,0 +1,127 @@
from typing import List

from athena import emit_meta
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
check_prompt_length_and_omit_features_if_necessary,
num_tokens_from_prompt,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, \
format_grading_instructions
from module_text_llm.llm_as_profiler.prompt_profiler import SubmissionCompetencyProfile
from module_text_llm.llm_as_profiler.prompt_generate_feedback import AssessmentModel

async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool,
is_graded: bool) -> List[Feedback]:
model = config.model.get_model() # type: ignore[attr-defined]

prompt_input = {
"grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
"problem_statement": exercise.problem_statement or "No problem statement.",
"example_solution": exercise.example_solution,
"submission": add_sentence_numbers(submission.text)
}

chat_prompt = get_chat_prompt_with_formatting_instructions(
model=model,
system_message=config.profiler_prompt.system_message,
human_message=config.profiler_prompt.human_message,
pydantic_object=SubmissionCompetencyProfile
)

# Check if the prompt is too long and omit features if necessary (in order of importance)
omittable_features = ["example_solution", "problem_statement", "grading_instructions"]
prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
prompt=chat_prompt,
prompt_input=prompt_input,
max_input_tokens=config.max_input_tokens,
omittable_features=omittable_features,
debug=debug
)

# Skip if the prompt is too long
if not should_run:
logger.warning("Input too long. Skipping.")
if debug:
emit_meta("prompt", chat_prompt.format(**prompt_input))
emit_meta("error",
f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}")
return []

initial_result: SubmissionCompetencyProfile = await predict_and_parse(
model=model,
chat_prompt=chat_prompt,
prompt_input=prompt_input,
pydantic_object=SubmissionCompetencyProfile,
tags=[
f"exercise-{exercise.id}",
f"submission-{submission.id}",
],
use_function_calling=True
)

second_prompt_input = {
"max_points": exercise.max_points,
"competency_analysis": initial_result.dict() if initial_result is not None else None,
"submission": add_sentence_numbers(submission.text),
"grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
"problem_statement": exercise.problem_statement or "No problem statement.",
"example_solution": exercise.example_solution
}

second_chat_prompt = get_chat_prompt_with_formatting_instructions(
model=model,
system_message=config.generate_suggestions_prompt.second_system_message,
human_message=config.generate_suggestions_prompt.answer_message,
pydantic_object=AssessmentModel)

result: AssessmentModel = await predict_and_parse(
model=model,
chat_prompt=second_chat_prompt,
prompt_input=second_prompt_input,
pydantic_object=AssessmentModel,
tags=[
f"exercise-{exercise.id}",
f"submission-{submission.id}",
],
use_function_calling=True
)

if debug:
emit_meta("generate_suggestions", {
"prompt": second_chat_prompt.format(**second_prompt_input),
"result": result.dict() if result is not None else None
})

if result is None:
return []

grading_instruction_ids = set(
grading_instruction.id
for criterion in exercise.grading_criteria or []
for grading_instruction in criterion.structured_grading_instructions
)

feedbacks = []
for feedback in result.feedbacks:
index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end,
submission.text)
grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
feedbacks.append(Feedback(
exercise_id=exercise.id,
submission_id=submission.id,
title=feedback.title,
description=feedback.description,
index_start=index_start,
index_end=index_end,
credits=feedback.credits,
is_graded=is_graded,
structured_grading_instruction_id=grading_instruction_id,
meta={}
))

return feedbacks
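One detail worth noting in the function above: before the profiler call, optional context fields are dropped in order until the prompt fits the token budget. The real logic lives in `check_prompt_length_and_omit_features_if_necessary` from `llm_core.utils.llm_utils`; the sketch below only illustrates the general idea with a caller-supplied token counter (`count_tokens` and `shrink_prompt_input` are placeholders, not the actual helpers).

from typing import Callable, Dict, List, Tuple

def shrink_prompt_input(prompt_input: Dict[str, object],
                        omittable_features: List[str],
                        max_input_tokens: int,
                        count_tokens: Callable[[Dict[str, object]], int]) -> Tuple[Dict[str, object], bool]:
    # Drop omittable features one by one until the prompt fits, returning whether it ever did.
    remaining = dict(prompt_input)
    for feature in omittable_features:
        if count_tokens(remaining) <= max_input_tokens:
            return remaining, True
        remaining[feature] = None  # omit this feature and re-check on the next iteration
    return remaining, count_tokens(remaining) <= max_input_tokens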
@@ -0,0 +1,125 @@
from pydantic import BaseModel, Field
from enum import Enum
from typing import List, Optional

system_message = """
You are a grading assistant at a university. Your task is to assess student submissions for text-based exercises and provide constructive, respectful, and helpful feedback without revealing the correct solution.

You will receive:
- A problem statement
- A sample solution (for internal reference only)
- Grading instructions
- The student's submission (with line numbers)
- The maximum score
- A structured analysis of the competencies required to solve the task, and how the student performed for each one

Instructions:
1. Read the problem statement to understand what the student was asked to do.
2. Use the sample solution only to understand the intended reasoning and structure.
3. Review the grading instructions to identify how responses are evaluated.
4. Review the structured competency analysis. Each item includes:
- A competency the student was expected to demonstrate
- The associated cognitive level (e.g., Understand, Apply, Analyze)
- An evaluation of how well the student demonstrated it (Correct, Partially Correct, Attempted Incorrectly, Not Attempted)
- Optional evidence with line numbers
5. Follow the steps below when generating each point of feedback:
- Write a short title summarizing the feedback
- Include line_start and line_end if the feedback refers to a specific part of the answer
- Include credits (points awarded or deducted)
- Suggest the action the student should take based on the diagnosis (Review Concept, Improve Explanation, Extend Thinking)
- Review Concept: the student has a conceptual misunderstanding; suggest revisiting the foundational material. Tell them "Go over this subject/topic" without explaining or revealing the answer.
- Improve Explanation: the student is partially correct; suggest elaborating or clarifying and trying again to strengthen the answer. Tell them what they should do better without revealing the solution.
- Extend Thinking: the student is fully or mostly correct; deepen insight or explore related ideas. Provide a clear, actionable follow-up question or related ideas they can look into further.
- Write a clear explanation addressed directly to the student, in line with the suggested action
- Assign the credits gained or lost for this competency, aligned with the grading instructions (if available)
- Include grading_instruction_id if related to a rubric item
- Ensure feedback adds value beyond what the student already wrote - avoid simply agreeing or repeating.

You may also provide general feedback that does not refer to any specific line. In that case, set line_start and line_end to null, and credits to 0.

Guidelines:
- Do not, no matter what, reveal the solution
- Do not exceed the maximum total score: {max_points}
- Do not copy text from the student's answer, rubric, or solution
- Do not repeat the student's sentences
- Do not include metadata or extra commentary
- Do not limit the feedback to structured competency analysis items
- Cover all the grading instructions and questions

<Inputs>

Structured Competency Analysis:
{competency_analysis}

Max Score:
{max_points}

Problem Statement:
{problem_statement}

Sample Solution:
{example_solution}

Grading Instructions:
{grading_instructions}
"""


human_message = """\
Student\'s submission to grade (with sentence numbers <number>: <sentence>):
\"\"\"
{submission}
\"\"\"\
"""


# Input Prompt

class GenerateSuggestionsPrompt(BaseModel):
"""A prompt model for generating structured feedback on student submissions.

This class provides the system and human messages used to instruct an LLM to analyze
student submissions and generate constructive feedback based on competency analysis,
grading instructions, and problem requirements.
"""
second_system_message: str = Field(default=system_message,
description="Message for priming AI behavior and instructing it what to do.")
answer_message: str = Field(default=human_message,
description="Message from a human. The input on which the AI is supposed to act.")

# Output Object

class SuggestedAction(str, Enum):
REVIEW_CONCEPT = "Review Concept" # For conceptual misunderstandings; revisit foundational material
IMPROVE_EXPLANATION = "Improve Explanation" # Partially correct; elaborate or clarify to strengthen understanding
EXTEND_THINKING = "Extend Thinking" # Fully or mostly correct; deepen insight or explore related ideas


class FeedbackModel(BaseModel):
title: str = Field(
description="A very short label summarizing the issue or focus of the feedback (e.g., 'Missing Concept', 'Strong Start')."
)
description: str = Field(
description="Student-facing feedback message that explains the issue or suggestion in a constructive and clear way."
)
suggested_action: SuggestedAction = Field(
description="Suggested action for the student as a next step."
)
line_start: Optional[int] = Field(
description="Referenced starting line number from the student's submission, or null if the feedback does not refer to a specific line."
)
line_end: Optional[int] = Field(
description="Referenced ending line number from the student's submission, or null if the feedback does not refer to a specific line."
)
credits: float = Field(
default=0.0,
description="The number of points awarded or deducted for this feedback item."
)
grading_instruction_id: Optional[int] = Field(
description="The ID of the grading instruction or rubric item related to this feedback, if applicable."
)


class AssessmentModel(BaseModel):
"""Collection of feedbacks making up an assessment"""
feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")
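As a sanity check on the output schema, this is what a parsed function-calling payload could look like. The values are invented for illustration, and the Pydantic v1 API (`parse_obj`) is assumed, consistent with the `.dict()` usage in `generate_suggestions`.

example_payload = {
    "feedbacks": [
        {
            "title": "Missing Concept",
            "description": "Revisit how the central concept of the exercise applies here before revising your answer.",
            "suggested_action": "Review Concept",
            "line_start": 3,
            "line_end": 5,
            "credits": -1.0,
            "grading_instruction_id": None,
        }
    ]
}

assessment = AssessmentModel.parse_obj(example_payload)
assert assessment.feedbacks[0].suggested_action is SuggestedAction.REVIEW_CONCEPT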