# Source: test-ragp / src/rag_pipelines/prompts/usefulness_evaluator.py
# (uploaded by awinml, "Upload 107 files", commit 336f4a9 verified)
from typing import Any, ClassVar, Literal
from pydantic import BaseModel, Field, field_validator
# LLM prompt template for scoring how useful a document excerpt is for
# answering a question.  Expects two str.format placeholders — {question}
# and {context} — and instructs the model to reason through five analysis
# steps before emitting a final 1-5 score in the form: "final_score": "<score>".
# The five numbered steps mirror the section headers enforced by
# UsefulnessEvaluatorResult's reasoning_chain validator below.
USEFULNESS_EVALUATOR_PROMPT: str = """You are a senior researcher evaluating document usefulness. Follow these steps:
1. Required Information: Identify key facts needed to answer the question
2. Factual Content: Check document for presence of required facts
3. Quality Assessment: Evaluate reliability/detail of presented information
4. Coverage Analysis: Determine percentage of required information covered
5. Score Synthesis: Combine factors into final usefulness score
**Question:** {question}
**Document Excerpt:** {context}
Provide detailed reasoning through all steps, then state final score as an integer from 1-5 using format: "final_score": "<score>"
1 = No relevant facts, 3 = Some useful facts with gaps, 5 = Comprehensive high-quality information"""
class UsefulnessEvaluatorResult(BaseModel):
    """Validated output of the document-usefulness evaluation.

    Pairs the evaluator's step-by-step reasoning with its final 1-5 score.
    A field validator guarantees that the reasoning text contains every one
    of the five required analysis section headers.

    Attributes:
        reasoning_chain (str): Full analysis covering all five evaluation steps.
        decision (Literal["1", "2", "3", "4", "5"]): Final usefulness score.

    Raises:
        ValueError: When the reasoning chain omits a required section header.
        ValidationError: When the decision is outside the allowed "1"-"5" set.

    Example:
        >>> result = UsefulnessEvaluatorResult(
        ...     reasoning_chain=(
        ...         "1. Required Information: Needs 3 economic indicators\\n"
        ...         "2. Factual Content: Contains GDP data\\n"
        ...         "3. Quality Assessment: Government statistics\\n"
        ...         "4. Coverage Analysis: 2/3 indicators present\\n"
        ...         "5. Score Synthesis: Partial official data"
        ...     ),
        ...     decision="3",
        ... )
        >>> result.decision
        '3'
    """

    # Free-form analysis text; must contain all five numbered section headers.
    reasoning_chain: str = Field(
        ...,
        description=(
            "Complete analysis chain containing required sections:\n"
            "1. Required Information: Key facts needed for comprehensive answer\n"
            "2. Factual Content: Presence verification of required facts\n"
            "3. Quality Assessment: Source reliability and detail depth\n"
            "4. Coverage Analysis: Percentage of requirements fulfilled\n"
            "5. Score Synthesis: Final numerical score justification"
        ),
    )
    # Score is a string literal ("1".."5") rather than an int, matching the
    # quoted value the prompt asks the LLM to emit.
    decision: Literal["1", "2", "3", "4", "5"] = Field(
        ...,
        description=(
            "Numerical usefulness score with criteria:\n"
            "1 - Irrelevant/no facts | 2 - Minimal value | 3 - Partial with gaps\n"
            "4 - Good reliable coverage | 5 - Comprehensive high-quality"
        ),
    )

    @field_validator("reasoning_chain")
    @classmethod
    def validate_reasoning_steps(cls, chain_to_validate: str) -> str:
        """Check that every required analysis section appears in the chain.

        Performs a simple substring search for each of the five numbered
        section headers and rejects the value if any header is absent.

        Args:
            chain_to_validate (str): Candidate reasoning text to check.

        Returns:
            str: The unchanged chain when all section headers are present.

        Raises:
            ValueError: Naming every section header that is missing.

        Example:
            >>> valid_chain = (
            ...     "1. Required Information: Needs 5 metrics\\n"
            ...     "2. Factual Content: Contains 3 metrics\\n"
            ...     "3. Quality Assessment: Industry reports\\n"
            ...     "4. Coverage Analysis: 60% complete\\n"
            ...     "5. Score Synthesis: Partial coverage"
            ... )
            >>> _ = UsefulnessEvaluatorResult.validate_reasoning_steps(valid_chain)
        """
        section_headers: tuple[str, ...] = (
            "1. Required Information",
            "2. Factual Content",
            "3. Quality Assessment",
            "4. Coverage Analysis",
            "5. Score Synthesis",
        )
        missing: list[str] = []
        for header in section_headers:
            if header not in chain_to_validate:
                missing.append(header)
        if missing:
            msg = f"Missing required analysis sections: {', '.join(missing)}"
            raise ValueError(msg)
        return chain_to_validate

    # Pydantic v2 configuration: supplies a worked example for generated
    # JSON schemas (e.g. OpenAPI docs).
    model_config: ClassVar[dict[str, Any]] = {
        "json_schema_extra": {
            "example": {
                "reasoning_chain": (
                    "1. Required Information: Needs 5 climate change impacts\n"
                    "2. Factual Content: Details 3 impacts with data\n"
                    "3. Quality Assessment: Peer-reviewed sources cited\n"
                    "4. Coverage Analysis: 60% of requirements met\n"
                    "5. Score Synthesis: Strong but incomplete coverage"
                ),
                "decision": "4",
            }
        }
    }