fact-checker-backend/app/models/fact_check_models.py
2024-12-14 18:19:37 +06:00

229 lines
No EOL
7.4 KiB
Python

from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Union
from urllib.parse import urlparse

from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    HttpUrl,
    field_validator,
    validator,
)
# Common Models
class TokenUsage(BaseModel):
    """Token usage counters.

    Every counter is optional and falls back to ``0`` when omitted.
    """

    prompt_tokens: Optional[int] = Field(default=0)
    completion_tokens: Optional[int] = Field(default=0)
    total_tokens: Optional[int] = Field(default=0)
class ErrorResponse(BaseModel):
    """Error payload carrying a message, an error code, a timestamp and a path.

    ``detail`` and ``error_code`` are required. ``timestamp`` defaults to the
    ISO-8601 rendering of ``datetime.now()`` taken when the instance is
    created, and ``path`` defaults to ``None``.
    """

    detail: str
    error_code: str = Field(
        ...,
        description="Unique error code for this type of error",
    )
    # Evaluated per instance via default_factory, not once at import time.
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    path: Optional[str] = Field(
        default=None,
        description="The endpoint path where error occurred",
    )

    # Example payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "detail": "Error description",
                "error_code": "ERROR_CODE",
                "timestamp": "2024-12-09T16:49:30.905765",
                "path": "/check-facts",
            },
        },
    )
# Fact Check Models
class Publisher(BaseModel):
    """Publisher attached to a claim review.

    Attributes:
        name: Publisher name (required).
        site: Optional website. A non-empty value that does not start with
            ``http://`` or ``https://`` is stored with ``https://`` prepended.
    """

    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    @field_validator("site")
    @classmethod
    def validate_site(cls, v: Optional[str]) -> Optional[str]:
        """Prepend ``https://`` to ``site`` when it carries no HTTP(S) scheme.

        Args:
            v: The submitted site value, possibly ``None`` or empty.

        Returns:
            ``v`` unchanged when it is falsy or already starts with an
            HTTP(S) scheme; otherwise ``v`` prefixed with ``https://``.
        """
        # URL schemes are case-insensitive, so test a lower-cased copy to
        # avoid turning "HTTPS://x" into "https://HTTPS://x".
        if v and not v.lower().startswith(("http://", "https://")):
            return f"https://{v}"
        return v
class ClaimReview(BaseModel):
    """A single review of a claim.

    Only ``publisher`` is required. The remaining descriptive fields default
    to ``None`` and ``languageCode`` defaults to ``"en-US"``.
    """

    publisher: Publisher
    url: Optional[HttpUrl] = Field(default=None)
    title: Optional[str] = Field(default=None)
    reviewDate: Optional[str] = Field(default=None)
    textualRating: Optional[str] = Field(default=None)
    languageCode: str = "en-US"
class Claim(BaseModel):
    """A claim together with the reviews attached to it.

    ``text`` and ``claimReview`` are required; ``claimant`` and ``claimDate``
    default to ``None``.
    """

    text: str
    claimant: Optional[str] = Field(default=None)
    claimDate: Optional[str] = Field(default=None)
    claimReview: List[ClaimReview]
class SourceType(str, Enum):
    """Kinds of source; members are also plain ``str`` values."""

    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"
class FactCheckSource(BaseModel):
    """A source described by its domain, its type and a priority.

    ``priority`` defaults to 1 and must lie between 1 and 10 inclusive.
    """

    domain: str
    type: SourceType
    priority: int = Field(1, ge=1, le=10)
# Verification Models
class VerificationResult(BaseModel):
    """Outcome of verifying a claim.

    ``verdict`` and ``confidence`` are free-form strings; their field
    descriptions list the expected values. ``evidence`` may be a single
    string or a list of strings. ``missing_info`` defaults to ``None``.
    """

    verdict: str = Field(
        ...,
        description="True/False/Insufficient Information",
    )
    confidence: str = Field(
        ...,
        description="High/Medium/Low",
    )
    evidence: Union[str, List[str]]
    reasoning: str
    missing_info: Optional[str] = Field(default=None)

    # Example payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "verdict": "True",
                "confidence": "High",
                "evidence": ["Direct quote from source supporting the claim"],
                "reasoning": "Detailed analysis of why the claim is considered true",
                "missing_info": "Any caveats or limitations of the verification",
            },
        },
    )
# Request Models
class BaseFactCheckRequest(BaseModel):
    """Base request model holding the claim text to be fact-checked.

    Attributes:
        content: The claim. Surrounding whitespace is removed first, and the
            10-1000 character bounds are then enforced on the stripped text.
    """

    content: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked",
    )

    @field_validator("content", mode="before")
    @classmethod
    def validate_content(cls, v: Any) -> Any:
        """Strip ``content`` before the length constraints are applied.

        Running in "before" mode means ``min_length`` / ``max_length`` see the
        stripped text, so a short claim padded with spaces can no longer slip
        past the minimum length.

        Args:
            v: The raw submitted value.

        Returns:
            The stripped string, or ``v`` untouched when it is not a string
            (the regular ``str`` validation then rejects it).

        Raises:
            ValueError: If the value is a string that is empty or consists
                only of whitespace.
        """
        if not isinstance(v, str):
            return v
        stripped = v.strip()
        if not stripped:
            raise ValueError("Content cannot be empty or just whitespace")
        return stripped
class GoogleFactCheckRequest(BaseFactCheckRequest):
    """Fact-check request that adds a language code and a per-source result cap.

    ``language`` defaults to ``"en-US"`` and must match the ``xx-XX`` pattern
    (two lowercase letters, a hyphen, two uppercase letters).
    ``max_results_per_source`` defaults to 10 and must be between 1 and 50.
    """

    language: str = Field("en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(10, ge=1, le=50)
class AIFactCheckRequest(BaseFactCheckRequest):
    """Fact-check request that carries the URLs to check the content against.

    Attributes:
        urls: Between 1 and 5 URLs. Each one is stripped of surrounding
            whitespace and prefixed with ``https://`` when it has no
            HTTP(S) scheme.
    """

    urls: List[str] = Field(
        ...,
        min_length=1,
        max_length=5,
        description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing",
    )

    @field_validator("urls")
    @classmethod
    def validate_urls(cls, urls: List[str]) -> List[str]:
        """Normalize every URL and reject those without a network location.

        Args:
            urls: The submitted URL strings.

        Returns:
            The normalized URLs, in the submitted order.

        Raises:
            ValueError: If a URL is empty or whitespace-only, cannot be
                parsed, or has no network location after normalization.
        """
        validated_urls = []
        for raw_url in urls:
            # Keep the stripped value, not just test it: otherwise
            # " example.com" would become "https:// example.com".
            url = raw_url.strip()
            if not url:
                raise ValueError("URL cannot be empty")

            # Add https:// if no protocol specified (schemes are
            # case-insensitive, so compare on a lower-cased copy).
            if not url.lower().startswith(("http://", "https://")):
                url = f"https://{url}"

            # Only the parse itself sits in the try block, so the structural
            # error raised below is not caught and re-wrapped.
            try:
                parsed = urlparse(url)
            except ValueError as exc:
                raise ValueError(f"Invalid URL {url}: {exc}") from exc

            if not parsed.netloc:
                raise ValueError(f"Invalid URL structure for {url}")
            validated_urls.append(url)
        return validated_urls

    # Example payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "content": "Indian flag was drawn in BUET campus",
                "urls": [
                    "www.altnews.in/article-about-flag",
                    "www.another-source.com/related-news",
                ],
            },
        },
    )
# Response Models
class BaseFactCheckResponse(BaseModel):
    """Fields shared by fact-check responses: query, token usage and sources.

    All three fields are required.
    """

    query: str
    token_usage: TokenUsage
    sources: List[str]

    # Example payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Example statement to verify",
                "token_usage": {
                    "prompt_tokens": 100,
                    "completion_tokens": 50,
                    "total_tokens": 150,
                },
                "sources": ["source1.com", "source2.com"],
            },
        },
    )
class GoogleFactCheckResponse(BaseFactCheckResponse):
    """Response that extends the base response with claims and a summary.

    Adds the number of claims found, the claim results as free-form
    dictionaries, a free-form verification result and a summary mapping
    names to integer counts.
    """

    total_claims_found: int
    results: List[Dict[str, Any]]
    verification_result: Dict[str, Any]
    summary: Dict[str, int]

    # Example payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Example claim",
                "total_claims_found": 1,
                "results": [
                    {
                        "text": "Example claim text",
                        "claimant": "Source name",
                        "claimReview": [
                            {
                                "publisher": {
                                    "name": "Fact Checker",
                                    "site": "factchecker.com",
                                },
                                "textualRating": "True",
                            },
                        ],
                    },
                ],
                "verification_result": {
                    "verdict": "True",
                    "confidence": "High",
                    "evidence": ["Supporting evidence"],
                    "reasoning": "Detailed analysis",
                },
                "sources": ["factchecker.com"],
                "token_usage": {
                    "prompt_tokens": 100,
                    "completion_tokens": 50,
                    "total_tokens": 150,
                },
                "summary": {
                    "total_sources": 1,
                    "fact_checking_sites_queried": 10,
                },
            },
        },
    )
class AIFactCheckResponse(BaseFactCheckResponse):
    """Response that extends the base response with per-URL verification.

    ``verification_result`` maps a string key to a ``VerificationResult``;
    the schema example keys it by source URL.
    """

    # One VerificationResult per URL.
    verification_result: Dict[str, VerificationResult]

    # Example payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Indian flag was drawn in BUET campus",
                "verification_result": {
                    "https://www.source1.com": {
                        "verdict": "True",
                        "confidence": "High",
                        "evidence": ["Supporting evidence from source 1"],
                        "reasoning": "Detailed analysis from source 1",
                        "missing_info": None,
                    },
                    "https://www.source2.com": {
                        "verdict": "True",
                        "confidence": "Medium",
                        "evidence": ["Supporting evidence from source 2"],
                        "reasoning": "Analysis from source 2",
                        "missing_info": "Additional context needed",
                    },
                },
                "sources": ["source1.com", "source2.com"],
                "token_usage": {
                    "prompt_tokens": 200,
                    "completion_tokens": 100,
                    "total_tokens": 300,
                },
            },
        },
    )
# Backwards-compatibility aliases: keep the generic names FactCheckRequest and
# FactCheckResponse importable, bound to the Google-specific models.
FactCheckRequest = GoogleFactCheckRequest
FactCheckResponse = GoogleFactCheckResponse