236 lines
No EOL
7.6 KiB
Python
236 lines
No EOL
7.6 KiB
Python
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Union
from urllib.parse import urlparse

from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    HttpUrl,
    field_validator,
    validator,
)
|
|
|
|
# Common Models
class TokenUsage(BaseModel):
    """Token accounting for a single LLM call.

    All counts default to 0; Optional so upstream payloads may supply None.
    """

    prompt_tokens: Optional[int] = 0
    completion_tokens: Optional[int] = 0
    total_tokens: Optional[int] = 0
|
|
|
|
class ErrorResponse(BaseModel):
    """Standard error payload returned by the API's error handlers."""

    # Human-readable description of what went wrong.
    detail: str
    error_code: str = Field(..., description="Unique error code for this type of error")
    # ISO-8601 string, captured when the response object is built.
    # NOTE(review): datetime.now() is naive local time — confirm whether
    # timezone-aware UTC (datetime.now(timezone.utc)) is intended.
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    path: Optional[str] = Field(None, description="The endpoint path where error occurred")

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "detail": "Error description",
            "error_code": "ERROR_CODE",
            "timestamp": "2024-12-09T16:49:30.905765",
            "path": "/check-facts"
        }
    })
|
|
|
|
# Fact Check Models
class Publisher(BaseModel):
    """A fact-check publisher (name plus optional website)."""

    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    # Pydantic v2 validator — this file already uses v2's ConfigDict, so the
    # deprecated v1 `@validator` is replaced with `@field_validator`.
    # Runs in the default "after" mode, same as the old plain `@validator`.
    @field_validator('site')
    @classmethod
    def validate_site(cls, v):
        """Prefix a bare domain with https:// so the stored value is a usable URL."""
        if v and not v.startswith(('http://', 'https://')):
            return f"https://{v}"
        return v
|
|
|
|
class ClaimReview(BaseModel):
    """One publisher's review of a claim.

    Field names are camelCase to mirror the upstream fact-check API payload.
    """

    publisher: Publisher
    url: Optional[HttpUrl] = None
    title: Optional[str] = None
    # Kept as a plain string; no date format is enforced here.
    reviewDate: Optional[str] = None
    # Free-text rating, e.g. "True" (see GoogleFactCheckResponse example).
    textualRating: Optional[str] = None
    languageCode: str = Field(default="en-US")
|
|
|
|
class Claim(BaseModel):
    """A claim together with the reviews published about it.

    camelCase mirrors the upstream fact-check API payload.
    """

    text: str
    # Who made the claim, when known.
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    # Required field; the list itself carries no length constraint.
    claimReview: List[ClaimReview]
|
|
|
|
class SourceType(str, Enum):
    """Category of a fact-checking source; str-valued so it serializes as JSON."""

    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"
|
|
|
|
class FactCheckSource(BaseModel):
    """A domain to query during fact-checking, with a 1-10 priority (default 1)."""

    domain: str
    # `type` shadows the builtin, but renaming would break the field's wire name.
    type: SourceType
    priority: int = Field(default=1, ge=1, le=10)
|
|
|
|
# Verification Models
class VerificationResult(BaseModel):
    """Outcome of verifying one claim against source material."""

    verdict: str = Field(..., description="True/False/Insufficient Information")
    confidence: str = Field(..., description="High/Medium/Low")
    # Accepts either a single quote or a list of quotes from the sources.
    evidence: Union[str, List[str]]
    reasoning: str
    # Caveats or information that was unavailable during verification.
    missing_info: Optional[str] = None

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "verdict": "True",
            "confidence": "High",
            "evidence": ["Direct quote from source supporting the claim"],
            "reasoning": "Detailed analysis of why the claim is considered true",
            "missing_info": "Any caveats or limitations of the verification"
        }
    })
|
|
|
|
# Request Models
class BaseFactCheckRequest(BaseModel):
    """Shared request shape: the claim text to be fact-checked."""

    content: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked"
    )

    # Pydantic v2 validator — replaces the deprecated v1 `@validator`
    # (the file already uses v2's ConfigDict elsewhere).
    @field_validator('content')
    @classmethod
    def validate_content(cls, v):
        """Reject whitespace-only content; strip surrounding whitespace otherwise."""
        if not v.strip():
            raise ValueError("Content cannot be empty or just whitespace")
        return v.strip()
|
|
|
|
class GoogleFactCheckRequest(BaseFactCheckRequest):
    """Request for the Google fact-check pipeline."""

    # Locale in "ll-CC" form (e.g. "en-US"), enforced by the regex.
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(default=10, ge=1, le=50)
|
|
|
|
class AIFactCheckRequest(BaseFactCheckRequest):
    """Request to verify content against a caller-supplied list of URLs."""

    urls: List[str] = Field(
        ...,
        # Pydantic v2: list-size constraints are `min_length`/`max_length`;
        # `min_items`/`max_items` are the deprecated v1 spellings.
        min_length=1,
        max_length=5,
        description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing"
    )

    # Pydantic v2 validator — replaces the deprecated v1 `@validator`.
    @field_validator('urls')
    @classmethod
    def validate_urls(cls, urls):
        """Normalize and sanity-check each URL.

        Empty entries are rejected, a missing protocol defaults to https://,
        and the result must parse with a non-empty network location.
        """
        validated_urls = []
        for url in urls:
            if not url.strip():
                raise ValueError("URL cannot be empty")

            # Add https:// if no protocol specified
            if not url.startswith(('http://', 'https://')):
                url = f'https://{url}'

            # Only wrap urlparse() failures; previously the netloc check's own
            # ValueError was caught by the enclosing except and re-wrapped,
            # producing a doubled "Invalid URL ...: Invalid URL structure ..."
            # message.
            try:
                result = urlparse(url)
            except Exception as e:
                raise ValueError(f"Invalid URL {url}: {str(e)}")
            if not result.netloc:
                raise ValueError(f"Invalid URL structure for {url}")
            validated_urls.append(url)

        return validated_urls

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "content": "Indian flag was drawn in BUET campus",
            "urls": [
                "www.altnews.in/article-about-flag",
                "www.another-source.com/related-news"
            ]
        }
    })
|
|
|
|
# Response Models
class BaseFactCheckResponse(BaseModel):
    """Fields common to every fact-check response."""

    # The content that was checked.
    query: str
    token_usage: TokenUsage
    # Source domains consulted (see example below).
    sources: List[str]
    context_used: List[str]

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example statement to verify",
            "token_usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150
            },
            "sources": ["source1.com", "source2.com"],
            "context_used": ["Relevant context from sources"]
        }
    })
|
|
|
|
class GoogleFactCheckResponse(BaseFactCheckResponse):
    """Response from the Google fact-check pipeline.

    `results` and `verification_result` are loosely-typed dicts (raw API
    shapes); see the schema example below for the expected structure.
    """

    total_claims_found: int
    # Raw claim payloads from the upstream API.
    results: List[Dict[str, Any]]
    # Mirrors VerificationResult's shape, kept as a dict here.
    verification_result: Dict[str, Any]
    # Aggregate counters, e.g. total_sources, fact_checking_sites_queried.
    summary: Dict[str, int]

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example claim",
            "total_claims_found": 1,
            "results": [{
                "text": "Example claim text",
                "claimant": "Source name",
                "claimReview": [{
                    "publisher": {
                        "name": "Fact Checker",
                        "site": "factchecker.com"
                    },
                    "textualRating": "True"
                }]
            }],
            "verification_result": {
                "verdict": "True",
                "confidence": "High",
                "evidence": ["Supporting evidence"],
                "reasoning": "Detailed analysis"
            },
            "sources": ["factchecker.com"],
            "context_used": ["Relevant context"],
            "token_usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150
            },
            "summary": {
                "total_sources": 1,
                "fact_checking_sites_queried": 10
            }
        }
    })
|
|
|
|
class AIFactCheckResponse(BaseFactCheckResponse):
    """Response from the AI fact-check pipeline, with one result per URL."""

    # Keyed by source URL.
    verification_result: Dict[str, VerificationResult]  # Changed to Dict to store results per URL

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Indian flag was drawn in BUET campus",
            "verification_result": {
                "https://www.source1.com": {
                    "verdict": "True",
                    "confidence": "High",
                    "evidence": ["Supporting evidence from source 1"],
                    "reasoning": "Detailed analysis from source 1",
                    "missing_info": None
                },
                "https://www.source2.com": {
                    "verdict": "True",
                    "confidence": "Medium",
                    "evidence": ["Supporting evidence from source 2"],
                    "reasoning": "Analysis from source 2",
                    "missing_info": "Additional context needed"
                }
            },
            "sources": ["source1.com", "source2.com"],
            "context_used": [
                "Context from source 1",
                "Context from source 2"
            ],
            "token_usage": {
                "prompt_tokens": 200,
                "completion_tokens": 100,
                "total_tokens": 300
            }
        }
    })
|
|
|
|
# Backwards compatibility aliases
# Old names kept so existing imports keep working; prefer the explicit
# Google/AI class names in new code.
FactCheckRequest = GoogleFactCheckRequest
FactCheckResponse = GoogleFactCheckResponse