"""Pydantic models for the fact-checking API.

Defines shared types (token usage, errors), Google Fact Check Tools API
claim structures, and request/response schemas for both the Google-backed
and AI-backed fact-checking endpoints.
"""

from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Union
from urllib.parse import urlparse

# pydantic v2 API: field_validator replaces the deprecated v1 `validator`.
from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator


# Common Models
class TokenUsage(BaseModel):
    """LLM token accounting for a single fact-check call."""

    prompt_tokens: Optional[int] = 0
    completion_tokens: Optional[int] = 0
    total_tokens: Optional[int] = 0


class ErrorResponse(BaseModel):
    """Standard error payload returned by every endpoint."""

    detail: str
    error_code: str = Field(..., description="Unique error code for this type of error")
    # NOTE(review): naive local-time ISO timestamp; switching to
    # datetime.now(timezone.utc) would add an offset suffix and change the
    # wire format, so it is kept as-is -- confirm with API consumers.
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    path: Optional[str] = Field(
        None, description="The endpoint path where error occurred"
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "detail": "Error description",
                "error_code": "ERROR_CODE",
                "timestamp": "2024-12-09T16:49:30.905765",
                "path": "/check-facts",
            }
        }
    )


# Fact Check Models
class Publisher(BaseModel):
    """A fact-check publisher as reported by the Google Fact Check API."""

    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    @field_validator("site")
    @classmethod
    def validate_site(cls, v):
        """Prefix bare domains with https:// so `site` is always a full URL."""
        if v and not (v.startswith("http://") or v.startswith("https://")):
            return f"https://{v}"
        return v


class ClaimReview(BaseModel):
    """A single review of a claim by one publisher.

    Field names mirror the Google Fact Check Tools API (camelCase),
    so they are intentionally not snake_case.
    """

    publisher: Publisher
    url: Optional[HttpUrl] = None
    title: Optional[str] = None
    reviewDate: Optional[str] = None
    textualRating: Optional[str] = None
    languageCode: str = Field(default="en-US")


class Claim(BaseModel):
    """A claim and the set of reviews published about it."""

    text: str
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]


class SourceType(str, Enum):
    """Category of a site consulted during fact-checking."""

    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"


class FactCheckSource(BaseModel):
    """A site to query, with a 1 (lowest) to 10 (highest) priority."""

    domain: str
    type: SourceType
    priority: int = Field(default=1, ge=1, le=10)


# Verification Models
class VerificationResult(BaseModel):
    """The AI verifier's verdict on a claim, with supporting evidence."""

    verdict: str = Field(..., description="True/False/Insufficient Information")
    confidence: str = Field(..., description="High/Medium/Low")
    evidence: Union[str, List[str]]
    reasoning: str
    missing_info: Optional[str] = None

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "verdict": "True",
                "confidence": "High",
                "evidence": ["Direct quote from source supporting the claim"],
                "reasoning": "Detailed analysis of why the claim is considered true",
                "missing_info": "Any caveats or limitations of the verification",
            }
        }
    )


# Request Models
class BaseFactCheckRequest(BaseModel):
    """Common request shape: the claim text to verify."""

    content: str = Field(
        ..., min_length=10, max_length=1000, description="The claim to be fact-checked"
    )

    @field_validator("content")
    @classmethod
    def validate_content(cls, v):
        """Reject whitespace-only content and trim surrounding whitespace."""
        if not v.strip():
            raise ValueError("Content cannot be empty or just whitespace")
        return v.strip()


class GoogleFactCheckRequest(BaseFactCheckRequest):
    """Request for the Google Fact Check Tools-backed endpoint."""

    # BCP-47-style tag restricted to ll-RR form (e.g. "en-US").
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(default=10, ge=1, le=50)


class AIFactCheckRequest(BaseFactCheckRequest):
    """Request for the AI-backed endpoint: claim plus source URLs."""

    # pydantic v2: min_length/max_length replace the deprecated
    # v1 min_items/max_items for list constraints (same semantics).
    urls: List[str] = Field(
        ...,
        min_length=1,
        max_length=5,
        description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing",
    )

    @field_validator("urls")
    @classmethod
    def validate_urls(cls, urls):
        """Normalize and validate each URL.

        Empty entries are rejected; scheme-less entries get an https://
        prefix; anything urlparse cannot resolve to a netloc is rejected.
        """
        validated_urls = []
        for url in urls:
            if not url.strip():
                raise ValueError("URL cannot be empty")
            # Add https:// if no protocol specified
            if not url.startswith(("http://", "https://")):
                url = f"https://{url}"
            try:
                result = urlparse(url)
                if not result.netloc:
                    raise ValueError(f"Invalid URL structure for {url}")
                validated_urls.append(url)
            except Exception as e:
                raise ValueError(f"Invalid URL {url}: {str(e)}")
        return validated_urls

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "content": "Indian flag was drawn in BUET campus",
                "urls": [
                    "www.altnews.in/article-about-flag",
                    "www.another-source.com/related-news",
                ],
            }
        }
    )


# Response Models
class BaseFactCheckResponse(BaseModel):
    """Common response shape: original query, token usage, and sources."""

    query: str
    token_usage: TokenUsage
    sources: List[str]

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Example statement to verify",
                "token_usage": {
                    "prompt_tokens": 100,
                    "completion_tokens": 50,
                    "total_tokens": 150,
                },
                "sources": ["source1.com", "source2.com"],
            }
        }
    )


class GoogleFactCheckResponse(BaseFactCheckResponse):
    """Response from the Google-backed endpoint with raw claim results."""

    total_claims_found: int
    results: List[Dict[str, Any]]
    verification_result: Dict[str, Any]
    summary: Dict[str, int]

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Example claim",
                "total_claims_found": 1,
                "results": [
                    {
                        "text": "Example claim text",
                        "claimant": "Source name",
                        "claimReview": [
                            {
                                "publisher": {
                                    "name": "Fact Checker",
                                    "site": "factchecker.com",
                                },
                                "textualRating": "True",
                            }
                        ],
                    }
                ],
                "verification_result": {
                    "verdict": "True",
                    "confidence": "High",
                    "evidence": ["Supporting evidence"],
                    "reasoning": "Detailed analysis",
                },
                "sources": ["factchecker.com"],
                "token_usage": {
                    "prompt_tokens": 100,
                    "completion_tokens": 50,
                    "total_tokens": 150,
                },
                "summary": {"total_sources": 1, "fact_checking_sites_queried": 10},
            }
        }
    )


class AIFactCheckResponse(BaseFactCheckResponse):
    """Response from the AI-backed endpoint, keyed by source URL."""

    verification_result: Dict[
        str, VerificationResult
    ]  # Changed to Dict to store results per URL

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Indian flag was drawn in BUET campus",
                "verification_result": {
                    "https://www.source1.com": {
                        "verdict": "True",
                        "confidence": "High",
                        "evidence": ["Supporting evidence from source 1"],
                        "reasoning": "Detailed analysis from source 1",
                        "missing_info": None,
                    },
                    "https://www.source2.com": {
                        "verdict": "True",
                        "confidence": "Medium",
                        "evidence": ["Supporting evidence from source 2"],
                        "reasoning": "Analysis from source 2",
                        "missing_info": "Additional context needed",
                    },
                },
                "sources": ["source1.com", "source2.com"],
                "token_usage": {
                    "prompt_tokens": 200,
                    "completion_tokens": 100,
                    "total_tokens": 300,
                },
            }
        }
    )


# Backwards compatibility aliases
FactCheckRequest = GoogleFactCheckRequest
FactCheckResponse = GoogleFactCheckResponse