"""FastAPI router that fact-checks a claim via the Google Fact Check API.

The single endpoint, ``POST /check-facts``, queries the API once per
configured publisher site and aggregates the validated claims.
"""

import json
import logging
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional, Union
from urllib.parse import quote_plus

import requests
from fastapi import APIRouter, HTTPException
from fastapi.concurrency import run_in_threadpool
from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    HttpUrl,
    field_validator,
    validator,
)

from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL

logger = logging.getLogger(__name__)

fact_check_router = APIRouter()

# Path reported in every ErrorResponse produced by this module.
FACT_CHECK_PATH = "/check-facts"

# Upper bound (seconds) on a single upstream call so that a hung connection
# cannot stall a request forever.
REQUEST_TIMEOUT_SECONDS = 10


class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``datetime`` objects as ISO-8601 strings."""

    def default(self, obj):
        if isinstance(obj, datetime):
            return obj.isoformat()
        return super().default(obj)


class ErrorResponse(BaseModel):
    """Error payload placed in the ``detail`` of HTTPExceptions raised here."""

    detail: str
    error_code: str = Field(
        ..., description="Unique error code for this type of error"
    )
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    path: Optional[str] = Field(
        None, description="The endpoint path where error occurred"
    )

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "detail": "Error description",
            "error_code": "ERROR_CODE",
            "timestamp": "2024-12-09T16:49:30.905765",
            "path": "/check-facts"
        }
    })


class RequestValidationError(BaseModel):
    """Shape of a single request-validation error entry."""

    loc: List[str]
    msg: str
    type: str


class Publisher(BaseModel):
    """Publisher of a claim review."""

    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    @field_validator('site')
    @classmethod
    def validate_site(cls, v: Optional[str]) -> Optional[str]:
        """Prefix ``https://`` when a non-empty site has no HTTP(S) scheme."""
        if v and not v.startswith(('http://', 'https://')):
            return f"https://{v}"
        return v


class ClaimReview(BaseModel):
    """One review of a claim by a publisher."""

    publisher: Publisher
    url: Optional[HttpUrl] = None
    title: Optional[str] = None
    reviewDate: Optional[str] = None
    textualRating: Optional[str] = None
    languageCode: str = Field(default="en-US")


class Claim(BaseModel):
    """A claim together with its reviews."""

    text: str
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]


class FactCheckResponse(BaseModel):
    """Response body of ``POST /check-facts``."""

    query: str = Field(..., description="Original query that was fact-checked")
    total_claims_found: int = Field(..., ge=0)
    results: List[Claim] = Field(default_factory=list)
    summary: Dict[str, int] = Field(...)

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example claim",
            "total_claims_found": 1,
            "results": [{
                "text": "Example claim text",
                "claimant": "Source name",
                "claimReview": [{
                    "publisher": {
                        "name": "Fact Checker",
                        "site": "factchecker.com"
                    },
                    "textualRating": "True"
                }]
            }],
            "summary": {
                "total_sources": 1,
                "fact_checking_sites_queried": 10
            }
        }
    })


class SourceType(str, Enum):
    """Category of a configured source."""

    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"


class FactCheckSource(BaseModel):
    """A publisher site to query, with a priority (lower sorts first)."""

    domain: str
    type: SourceType
    priority: int = Field(default=1, ge=1, le=10)

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "domain": "factcheck.org",
            "type": "fact_checker",
            "priority": 1
        }
    })


# Sources configuration with validation
# NOTE(review): several news-site entries carry a "www." prefix and/or a path
# component; confirm the upstream site filter matches such values.
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in [
            "factcheck.org",
            "snopes.com",
            "politifact.com",
            "reuters.com",
            "bbc.com",
            "apnews.com",
            "usatoday.com",
            "nytimes.com",
            "washingtonpost.com",
            "afp.com",
            "fullfact.org",
            "truthorfiction.com",
            "leadstories.com",
            "altnews.in",
            "boomlive.in",
            "en.prothomalo.com"
        ]
    ],
    "news_sites": [
        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
        for domain in [
            "www.thedailystar.net",
            "www.thefinancialexpress.com.bd",
            "www.theindependentbd.com",
            "www.dhakatribune.com",
            "www.newagebd.net",
            "www.observerbd.com",
            "www.daily-sun.com",
            "www.tbsnews.net",
            "www.businesspostbd.com",
            "www.banglanews24.com/english",
            "www.bdnews24.com/english",
            "www.risingbd.com/english",
            "www.dailyindustry.news",
            "www.bangladeshpost.net",
            "www.daily-bangladesh.com/english"
        ]
    ]
}


class FactCheckRequest(BaseModel):
    """Request body of ``POST /check-facts``."""

    content: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked"
    )
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(default=10, ge=1, le=50)

    @field_validator('content')
    @classmethod
    def validate_content(cls, v: str) -> str:
        """Reject whitespace-only content and return the stripped text."""
        stripped = v.strip()
        if not stripped:
            raise ValueError("Content cannot be empty or just whitespace")
        return stripped


def _http_error(status_code: int, detail: str, error_code: str) -> HTTPException:
    """Build an HTTPException whose detail is a serialized ErrorResponse."""
    payload = ErrorResponse(
        detail=detail,
        error_code=error_code,
        path=FACT_CHECK_PATH
    )
    return HTTPException(status_code=status_code, detail=payload.model_dump())


def _redact_secret(message: str, secret: str) -> str:
    """Return *message* with raw and URL-encoded forms of *secret* masked."""
    if not secret:
        return message
    for form in {secret, quote_plus(secret)}:
        message = message.replace(form, "[REDACTED]")
    return message


def _count_publisher_sites(claims: List[Claim]) -> int:
    """Count distinct publisher sites, using each claim's first review.

    Claims without reviews, and reviews whose publisher has no site, are
    skipped so that a missing site is not counted as a source.
    """
    sites = set()
    for claim in claims:
        if not claim.claimReview:
            continue
        site = claim.claimReview[0].publisher.site
        if site:
            sites.add(site)
    return len(sites)


async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource,
    language: str = "en-US",
    page_size: int = 10
) -> Dict:
    """
    Fetch fact checks from a specific site using the Google Fact Check API

    Args:
        api_key: API key sent as the ``key`` query parameter.
        base_url: Endpoint URL to query.
        query: Claim text to search for.
        site: Source whose ``domain`` is sent as the publisher site filter.
        language: Value sent as ``languageCode``.
        page_size: Value sent as ``pageSize``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        HTTPException: 500 (``CONFIGURATION_ERROR``) when the key or URL is
            empty; 503 (``FACT_CHECK_SERVICE_ERROR``) when the request fails,
            times out, returns an error status, or the body is not JSON.
    """
    # Checked outside the try block so a configuration problem can never be
    # confused with a malformed upstream response.
    if not api_key or not base_url:
        raise _http_error(
            500, "API key or base URL not configured", "CONFIGURATION_ERROR"
        )

    params = {
        "key": api_key,
        "query": query,
        "languageCode": language,
        "reviewPublisherSiteFilter": site.domain,
        "pageSize": page_size
    }

    try:
        # requests is synchronous; run it in a worker thread so the event
        # loop stays free to serve other requests while we wait.
        response = await run_in_threadpool(
            requests.get,
            base_url,
            params=params,
            timeout=REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests versions where
        # that error is not a RequestException. The exception text can embed
        # the request URL (including the key), so mask it before it reaches
        # the client.
        reason = _redact_secret(str(e), api_key)
        raise _http_error(
            503,
            f"Error fetching from {site.domain}: {reason}",
            "FACT_CHECK_SERVICE_ERROR"
        ) from e


@fact_check_router.post(
    "/check-facts",
    response_model=FactCheckResponse,
    responses={
        400: {"model": ErrorResponse},
        404: {"model": ErrorResponse},
        500: {"model": ErrorResponse},
        503: {"model": ErrorResponse}
    }
)
async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
    """
    Check facts using multiple fact-checking sources

    Every configured source is queried in priority order with the request's
    language and per-source result limit. Claims from a source are kept only
    if all of them validate against ``Claim``.

    Raises:
        HTTPException: 500 when the API configuration is missing, 503 when
            fetching from any source fails, 404 when no claims were found.
    """
    # Validate configuration
    if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        raise _http_error(
            500, "API configuration is missing", "CONFIGURATION_ERROR"
        )

    # Check all sources in priority order (sorted() leaves SOURCES untouched)
    all_sources = sorted(
        SOURCES["fact_checkers"] + SOURCES["news_sites"],
        key=lambda source: source.priority
    )

    all_results: List[Claim] = []
    for source in all_sources:
        try:
            result = await fetch_fact_checks(
                GOOGLE_FACT_CHECK_API_KEY,
                GOOGLE_FACT_CHECK_BASE_URL,
                request.content,
                source,
                language=request.language,
                page_size=request.max_results_per_source
            )

            if "claims" in result:
                # Validate each claim through Pydantic; build the full list
                # first so a bad claim discards this source atomically.
                validated_claims = [
                    Claim(**claim) for claim in result["claims"]
                ]
                all_results.extend(validated_claims)

        except HTTPException:
            # Upstream/configuration failures abort the whole request.
            raise
        except Exception:
            # Log the error but continue with other sources
            logger.exception("Error processing %s", source.domain)
            continue

    if not all_results:
        raise _http_error(
            404, "No fact check results found", "NO_RESULTS_FOUND"
        )

    # Create the response using Pydantic model
    return FactCheckResponse(
        query=request.content,
        total_claims_found=len(all_results),
        results=all_results,
        summary={
            "total_sources": _count_publisher_sites(all_results),
            "fact_checking_sites_queried": len(all_sources)
        }
    )