import asyncio
import functools
from typing import Dict, List

import requests
from fastapi import HTTPException

from app.models.fact_check_models import (
    ErrorResponse,
    FactCheckRequest,
    FactCheckSource,
    SourceType,
)

# Upper bound, in seconds, on how long a single upstream request may take.
# Without a timeout `requests` waits indefinitely on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 10

# Sources configuration with validation.
# Each entry is built as a FactCheckSource; dedicated fact checkers get
# priority 1 and general news sites get priority 2.
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in [
            "factcheck.org",
            "snopes.com",
            "politifact.com",
            "reuters.com",
            "bbc.com",
            "apnews.com",
            "usatoday.com",
            "nytimes.com",
            "washingtonpost.com",
            "afp.com",
            "fullfact.org",
            "truthorfiction.com",
            "leadstories.com",
            "altnews.in",
            "boomlive.in",
            "en.prothomalo.com",
        ]
    ],
    "news_sites": [
        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
        for domain in [
            "www.thedailystar.net",
            "www.thefinancialexpress.com.bd",
            "www.theindependentbd.com",
            "www.dhakatribune.com",
            "www.newagebd.net",
            "www.observerbd.com",
            "www.daily-sun.com",
            "www.tbsnews.net",
            "www.businesspostbd.com",
            "www.banglanews24.com/english",
            "www.bdnews24.com/english",
            "www.risingbd.com/english",
            "www.dailyindustry.news",
            "www.bangladeshpost.net",
            "www.daily-bangladesh.com/english",
        ]
    ],
}


def _error_payload(detail: str, error_code: str) -> Dict:
    """Return the serialized ErrorResponse body for a /check-facts failure."""
    return ErrorResponse(
        detail=detail,
        error_code=error_code,
        path="/check-facts",
    ).dict()


async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource
) -> Dict:
    """
    Fetch fact checks from a specific site using the Google Fact Check API.

    Args:
        api_key: API key sent as the ``key`` query parameter.
        base_url: Endpoint URL the GET request is sent to.
        query: Free-text claim to search for.
        site: Source whose ``domain`` is used as the publisher-site filter.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        HTTPException: 500 with ``CONFIGURATION_ERROR`` when ``api_key`` or
            ``base_url`` is empty; 503 with ``FACT_CHECK_SERVICE_ERROR`` when
            the request fails, times out, returns an error status, or the
            body is not valid JSON.
    """
    # Validate configuration up front so that only genuine configuration
    # problems are reported as CONFIGURATION_ERROR.
    if not api_key or not base_url:
        raise HTTPException(
            status_code=500,
            detail=_error_payload(
                "API key or base URL not configured",
                "CONFIGURATION_ERROR",
            ),
        )

    params = {
        "key": api_key,
        "query": query,
        "languageCode": "en-US",
        "reviewPublisherSiteFilter": site.domain,
        "pageSize": 10,
    }
    send_request = functools.partial(
        requests.get,
        base_url,
        params=params,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    try:
        # `requests` is synchronous; run it in the default executor so the
        # event loop keeps serving other coroutines while we wait.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, send_request)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on `requests` versions
        # whose JSON error is not a RequestException subclass; a malformed
        # upstream body is a service failure, not a configuration problem.
        raise HTTPException(
            status_code=503,
            detail=_error_payload(
                f"Error fetching from {site.domain}: {str(e)}",
                "FACT_CHECK_SERVICE_ERROR",
            ),
        ) from e


def get_all_sources() -> List[FactCheckSource]:
    """
    Get all sources sorted by priority.

    Returns:
        A new list holding every fact-checker and news-site source, ordered
        by ascending ``priority``. The sort is stable, so sources with equal
        priority keep the order they have in ``SOURCES``.
    """
    all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"]
    return sorted(all_sources, key=lambda source: source.priority)