fact-checker-backend/app/websites/fact_checker_website.py

from typing import Dict, List
import requests
from fastapi import HTTPException
from app.models.ai_fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType

# Sources configuration with validation
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in [
"snopes.com",
"politifact.com",
"factcheck.org",
"reuters.com/fact-check",
"apnews.com/hub/ap-fact-check",
"bbc.com/news/reality_check",
"fullfact.org",
"afp.com/fact-check",
"truthorfiction.com",
"leadstories.com",
"checkyourfact.com",
"washingtonpost.com/news/fact-checker",
"factcheck.kz",
"poynter.org/ifcn",
"factcheckeu.info",
"africacheck.org",
"thequint.com/webqoof",
"altnews.in",
"facta.news",
"factcheckni.org",
"mythdetector.ge",
"verificado.mx",
"euvsdisinfo.eu",
"factcheck.afp.com",
"newtral.es",
"maldita.es",
"faktograf.hr",
"demagog.org.pl",
"factnameh.com",
"faktiskt.se",
"teyit.org",
"factly.in",
"boom.live",
"stopfake.org",
"factcheck.ge",
"factcheck.kg",
"factcheck.uz",
"factcheck.tj",
"factcheck.az",
"factcheck.am",
"factcheck.md",
"verafiles.org",
"rappler.com/fact-check",
"vera.com.gt",
"chequeado.com",
"aosfatos.org",
"lasillavacia.com/detector-mentiras",
"colombiacheck.com",
"ecuadorchequea.com",
"elsurti.com/checado",
"verificat.cat",
"mafindo.or.id",
"tempo.co/cek-fakta",
"factcheck.mk",
"raskrinkavanje.ba",
"faktograf.hr",
"demagog.cz",
"faktabaari.fi",
"correctiv.org",
"mimikama.at",
"factcheck.vlaanderen",
"factuel.afp.com",
"nieuwscheckers.nl",
"faktisk.no",
"tjekdet.dk",
"ellinikahoaxes.gr",
"faktograf.id",
"stopfake.kz",
"pesacheck.org",
"dubawa.org",
"namibiafactcheck.org.na",
"zimfact.org",
"ghanafact.com",
"factspace.africa",
"factcrescendo.com",
"vishvasnews.com",
"factcheck.lk",
"newschecker.in",
"boomlive.in",
"digiteye.in",
"indiatoday.in/fact-check",
"factcrescendo.com",
"piyasa.com/fact-check",
"taiwanese.facts.news",
"taiwanfactcheck.com",
"mygopen.com",
"tfc-taiwan.org.tw",
"cofacts.tw",
"rumor.taipei",
"fact.qq.com",
"factcheck.afp.com/list",
"acfta.org",
"crosscheck.firstdraftnews.org",
"healthfeedback.org",
"climatefeedback.org",
"sciencefeedback.co",
"factcheck.aap.com.au",
"emergent.info",
"hoax-slayer.net",
"truthorfiction.com",
"factcheck.media",
"mediawise.org",
"thejournal.ie/factcheck",
"journalistsresource.org",
"metafact.io",
"reporterslab.org/fact-checking"
]
],
"news_sites": [
FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
for domain in [
"www.thedailystar.net",
"www.thefinancialexpress.com.bd",
"www.theindependentbd.com",
"www.dhakatribune.com",
"www.newagebd.net",
"www.observerbd.com",
"www.daily-sun.com",
"www.tbsnews.net",
"www.businesspostbd.com",
"www.banglanews24.com/english",
"www.bdnews24.com/english",
"www.risingbd.com/english",
"www.dailyindustry.news",
"www.bangladeshpost.net",
"www.daily-bangladesh.com/english"
]
]
}


async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource
) -> Dict:
    """
    Fetch fact checks from a specific site using the Google Fact Check API
    """
    try:
        if not api_key or not base_url:
            raise ValueError("API key or base URL not configured")
        params = {
            "key": api_key,
            "query": query,
            "languageCode": "en-US",
            "reviewPublisherSiteFilter": site.domain,
            "pageSize": 10
        }
        # Note: requests is synchronous, so this call blocks the event loop
        # while the HTTP request is in flight. The timeout keeps a slow source
        # from hanging the endpoint indefinitely.
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts"
            ).dict()
        )
    except ValueError as e:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail=str(e),
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )
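
# Example (illustrative sketch, not part of the original module): querying a
# single configured source with fetch_fact_checks(). The API key, endpoint
# URL, and query text below are placeholder assumptions.
#
#   result = await fetch_fact_checks(
#       api_key="YOUR_GOOGLE_API_KEY",
#       base_url="https://factchecktools.googleapis.com/v1alpha1/claims:search",
#       query="Claim text to verify",
#       site=SOURCES["fact_checkers"][0],
#   )
#   claims = result.get("claims", [])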


def get_all_sources() -> List[FactCheckSource]:
    """
    Get all sources sorted by priority
    """
    # all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"]
    all_sources = SOURCES["fact_checkers"]
    return sorted(all_sources, key=lambda x: x.priority)
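

# --- Usage sketch ---
# A minimal example of wiring get_all_sources() and fetch_fact_checks()
# together; it is not part of the original module. The endpoint URL is assumed
# to be the Google Fact Check Tools claim search endpoint, and
# GOOGLE_FACT_CHECK_API_KEY is a placeholder environment variable name.
if __name__ == "__main__":
    import asyncio
    import os

    async def _demo() -> None:
        api_key = os.environ.get("GOOGLE_FACT_CHECK_API_KEY")
        if not api_key:
            print("Set GOOGLE_FACT_CHECK_API_KEY to run this demo.")
            return
        base_url = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
        # Query only the first few sources to keep the demo short.
        for source in get_all_sources()[:3]:
            result = await fetch_fact_checks(
                api_key=api_key,
                base_url=base_url,
                query="Example claim to verify",
                site=source,
            )
            claims = result.get("claims", [])
            print(f"{source.domain}: {len(claims)} claim review(s)")

    asyncio.run(_demo())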