fact-checker-backend/app/websites/fact_checker_website.py

from typing import Dict, List
import requests
from fastapi import HTTPException
from app.models.ai_fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType

# Sources configuration with validation
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in [
"snopes.com",
"politifact.com",
"factcheck.org",
"reuters.com/fact-check",
"apnews.com/hub/ap-fact-check",
"bbc.com/news/reality_check",
"fullfact.org",
"afp.com/fact-check",
"truthorfiction.com",
"leadstories.com",
"checkyourfact.com",
"washingtonpost.com/news/fact-checker",
"factcheck.kz",
"poynter.org/ifcn",
"factcheckeu.info",
"africacheck.org",
"thequint.com/webqoof",
"altnews.in",
"facta.news",
"factcheckni.org",
"mythdetector.ge",
"verificado.mx",
"euvsdisinfo.eu",
"factcheck.afp.com",
"newtral.es",
"maldita.es",
"faktograf.hr",
"demagog.org.pl",
"factnameh.com",
"faktiskt.se",
"teyit.org",
"factly.in",
"boom.live",
"stopfake.org",
"factcheck.ge",
"factcheck.kg",
"factcheck.uz",
"factcheck.tj",
"factcheck.az",
"factcheck.am",
"factcheck.md",
"verafiles.org",
"rappler.com/fact-check",
"vera.com.gt",
"chequeado.com",
"aosfatos.org",
"lasillavacia.com/detector-mentiras",
"colombiacheck.com",
"ecuadorchequea.com",
"elsurti.com/checado",
"verificat.cat",
"mafindo.or.id",
"tempo.co/cek-fakta",
"factcheck.mk",
"raskrinkavanje.ba",
"faktograf.hr",
"demagog.cz",
"faktabaari.fi",
"correctiv.org",
"mimikama.at",
"factcheck.vlaanderen",
"factuel.afp.com",
"nieuwscheckers.nl",
"faktisk.no",
"tjekdet.dk",
"ellinikahoaxes.gr",
"faktograf.id",
"stopfake.kz",
"pesacheck.org",
"dubawa.org",
"namibiafactcheck.org.na",
"zimfact.org",
"ghanafact.com",
"factspace.africa",
"factcrescendo.com",
"vishvasnews.com",
"factcheck.lk",
"newschecker.in",
"boomlive.in",
"digiteye.in",
"indiatoday.in/fact-check",
"factcrescendo.com",
"piyasa.com/fact-check",
"taiwanese.facts.news",
"taiwanfactcheck.com",
"mygopen.com",
"tfc-taiwan.org.tw",
"cofacts.tw",
"rumor.taipei",
"fact.qq.com",
"factcheck.afp.com/list",
"acfta.org",
"crosscheck.firstdraftnews.org",
"healthfeedback.org",
"climatefeedback.org",
"sciencefeedback.co",
"factcheck.aap.com.au",
"emergent.info",
"hoax-slayer.net",
"truthorfiction.com",
"factcheck.media",
"mediawise.org",
"thejournal.ie/factcheck",
"journalistsresource.org",
"metafact.io",
"reporterslab.org/fact-checking"
]
],
"news_sites": [
FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
for domain in [
"www.thedailystar.net",
"www.thefinancialexpress.com.bd",
"www.theindependentbd.com",
"www.dhakatribune.com",
"www.newagebd.net",
"www.observerbd.com",
"www.daily-sun.com",
"www.tbsnews.net",
"www.businesspostbd.com",
"www.banglanews24.com/english",
"www.bdnews24.com/english",
"www.risingbd.com/english",
"www.dailyindustry.news",
"www.bangladeshpost.net",
"www.daily-bangladesh.com/english"
]
]
}


async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource
) -> Dict:
    """
    Fetch fact checks from a specific site using the Google Fact Check API
    """
    try:
        if not api_key or not base_url:
            raise ValueError("API key or base URL not configured")
        params = {
            "key": api_key,
            "query": query,
            "languageCode": "en-US",
            "reviewPublisherSiteFilter": site.domain,
            "pageSize": 10
        }
        # Note: requests is synchronous, so this call blocks the event loop
        # while the HTTP request is in flight. The timeout keeps a slow source
        # from hanging the endpoint indefinitely.
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts"
            ).dict()
        )
    except ValueError as e:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail=str(e),
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )
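
# Example (illustrative sketch, not part of the original module): querying a
# single configured source with fetch_fact_checks(). The API key, endpoint
# URL, and query text below are placeholder assumptions.
#
#   result = await fetch_fact_checks(
#       api_key="YOUR_GOOGLE_API_KEY",
#       base_url="https://factchecktools.googleapis.com/v1alpha1/claims:search",
#       query="Claim text to verify",
#       site=SOURCES["fact_checkers"][0],
#   )
#   claims = result.get("claims", [])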


def get_all_sources() -> List[FactCheckSource]:
    """
    Get all sources sorted by priority
    """
    # all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"]
    all_sources = SOURCES["fact_checkers"]
    return sorted(all_sources, key=lambda x: x.priority)
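

# --- Usage sketch ---
# A minimal example of wiring get_all_sources() and fetch_fact_checks()
# together; it is not part of the original module. The endpoint URL is assumed
# to be the Google Fact Check Tools claim search endpoint, and
# GOOGLE_FACT_CHECK_API_KEY is a placeholder environment variable name.
if __name__ == "__main__":
    import asyncio
    import os

    async def _demo() -> None:
        api_key = os.environ.get("GOOGLE_FACT_CHECK_API_KEY")
        if not api_key:
            print("Set GOOGLE_FACT_CHECK_API_KEY to run this demo.")
            return
        base_url = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
        # Query only the first few sources to keep the demo short.
        for source in get_all_sources()[:3]:
            result = await fetch_fact_checks(
                api_key=api_key,
                base_url=base_url,
                query="Example claim to verify",
                site=source,
            )
            claims = result.get("claims", [])
            print(f"{source.domain}: {len(claims)} claim review(s)")

    asyncio.run(_demo())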