190 lines
No EOL
5 KiB
Python
190 lines
No EOL
5 KiB
Python
import asyncio
import functools
from typing import Dict, List
from urllib.parse import quote_plus

import requests
from fastapi import HTTPException

from app.models.ai_fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType
|
|
|
|
# Sources configuration with validation
|
|
# Registry of sources, grouped by kind.
#
# Each domain string is wrapped in a FactCheckSource carrying the group's
# SourceType and priority (1 for fact checkers, 2 for news sites).
# Every domain is listed exactly once per group: fetch_fact_checks issues one
# HTTP request per source, so a repeated entry would only repeat that request.
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in [
            "snopes.com",
            "politifact.com",
            "factcheck.org",
            "reuters.com/fact-check",
            "apnews.com/hub/ap-fact-check",
            "bbc.com/news/reality_check",
            "fullfact.org",
            "afp.com/fact-check",
            "truthorfiction.com",
            "leadstories.com",
            "checkyourfact.com",
            "washingtonpost.com/news/fact-checker",
            "factcheck.kz",
            "poynter.org/ifcn",
            "factcheckeu.info",
            "africacheck.org",
            "thequint.com/webqoof",
            "altnews.in",
            "facta.news",
            "factcheckni.org",
            "mythdetector.ge",
            "verificado.mx",
            "euvsdisinfo.eu",
            "factcheck.afp.com",
            "newtral.es",
            "maldita.es",
            "faktograf.hr",
            "demagog.org.pl",
            "factnameh.com",
            "faktiskt.se",
            "teyit.org",
            "factly.in",
            "boom.live",
            "stopfake.org",
            "factcheck.ge",
            "factcheck.kg",
            "factcheck.uz",
            "factcheck.tj",
            "factcheck.az",
            "factcheck.am",
            "factcheck.md",
            "verafiles.org",
            "rappler.com/fact-check",
            "vera.com.gt",
            "chequeado.com",
            "aosfatos.org",
            "lasillavacia.com/detector-mentiras",
            "colombiacheck.com",
            "ecuadorchequea.com",
            "elsurti.com/checado",
            "verificat.cat",
            "mafindo.or.id",
            "tempo.co/cek-fakta",
            "factcheck.mk",
            "raskrinkavanje.ba",
            "demagog.cz",
            "faktabaari.fi",
            "correctiv.org",
            "mimikama.at",
            "factcheck.vlaanderen",
            "factuel.afp.com",
            "nieuwscheckers.nl",
            "faktisk.no",
            "tjekdet.dk",
            "ellinikahoaxes.gr",
            "faktograf.id",
            "stopfake.kz",
            "pesacheck.org",
            "dubawa.org",
            "namibiafactcheck.org.na",
            "zimfact.org",
            "ghanafact.com",
            "factspace.africa",
            "factcrescendo.com",
            "vishvasnews.com",
            "factcheck.lk",
            "newschecker.in",
            "boomlive.in",
            "digiteye.in",
            "indiatoday.in/fact-check",
            "piyasa.com/fact-check",
            "taiwanese.facts.news",
            "taiwanfactcheck.com",
            "mygopen.com",
            "tfc-taiwan.org.tw",
            "cofacts.tw",
            "rumor.taipei",
            "fact.qq.com",
            "factcheck.afp.com/list",
            "acfta.org",
            "crosscheck.firstdraftnews.org",
            "healthfeedback.org",
            "climatefeedback.org",
            "sciencefeedback.co",
            "factcheck.aap.com.au",
            "emergent.info",
            "hoax-slayer.net",
            "factcheck.media",
            "mediawise.org",
            "thejournal.ie/factcheck",
            "journalistsresource.org",
            "metafact.io",
            "reporterslab.org/fact-checking",
        ]
    ],
    "news_sites": [
        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
        for domain in [
            "www.thedailystar.net",
            "www.thefinancialexpress.com.bd",
            "www.theindependentbd.com",
            "www.dhakatribune.com",
            "www.newagebd.net",
            "www.observerbd.com",
            "www.daily-sun.com",
            "www.tbsnews.net",
            "www.businesspostbd.com",
            "www.banglanews24.com/english",
            "www.bdnews24.com/english",
            "www.risingbd.com/english",
            "www.dailyindustry.news",
            "www.bangladeshpost.net",
            "www.daily-bangladesh.com/english",
        ]
    ],
}
|
|
|
|
async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource,
    timeout: float = 10.0,
) -> Dict:
    """
    Fetch fact checks from a specific site using the Google Fact Check API.

    Args:
        api_key: API key sent as the ``key`` query parameter.
        base_url: Endpoint URL the GET request is sent to.
        query: Search text sent as the ``query`` parameter.
        site: Source whose ``domain`` is used as ``reviewPublisherSiteFilter``.
        timeout: Seconds to wait for the upstream service before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        HTTPException: 500 with ``CONFIGURATION_ERROR`` when ``api_key`` or
            ``base_url`` is empty; 503 with ``FACT_CHECK_SERVICE_ERROR`` when
            the request fails, times out, returns an error status, or the
            body is not valid JSON.
    """
    # Validate configuration up front so that only genuine configuration
    # problems are reported as CONFIGURATION_ERROR.
    if not api_key or not base_url:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="API key or base URL not configured",
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )

    params = {
        "key": api_key,
        "query": query,
        "languageCode": "en-US",
        "reviewPublisherSiteFilter": site.domain,
        "pageSize": 10
    }

    try:
        # requests is blocking; run it in the default executor so the event
        # loop keeps serving other coroutines while waiting on the network.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            functools.partial(
                requests.get, base_url, params=params, timeout=timeout
            ),
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        #
        # requests error messages embed the request URL, which contains the
        # "key" query parameter. Strip the key (raw and URL-encoded) before
        # the message is returned to the client. api_key is non-empty here
        # (checked above), so replace() cannot match the empty string.
        message = str(e)
        for secret in (api_key, quote_plus(api_key)):
            message = message.replace(secret, "[REDACTED]")
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {message}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts"
            ).dict()
        ) from e
|
|
|
|
def get_all_sources() -> List[FactCheckSource]:
    """
    Return the fact-checker sources ordered by ascending priority.

    Only the "fact_checkers" group of SOURCES is used; the "news_sites"
    group is not part of the result. A new list is returned, so the
    registry itself is left untouched.
    """
    ranked = list(SOURCES["fact_checkers"])
    ranked.sort(key=lambda source: source.priority)
    return ranked