"""FastAPI routes for fact-checking claims through the Google Fact Check API."""
import asyncio
import functools
import json
import logging
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional, Union
from urllib.parse import quote_plus

import requests
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator, validator

from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
|
|
|
|
# Router on which this module registers its fact-checking endpoint(s).
fact_check_router = APIRouter()
|
|
|
|
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally serialises ``datetime`` objects."""

    def default(self, obj):
        """Encode ``datetime`` as an ISO 8601 string; defer all else to the base class.

        The base implementation raises ``TypeError`` for objects it cannot
        serialise, so unsupported types still fail loudly.
        """
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()
|
|
|
|
def _current_timestamp() -> str:
    """Return the current local (naive) time formatted as ISO 8601."""
    return datetime.now().isoformat()


# Error payload used by this module as the ``detail`` of the HTTPExceptions
# it raises.
#   detail     - human-readable description of the failure
#   error_code - machine-readable identifier for the failure category
#   timestamp  - creation time of the payload, filled in automatically
#   path       - endpoint path where the error occurred, when known
class ErrorResponse(BaseModel):
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "detail": "Error description",
                "error_code": "ERROR_CODE",
                "timestamp": "2024-12-09T16:49:30.905765",
                "path": "/check-facts",
            }
        }
    )

    detail: str
    error_code: str = Field(
        ...,
        description="Unique error code for this type of error",
    )
    timestamp: str = Field(default_factory=_current_timestamp)
    path: Optional[str] = Field(
        default=None,
        description="The endpoint path where error occurred",
    )
|
|
|
|
# Shape of a single request-validation problem.
#   loc  - path to the offending value, one string per segment
#   msg  - human-readable explanation
#   type - identifier of the kind of validation failure
# NOTE(review): not referenced elsewhere in the visible code; presumably kept
# for documenting validation error payloads - confirm before removing.
class RequestValidationError(BaseModel):
    loc: List[str]
    msg: str
    type: str
|
|
|
|
# Publisher of a claim review.
#   name - publisher's display name
#   site - publisher's website; scheme-less values are normalised to https://
class Publisher(BaseModel):
    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    # pydantic v2 validator API; the v1-style ``@validator`` is deprecated in
    # v2, which this file already depends on (``ConfigDict``, ``pattern=``).
    @field_validator("site")
    @classmethod
    def validate_site(cls, v: Optional[str]) -> Optional[str]:
        """Prefix a scheme-less site with ``https://``.

        ``None`` and empty strings are returned unchanged, as are values that
        already start with ``http://`` or ``https://``.
        """
        if v and not v.startswith(("http://", "https://")):
            return f"https://{v}"
        return v
|
|
|
|
# One review attached to a claim. Only ``publisher`` is required; every other
# field falls back to ``None`` (or "en-US" for the language code) when the
# incoming data omits it.
class ClaimReview(BaseModel):
    publisher: Publisher
    url: Optional[HttpUrl] = Field(default=None)
    title: Optional[str] = Field(default=None)
    reviewDate: Optional[str] = Field(default=None)
    textualRating: Optional[str] = Field(default=None)
    languageCode: str = "en-US"
|
|
|
|
# A claim as returned in the "claims" list of the fact-check API response.
# ``text`` and ``claimReview`` are required; ``claimant`` and ``claimDate``
# are optional and default to ``None``.
class Claim(BaseModel):
    text: str
    claimant: Optional[str] = Field(default=None)
    claimDate: Optional[str] = Field(default=None)
    claimReview: List[ClaimReview]
|
|
|
|
# Sample payload shown in the generated schema for FactCheckResponse.
_FACT_CHECK_RESPONSE_EXAMPLE = {
    "query": "Example claim",
    "total_claims_found": 1,
    "results": [
        {
            "text": "Example claim text",
            "claimant": "Source name",
            "claimReview": [
                {
                    "publisher": {
                        "name": "Fact Checker",
                        "site": "factchecker.com",
                    },
                    "textualRating": "True",
                }
            ],
        }
    ],
    "summary": {
        "total_sources": 1,
        "fact_checking_sites_queried": 10,
    },
}


# Body returned by the /check-facts endpoint.
#   query              - the text that was fact-checked
#   total_claims_found - number of claims in ``results`` (never negative)
#   results            - the matching claims; empty list by default
#   summary            - named integer counters describing the lookup
class FactCheckResponse(BaseModel):
    model_config = ConfigDict(
        json_schema_extra={"example": _FACT_CHECK_RESPONSE_EXAMPLE}
    )

    query: str = Field(..., description="Original query that was fact-checked")
    total_claims_found: int = Field(..., ge=0)
    results: List[Claim] = Field(default_factory=list)
    summary: Dict[str, int]
|
|
|
|
# Category of a configured source. Members are also ``str`` instances, so
# they compare equal to, and serialise as, their plain string values.
class SourceType(str, Enum):
    FACT_CHECKER = "fact_checker"  # dedicated fact-checking outlet
    NEWS_SITE = "news_site"  # general news publication
|
|
|
|
# A site that can be queried for fact checks.
#   domain   - site identifier sent to the API as the publisher filter
#   type     - whether the site is a fact checker or a news site
#   priority - integer from 1 to 10 (default 1); sources are queried in
#              ascending priority order
class FactCheckSource(BaseModel):
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "domain": "factcheck.org",
                "type": "fact_checker",
                "priority": 1,
            }
        }
    )

    domain: str
    type: SourceType
    priority: int = Field(default=1, ge=1, le=10)
|
|
|
|
# Domains queried as dedicated fact-checking sources (priority 1).
_FACT_CHECKER_DOMAINS = (
    "factcheck.org",
    "snopes.com",
    "politifact.com",
    "reuters.com",
    "bbc.com",
    "apnews.com",
    "usatoday.com",
    "nytimes.com",
    "washingtonpost.com",
    "afp.com",
    "fullfact.org",
    "truthorfiction.com",
    "leadstories.com",
    "altnews.in",
    "boomlive.in",
    "en.prothomalo.com",
)

# Domains queried as general news sources (priority 2).
# NOTE(review): several entries carry a "www." prefix and/or a path segment
# and are sent verbatim as the API's publisher-site filter - confirm the API
# matches such values.
_NEWS_SITE_DOMAINS = (
    "www.thedailystar.net",
    "www.thefinancialexpress.com.bd",
    "www.theindependentbd.com",
    "www.dhakatribune.com",
    "www.newagebd.net",
    "www.observerbd.com",
    "www.daily-sun.com",
    "www.tbsnews.net",
    "www.businesspostbd.com",
    "www.banglanews24.com/english",
    "www.bdnews24.com/english",
    "www.risingbd.com/english",
    "www.dailyindustry.news",
    "www.bangladeshpost.net",
    "www.daily-bangladesh.com/english",
)


def _build_sources(domains, source_type, priority):
    """Create one validated ``FactCheckSource`` per domain, preserving order."""
    return [
        FactCheckSource(domain=entry, type=source_type, priority=priority)
        for entry in domains
    ]


# Sources configuration; every entry is validated by FactCheckSource when the
# module is imported.
SOURCES = {
    "fact_checkers": _build_sources(
        _FACT_CHECKER_DOMAINS, SourceType.FACT_CHECKER, 1
    ),
    "news_sites": _build_sources(_NEWS_SITE_DOMAINS, SourceType.NEWS_SITE, 2),
}
|
|
|
|
# Request body accepted by the /check-facts endpoint.
#   content                - the claim text; 10 to 1000 characters, measured
#                            after surrounding whitespace is removed
#   language               - locale code such as "en-US" (two lowercase
#                            letters, a hyphen, two uppercase letters)
#   max_results_per_source - integer from 1 to 50, default 10
class FactCheckRequest(BaseModel):
    content: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked",
    )
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(default=10, ge=1, le=50)

    # Runs in "before" mode so that the length limits declared on the field
    # are enforced on the stripped text. With an "after" validator a short
    # claim padded with spaces would satisfy ``min_length`` and only then be
    # stripped down below the limit.
    @field_validator("content", mode="before")
    @classmethod
    def validate_content(cls, v):
        """Strip surrounding whitespace and reject whitespace-only content.

        Non-string input is passed through untouched so that pydantic's own
        type validation reports it.

        Raises:
            ValueError: if the content is empty or consists only of whitespace.
        """
        if not isinstance(v, str):
            return v
        stripped = v.strip()
        if not stripped:
            raise ValueError("Content cannot be empty or just whitespace")
        return stripped
|
|
|
|
logger = logging.getLogger(__name__)

# Endpoint path reported in the error payloads built by this module.
_CHECK_FACTS_PATH = "/check-facts"

# Upper bound, in seconds, on a single upstream request. Without a timeout
# ``requests`` may wait indefinitely on an unresponsive server.
_REQUEST_TIMEOUT_SECONDS = 10


def _error_payload(detail: str, error_code: str) -> Dict:
    """Build the serialised ``ErrorResponse`` used as an ``HTTPException`` detail."""
    return ErrorResponse(
        detail=detail,
        error_code=error_code,
        path=_CHECK_FACTS_PATH,
    ).model_dump()


def _redact_secret(text: str, secret: str) -> str:
    """Return *text* with raw and URL-encoded occurrences of *secret* masked."""
    if not secret:
        return text
    for form in (secret, quote_plus(secret)):
        text = text.replace(form, "[REDACTED]")
    return text


async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource,
    language_code: str = "en-US",
    page_size: int = 10,
) -> Dict:
    """Fetch fact checks from a specific site using the Google Fact Check API.

    Args:
        api_key: API key sent as the ``key`` query parameter.
        base_url: URL of the search endpoint.
        query: Claim text to search for.
        site: Source whose ``domain`` is used as the publisher-site filter.
        language_code: Value of the ``languageCode`` query parameter.
        page_size: Value of the ``pageSize`` query parameter.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        HTTPException: 500 (``CONFIGURATION_ERROR``) when the key or URL is
            empty; 503 (``FACT_CHECK_SERVICE_ERROR``) when the request fails,
            times out, returns an error status, or yields an undecodable body.
    """
    if not api_key or not base_url:
        raise HTTPException(
            status_code=500,
            detail=_error_payload(
                "API key or base URL not configured",
                "CONFIGURATION_ERROR",
            ),
        )

    params = {
        "key": api_key,
        "query": query,
        "languageCode": language_code,
        "reviewPublisherSiteFilter": site.domain,
        "pageSize": page_size,
    }
    send_request = functools.partial(
        requests.get,
        base_url,
        params=params,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )

    try:
        # ``requests`` is blocking; run it in the default executor so the
        # event loop keeps serving other requests while we wait.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, send_request)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as exc:
        # The exception text embeds the request URL, which carries the API
        # key as a query parameter; mask it before it reaches the client.
        safe_message = _redact_secret(str(exc), api_key)
        raise HTTPException(
            status_code=503,
            detail=_error_payload(
                f"Error fetching from {site.domain}: {safe_message}",
                "FACT_CHECK_SERVICE_ERROR",
            ),
        ) from exc
    except ValueError as exc:
        # Depending on the installed ``requests`` version, an undecodable
        # body may surface as a plain ValueError instead of a
        # RequestException. It is an upstream failure, not a configuration
        # problem.
        raise HTTPException(
            status_code=503,
            detail=_error_payload(
                f"Error fetching from {site.domain}: invalid JSON in response",
                "FACT_CHECK_SERVICE_ERROR",
            ),
        ) from exc


@fact_check_router.post(
    "/check-facts",
    response_model=FactCheckResponse,
    responses={
        400: {"model": ErrorResponse},
        404: {"model": ErrorResponse},
        500: {"model": ErrorResponse},
        503: {"model": ErrorResponse},
    },
)
async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
    """Check facts using multiple fact-checking sources.

    Every configured source is queried in ascending priority order with the
    request's content, language and per-source result limit, and the claims
    found are combined into one response.

    Args:
        request: The validated claim, language code and per-source limit.

    Returns:
        The collected claims plus a summary with the number of distinct
        publishers among the reviews and the number of sources queried.

    Raises:
        HTTPException: 500 when the API configuration is missing, 503 when a
            source request fails, 404 when no source returned any claim.
    """
    if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        raise HTTPException(
            status_code=500,
            detail=_error_payload(
                "API configuration is missing",
                "CONFIGURATION_ERROR",
            ),
        )

    # ``sorted`` builds a new list, leaving the shared SOURCES lists intact;
    # the sort is stable, so declaration order is kept within a priority.
    all_sources = sorted(
        SOURCES["fact_checkers"] + SOURCES["news_sites"],
        key=lambda source: source.priority,
    )

    all_results: List[Claim] = []
    for source in all_sources:
        try:
            result = await fetch_fact_checks(
                GOOGLE_FACT_CHECK_API_KEY,
                GOOGLE_FACT_CHECK_BASE_URL,
                request.content,
                source,
                language_code=request.language,
                page_size=request.max_results_per_source,
            )

            if "claims" in result:
                # Validate the whole batch before extending, so a source with
                # one malformed claim contributes nothing rather than a
                # partial set.
                validated_claims = [
                    Claim(**claim) for claim in result["claims"]
                ]
                all_results.extend(validated_claims)
        except HTTPException:
            # Configuration and upstream failures abort the whole request.
            raise
        except Exception:
            # Best effort: a source whose payload cannot be processed is
            # logged and skipped so the remaining sources are still queried.
            logger.exception("Error processing %s", source.domain)
            continue

    if not all_results:
        raise HTTPException(
            status_code=404,
            detail=_error_payload(
                "No fact check results found",
                "NO_RESULTS_FOUND",
            ),
        )

    # Count distinct publishers over every review of every claim; fall back
    # to the publisher name when no site is given.
    publishers = {
        review.publisher.site or review.publisher.name
        for claim in all_results
        for review in claim.claimReview
    }

    return FactCheckResponse(
        query=request.content,
        total_claims_found=len(all_results),
        results=all_results,
        summary={
            "total_sources": len(publishers),
            "fact_checking_sites_queried": len(all_sources),
        },
    )