fact-checker-backend/app/api/fact_check.py
2024-12-15 18:22:04 +06:00

173 lines
No EOL
5.9 KiB
Python

from fastapi import APIRouter, HTTPException
import json
from datetime import datetime
from typing import Dict, List
import httpx
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
from app.models.fact_check_models import (
GoogleFactCheckRequest as FactCheckRequest,
GoogleFactCheckResponse as FactCheckResponse,
Claim,
ErrorResponse,
TokenUsage
)
from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources
# Router on which this module registers its fact-checking endpoint(s).
fact_check_router = APIRouter()
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes date/time values as ISO 8601 strings.

    ``datetime`` objects are handled as before; plain ``date`` and ``time``
    objects are now accepted as well. Any other unsupported type is delegated
    to ``json.JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Args:
            obj: The value the stock encoder could not serialize.

        Returns:
            ``obj.isoformat()`` for ``datetime``, ``date`` and ``time`` values.

        Raises:
            TypeError: For every other type (raised by the base class).
        """
        # Local import: the module only imports ``datetime`` at file level.
        from datetime import date, time

        # ``datetime`` is a subclass of ``date``; it is listed explicitly so
        # the original contract stays obvious to the reader.
        if isinstance(obj, (datetime, date, time)):
            return obj.isoformat()
        return super().default(obj)
async def validate_api_key():
    """Validate the Google API key with a minimal test request.

    Issues a one-result ``claims:search`` query against
    ``GOOGLE_FACT_CHECK_BASE_URL`` using ``GOOGLE_API_KEY``.

    Returns:
        ``True`` when the upstream request completes with a success status.

    Raises:
        HTTPException: 503 with ``INVALID_API_KEY`` when the upstream answers
            403; 503 with ``API_VALIDATION_ERROR`` for any other HTTP error
            status or for a transport-level failure (DNS, connect, timeout).
    """
    test_url = f"{GOOGLE_FACT_CHECK_BASE_URL}claims:search"
    params = {
        "key": GOOGLE_API_KEY,
        "query": "test",
        "languageCode": "en-US",
        "pageSize": 1
    }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(test_url, params=params)
            response.raise_for_status()
    except httpx.HTTPStatusError as e:
        upstream_status = e.response.status_code
        # NOTE(review): only 403 is mapped to INVALID_API_KEY, as before;
        # confirm whether the upstream reports bad keys with other codes too.
        if upstream_status == 403:
            raise HTTPException(
                status_code=503,
                detail=ErrorResponse(
                    detail="Invalid or expired API key",
                    error_code="INVALID_API_KEY",
                    path="/check-facts"
                ).dict()
            ) from e
        # Do NOT interpolate str(e): httpx includes the full request URL in
        # the message, and that URL carries the API key as a query parameter.
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"API validation failed: upstream returned HTTP {upstream_status}",
                error_code="API_VALIDATION_ERROR",
                path="/check-facts"
            ).dict()
        ) from e
    except httpx.RequestError as e:
        # Transport failures never produce a response; surface them as the
        # same 503 instead of letting them escape as an unhandled error.
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"API validation failed: {type(e).__name__}",
                error_code="API_VALIDATION_ERROR",
                path="/check-facts"
            ).dict()
        ) from e

    return True
def _record_review_metadata(claim: dict, sources: list, contexts: list) -> None:
    """Append the first review's publisher site and textual rating, if any.

    Missing, ``None`` or empty ``claimReview`` / ``publisher`` entries are
    skipped instead of raising, so one sparse claim cannot fail its source.
    """
    reviews = claim.get("claimReview") or []
    if not reviews:
        return
    # Only the first review of each claim is inspected.
    review = reviews[0]

    publisher = review.get("publisher") or {}
    site = publisher.get("site")
    if site:
        sources.append(site)

    rating = review.get("textualRating")
    if rating is not None:
        contexts.append(rating)


@fact_check_router.post(
    "/check-facts",
    response_model=FactCheckResponse,
    responses={
        400: {"model": ErrorResponse},
        404: {"model": ErrorResponse},
        500: {"model": ErrorResponse},
        503: {"model": ErrorResponse}
    }
)
async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
    """
    Check facts using multiple fact-checking sources.

    Every source returned by ``get_all_sources()`` is queried in order with
    ``request.content``. Claims are validated through the ``Claim`` model and
    collected; a source that raises is recorded in the ``failed_sources``
    summary and does not abort the request.

    Args:
        request: Incoming request; only ``request.content`` is read here.

    Returns:
        A ``FactCheckResponse`` with the collected claims, the distinct
        publisher sites, the textual ratings used as evidence, and a summary
        that lists any failed sources.

    Raises:
        HTTPException: 500 when the API key or base URL is not configured;
            503 when ``validate_api_key()`` rejects the key; 404 when no
            source produced any claim.
    """
    # Validate configuration
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="API configuration is missing",
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )

    # Validate API key before proceeding
    await validate_api_key()

    all_sources = get_all_sources()
    all_results = []
    all_sources_list = []  # Publisher sites seen in claim reviews
    contexts_used = []  # Textual ratings seen in claim reviews
    failed_sources = []  # One {"source", "error"} entry per failed source

    for source in all_sources:
        try:
            result = await fetch_fact_checks(
                GOOGLE_API_KEY,
                GOOGLE_FACT_CHECK_BASE_URL,
                request.content,
                source
            )
            if "claims" in result:
                for claim in result["claims"]:
                    # Validate each claim through Pydantic
                    validated_claim = Claim(**claim).dict()
                    all_results.append(validated_claim)
                    _record_review_metadata(
                        validated_claim, all_sources_list, contexts_used
                    )
        except HTTPException as http_err:
            failed_sources.append({
                "source": source.domain,
                "error": str(http_err.detail)
            })
        except Exception as e:
            # Deliberate best effort: one bad source must not sink the
            # whole request, so the failure is recorded and the loop goes on.
            failed_sources.append({
                "source": source.domain,
                "error": str(e)
            })

    if not all_results:
        raise HTTPException(
            status_code=404,
            detail=ErrorResponse(
                detail="No fact check results found. Failed sources: " +
                ", ".join([f"{f['source']}: {f['error']}" for f in failed_sources]),
                error_code="NO_RESULTS_FOUND",
                path="/check-facts"
            ).dict()
        )

    # Partial results are still returned when some sources failed.
    verification_result = {
        "verdict": "Partial Results Available" if failed_sources else "Complete Results",
        "confidence": "Medium" if failed_sources else "High",
        "evidence": contexts_used,
        "reasoning": "Based on available fact checks",
        "missing_info": f"{len(failed_sources)} sources failed" if failed_sources else None
    }

    # Token counters are fixed at zero in this endpoint.
    token_usage = TokenUsage(
        prompt_tokens=0,
        completion_tokens=0,
        total_tokens=0
    )

    # De-duplicate while keeping first-seen order so the response is stable.
    unique_sources = list(dict.fromkeys(all_sources_list))

    return FactCheckResponse(
        query=request.content,
        total_claims_found=len(all_results),
        results=all_results,
        verification_result=verification_result,
        sources=unique_sources,
        context_used=contexts_used,
        token_usage=token_usage,
        summary={
            "total_sources": len(unique_sources),
            "fact_checking_sites_queried": len(all_sources),
            "failed_sources": failed_sources
        }
    )