fact-checker-backend/app/api/fact_check.py
2024-12-14 18:19:37 +06:00

171 lines
No EOL
6 KiB
Python

import json
import logging
from datetime import datetime
from typing import Dict, List

from fastapi import APIRouter, HTTPException

from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
from app.models.fact_check_models import (
    GoogleFactCheckRequest as FactCheckRequest,
    GoogleFactCheckResponse as FactCheckResponse,
    Claim,
    ErrorResponse,
    TokenUsage
)
from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources
from app.api.scrap_websites import SearchRequest, search_websites
fact_check_router = APIRouter()
class CustomJSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, datetime):
return obj.isoformat()
return super().default(obj)
@fact_check_router.post(
"/check-facts",
response_model=FactCheckResponse,
responses={
400: {"model": ErrorResponse},
404: {"model": ErrorResponse},
500: {"model": ErrorResponse},
503: {"model": ErrorResponse}
}
)
async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
"""
Check facts using multiple fact-checking sources and fallback to web search
"""
all_results = []
verified_results = []
# Validate configuration
if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
raise HTTPException(
status_code=500,
detail=ErrorResponse(
detail="API configuration is missing",
error_code="CONFIGURATION_ERROR",
path="/check-facts"
).dict()
)
# Get all sources in priority order
all_sources = get_all_sources()
all_sources_list = [] # To store source URLs
contexts_used = [] # To store context snippets
for source in all_sources:
try:
result = await fetch_fact_checks(
GOOGLE_API_KEY,
GOOGLE_FACT_CHECK_BASE_URL,
request.content,
source
)
if "claims" in result:
# Validate each claim through Pydantic
for claim in result["claims"]:
validated_claim = Claim(**claim).dict()
all_results.append(validated_claim)
# Extract source and context information
if "claimReview" in validated_claim:
review = validated_claim["claimReview"][0]
if "publisher" in review and "site" in review["publisher"]:
all_sources_list.append(review["publisher"]["site"])
if "textualRating" in review:
contexts_used.append(review["textualRating"])
except HTTPException:
continue
except Exception as e:
# Log the error but continue with other sources
print(f"Error processing {source.domain}: {str(e)}")
continue
# If no results found, try searching websites
if not all_results:
try:
# Create search request
search_request = SearchRequest(
search_text=request.content,
source_types=["fact_checkers"]
)
# Perform website search
search_response = await search_websites(search_request)
# If AI fact check results are available, use them
if search_response.ai_fact_check_result:
# Create a claim from AI fact check result
ai_claim = {
"text": request.content,
"claimant": "AI Analysis",
"claimDate": datetime.now().isoformat(),
"claimReview": [{
"publisher": {
"name": "AI Fact Checker",
"site": "ai-fact-check"
},
"textualRating": search_response.ai_fact_check_result.verification_result["verdict"],
"title": "AI Fact Check Analysis",
"reviewDate": datetime.now().isoformat(),
"url": ""
}]
}
validated_claim = Claim(**ai_claim).dict()
all_results.append(validated_claim)
# Add sources and contexts
all_sources_list.extend(search_response.results.keys())
if search_response.ai_fact_check_result.verification_result["evidence"]:
contexts_used.extend(search_response.ai_fact_check_result.verification_result["evidence"])
except Exception as e:
print(f"Error during website search: {str(e)}")
# If still no results found after searching websites
if not all_results:
raise HTTPException(
status_code=404,
detail=ErrorResponse(
detail="No fact check results found",
error_code="NO_RESULTS_FOUND",
path="/check-facts"
).dict()
)
# Prepare the verification result
verification_result = {
"verdict": "Insufficient Information", # Default verdict
"confidence": "Low",
"evidence": contexts_used,
"reasoning": "Based on available fact checks and web search results",
"missing_info": "Additional verification may be needed"
}
# Create token usage information
token_usage = TokenUsage(
prompt_tokens=0,
completion_tokens=0,
total_tokens=0
)
# Create the response using Pydantic model with all required fields
response = FactCheckResponse(
query=request.content,
total_claims_found=len(all_results),
results=all_results,
verification_result=verification_result,
sources=list(set(all_sources_list)),
token_usage=token_usage,
summary={
"total_sources": len(set(all_sources_list)),
"fact_checking_sites_queried": len(all_sources)
}
)
return response