# fact-checker-backend/app/api/fact_check.py

from fastapi import APIRouter, HTTPException
import json
from datetime import datetime
from typing import Dict, List
import httpx
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
from app.models.fact_check_models import (
    GoogleFactCheckRequest as FactCheckRequest,
    GoogleFactCheckResponse as FactCheckResponse,
    Claim,
    ErrorResponse,
    TokenUsage
)
from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources

# Router that the fact-checking endpoints in this module are registered on.
fact_check_router = APIRouter()

class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that renders ``datetime`` objects as ISO-8601 strings."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        ``datetime`` instances become their ``isoformat()`` string; every
        other type is delegated to the base encoder, which raises
        ``TypeError`` for values it cannot serialize.
        """
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()

async def validate_api_key():
    """Validate the Google API key by issuing a minimal test search.

    Sends a single-result ``claims:search`` request for the query ``"test"``
    using the configured base URL and API key.

    Returns:
        ``True`` when the test request completes with a success status.

    Raises:
        HTTPException: 503 with error code ``INVALID_API_KEY`` when the
            upstream API answers 403; 503 with error code
            ``API_VALIDATION_ERROR`` for any other HTTP error status or for
            a transport-level failure (connection error, timeout, ...).
    """
    test_url = f"{GOOGLE_FACT_CHECK_BASE_URL}claims:search"
    params = {
        "key": GOOGLE_API_KEY,
        "query": "test",
        "languageCode": "en-US",
        "pageSize": 1
    }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(test_url, params=params)
            response.raise_for_status()
    except httpx.HTTPStatusError as e:
        status = e.response.status_code
        if status == 403:
            raise HTTPException(
                status_code=503,
                detail=ErrorResponse(
                    detail="Invalid or expired API key",
                    error_code="INVALID_API_KEY",
                    path="/check-facts"
                ).dict()
            ) from e
        # Do not interpolate str(e): httpx's message embeds the full request
        # URL, whose query string carries the API key. Report the status only.
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"API validation failed: upstream returned HTTP {status}",
                error_code="API_VALIDATION_ERROR",
                path="/check-facts"
            ).dict()
        ) from e
    except httpx.RequestError as e:
        # Transport failures (DNS, connect, timeout) never produce a response;
        # surface them as the same documented 503 instead of an unhandled 500.
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"API validation failed: {type(e).__name__}",
                error_code="API_VALIDATION_ERROR",
                path="/check-facts"
            ).dict()
        ) from e

    return True

def _first_review_metadata(claim: Dict) -> tuple:
    """Return ``(publisher_site, textual_rating)`` from a claim's first review.

    Either element is ``None`` when the claim has no reviews or when the
    corresponding field is missing or null.
    """
    # A dumped model presumably carries a key for every declared field, so the
    # value itself (not just key presence) must be checked before indexing.
    reviews = claim.get("claimReview") or []
    if not reviews:
        return None, None
    review = reviews[0]
    publisher = review.get("publisher") or {}
    return publisher.get("site"), review.get("textualRating")


@fact_check_router.post(
    "/check-facts",
    response_model=FactCheckResponse,
    responses={
        400: {"model": ErrorResponse},
        404: {"model": ErrorResponse},
        500: {"model": ErrorResponse},
        503: {"model": ErrorResponse}
    }
)
async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
    """
    Check facts using multiple fact-checking sources.

    Each source returned by ``get_all_sources()`` is queried in turn with
    ``request.content``. Every returned claim is validated through the
    ``Claim`` model; the publisher site and textual rating of its first
    review (when present) are collected as source and context. A source
    that raises is recorded in ``failed_sources`` and does not abort the
    request.

    Args:
        request: Request body; its ``content`` field is the text to check.

    Returns:
        A ``FactCheckResponse`` holding the validated claims, the unique
        publisher sites, the collected ratings, a verdict summary and the
        list of failed sources.

    Raises:
        HTTPException: 500 when the API key or base URL is not configured;
            503 when API key validation fails (raised by
            ``validate_api_key``); 404 when no source produced any claim.
    """
    # Validate configuration
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="API configuration is missing",
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )

    # Validate API key before proceeding
    await validate_api_key()

    all_sources = get_all_sources()
    all_results = []       # Validated claims, as plain dicts
    all_sources_list = []  # Publisher sites of the first review of each claim
    contexts_used = []     # Textual ratings of the first review of each claim
    failed_sources = []    # One {"source", "error"} entry per failed source

    for source in all_sources:
        try:
            result = await fetch_fact_checks(
                GOOGLE_API_KEY,
                GOOGLE_FACT_CHECK_BASE_URL,
                request.content,
                source
            )

            if "claims" in result:
                for claim in result["claims"]:
                    # Validate each claim through Pydantic
                    validated_claim = Claim(**claim).dict()
                    all_results.append(validated_claim)

                    # A claim without reviews, publisher or rating is kept;
                    # it simply contributes no source/context entry.
                    site, rating = _first_review_metadata(validated_claim)
                    if site is not None:
                        all_sources_list.append(site)
                    if rating is not None:
                        contexts_used.append(rating)

        except HTTPException as http_err:
            failed_sources.append({
                "source": source.domain,
                "error": str(http_err.detail)
            })
        except Exception as e:
            # Best-effort aggregation: one failing source must not fail the
            # whole request.
            # NOTE(review): str(e) is returned to the client; confirm that
            # fetch_fact_checks errors cannot embed the API key.
            failed_sources.append({
                "source": source.domain,
                "error": str(e)
            })

    if not all_results:
        raise HTTPException(
            status_code=404,
            detail=ErrorResponse(
                detail="No fact check results found. Failed sources: " +
                       ", ".join([f"{f['source']}: {f['error']}" for f in failed_sources]),
                error_code="NO_RESULTS_FOUND",
                path="/check-facts"
            ).dict()
        )

    # Return partial results if some sources failed but we have data
    verification_result = {
        "verdict": "Partial Results Available" if failed_sources else "Complete Results",
        "confidence": "Medium" if failed_sources else "High",
        "evidence": contexts_used,
        "reasoning": "Based on available fact checks",
        "missing_info": f"{len(failed_sources)} sources failed" if failed_sources else None
    }

    # Token counts are reported as zero by this endpoint.
    token_usage = TokenUsage(
        prompt_tokens=0,
        completion_tokens=0,
        total_tokens=0
    )

    # De-duplicate while keeping first-seen order so the response is
    # deterministic (a plain set() would shuffle the sites between runs).
    unique_sources = list(dict.fromkeys(all_sources_list))

    return FactCheckResponse(
        query=request.content,
        total_claims_found=len(all_results),
        results=all_results,
        verification_result=verification_result,
        sources=unique_sources,
        context_used=contexts_used,
        token_usage=token_usage,
        summary={
            "total_sources": len(unique_sources),
            "fact_checking_sites_queried": len(all_sources),
            "failed_sources": failed_sources
        }
    )