fact-checker-backend/app/api/fact_check.py
2024-12-14 18:19:37 +06:00

171 lines
No EOL
6 KiB
Python

import json
import logging
from datetime import datetime
from typing import Dict, List

from fastapi import APIRouter, HTTPException

from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
from app.models.fact_check_models import (
    GoogleFactCheckRequest as FactCheckRequest,
    GoogleFactCheckResponse as FactCheckResponse,
    Claim,
    ErrorResponse,
    TokenUsage
)
from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources
from app.api.scrap_websites import SearchRequest, search_websites
fact_check_router = APIRouter()
class CustomJSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, datetime):
return obj.isoformat()
return super().default(obj)
@fact_check_router.post(
"/check-facts",
response_model=FactCheckResponse,
responses={
400: {"model": ErrorResponse},
404: {"model": ErrorResponse},
500: {"model": ErrorResponse},
503: {"model": ErrorResponse}
}
)
async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
"""
Check facts using multiple fact-checking sources and fallback to web search
"""
all_results = []
verified_results = []
# Validate configuration
if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
raise HTTPException(
status_code=500,
detail=ErrorResponse(
detail="API configuration is missing",
error_code="CONFIGURATION_ERROR",
path="/check-facts"
).dict()
)
# Get all sources in priority order
all_sources = get_all_sources()
all_sources_list = [] # To store source URLs
contexts_used = [] # To store context snippets
for source in all_sources:
try:
result = await fetch_fact_checks(
GOOGLE_API_KEY,
GOOGLE_FACT_CHECK_BASE_URL,
request.content,
source
)
if "claims" in result:
# Validate each claim through Pydantic
for claim in result["claims"]:
validated_claim = Claim(**claim).dict()
all_results.append(validated_claim)
# Extract source and context information
if "claimReview" in validated_claim:
review = validated_claim["claimReview"][0]
if "publisher" in review and "site" in review["publisher"]:
all_sources_list.append(review["publisher"]["site"])
if "textualRating" in review:
contexts_used.append(review["textualRating"])
except HTTPException:
continue
except Exception as e:
# Log the error but continue with other sources
print(f"Error processing {source.domain}: {str(e)}")
continue
# If no results found, try searching websites
if not all_results:
try:
# Create search request
search_request = SearchRequest(
search_text=request.content,
source_types=["fact_checkers"]
)
# Perform website search
search_response = await search_websites(search_request)
# If AI fact check results are available, use them
if search_response.ai_fact_check_result:
# Create a claim from AI fact check result
ai_claim = {
"text": request.content,
"claimant": "AI Analysis",
"claimDate": datetime.now().isoformat(),
"claimReview": [{
"publisher": {
"name": "AI Fact Checker",
"site": "ai-fact-check"
},
"textualRating": search_response.ai_fact_check_result.verification_result["verdict"],
"title": "AI Fact Check Analysis",
"reviewDate": datetime.now().isoformat(),
"url": ""
}]
}
validated_claim = Claim(**ai_claim).dict()
all_results.append(validated_claim)
# Add sources and contexts
all_sources_list.extend(search_response.results.keys())
if search_response.ai_fact_check_result.verification_result["evidence"]:
contexts_used.extend(search_response.ai_fact_check_result.verification_result["evidence"])
except Exception as e:
print(f"Error during website search: {str(e)}")
# If still no results found after searching websites
if not all_results:
raise HTTPException(
status_code=404,
detail=ErrorResponse(
detail="No fact check results found",
error_code="NO_RESULTS_FOUND",
path="/check-facts"
).dict()
)
# Prepare the verification result
verification_result = {
"verdict": "Insufficient Information", # Default verdict
"confidence": "Low",
"evidence": contexts_used,
"reasoning": "Based on available fact checks and web search results",
"missing_info": "Additional verification may be needed"
}
# Create token usage information
token_usage = TokenUsage(
prompt_tokens=0,
completion_tokens=0,
total_tokens=0
)
# Create the response using Pydantic model with all required fields
response = FactCheckResponse(
query=request.content,
total_claims_found=len(all_results),
results=all_results,
verification_result=verification_result,
sources=list(set(all_sources_list)),
token_usage=token_usage,
summary={
"total_sources": len(set(all_sources_list)),
"fact_checking_sites_queried": len(all_sources)
}
)
return response