# fact-checker-backend/app/api/fact_check.py

from fastapi import APIRouter, HTTPException
import httpx
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
from app.api.scrap_websites import search_websites, SearchRequest
from app.services.openai_client import OpenAIClient
from app.models.fact_check_models import (
    FactCheckRequest,
    FactCheckResponse,
    ErrorResponse,
    Source,
)
from app.websites.fact_checker_website import get_all_sources

# Router that the fact-check endpoint(s) in this module are registered on.
fact_check_router = APIRouter()
# Module-level OpenAI client, created once at import time with the configured
# API key and used by generate_fact_report() below.
openai_client = OpenAIClient(OPENAI_API_KEY)


def _ensure_url_scheme(url: str) -> str:
    """Return *url* unchanged if it has an http(s) scheme, else prefix ``https://``."""
    if url.lower().startswith(("http://", "https://")):
        return url
    return f"https://{url}"


def _normalize_sources(sources: list) -> list:
    """Return a cleaned copy of the ``sources`` list produced by the model.

    String entries are converted to ``{"url": ..., "name": ...}`` dicts, dict
    entries get a scheme prepended to a non-empty string ``url`` that lacks
    one, and entries of any other type are dropped. Input dicts are copied
    rather than mutated.
    """
    cleaned_sources = []
    for source in sources:
        if isinstance(source, str):
            cleaned_sources.append(
                {"url": _ensure_url_scheme(source), "name": source}
            )
        elif isinstance(source, dict):
            url = source.get("url", "")
            if url and isinstance(url, str):
                source = {**source, "url": _ensure_url_scheme(url)}
            cleaned_sources.append(source)
    return cleaned_sources


async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse:
    """Generate a fact check report using OpenAI based on the fact check results.

    Args:
        query: The claim text being checked.
        fact_check_data: Raw fact-check results. When it contains a
            ``"claims"`` key the prompt asks for verification dates and
            organizations; otherwise the prompt asks the model to merge the
            ``verification_result`` and ``sources`` fields.

    Returns:
        The model's report validated as a ``FactCheckResponse``.

    Raises:
        HTTPException: 500 (``FACT_CHECK_ERROR``) if building the prompt or
            calling OpenAI fails; 422 (``VALIDATION_ERROR``) if the model's
            response cannot be turned into a ``FactCheckResponse``.
    """
    try:
        base_system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.

Rules:
1. Include all source URLs and names in the sources list
2. Keep the explanation focused on verifiable facts
3. Include dates when available
4. Maintain objectivity in the report"""

        base_user_prompt = """Generate a comprehensive fact check report in this exact JSON format:
{
    "claim": "Write the exact claim being verified",
    "verdict": "One of: True/False/Partially True/Unverified",
    "confidence": "One of: High/Medium/Low",
    "sources": [
        {
            "url": "Full URL of the source",
            "name": "Name of the source organization"
        }
    ],
    "evidence": "A concise summary of the key evidence (1-2 sentences)",
    "explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
    "additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
}

Ensure all URLs in sources are complete (including https:// if missing) and each source has both a URL and name."""

        # Both branches share the same system prompt; only the trailing
        # requirements in the user prompt differ.
        system_prompt = base_system_prompt
        if "claims" in fact_check_data:
            user_prompt = f"""Query: {query}
            Fact Check Results: {fact_check_data}

            {base_user_prompt}

            The report should:
            1. Include ALL source URLs and organization names
            2. Specify verification dates when available
            3. Name the fact-checking organizations involved
            4. Describe the verification process"""

        else:
            user_prompt = f"""Query: {query}
            Fact Check Results: {fact_check_data}

            {base_user_prompt}

            The report should:
            1. Include ALL source URLs and names from both verification_result and sources fields
            2. Mention all fact-checking organizations involved
            3. Describe the verification process
            4. Note any conflicting information between sources"""

        response = await openai_client.generate_text_response(
            system_prompt=system_prompt, user_prompt=user_prompt, max_tokens=1000
        )

    except Exception as e:
        print(f"Error generating fact report: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="Error generating fact report",
                error_code="FACT_CHECK_ERROR",
                path="/check-facts",
            ).dict(),
        ) from e

    # Validation is handled in its own try block (not nested inside the one
    # above) so the 422 raised here is not re-caught and rewritten as a 500.
    try:
        response_data = response["response"]

        # Clean up sources before validation
        if isinstance(response_data.get("sources"), list):
            response_data["sources"] = _normalize_sources(response_data["sources"])

        return FactCheckResponse(**response_data)

    except Exception as validation_error:
        print(f"Response validation error: {str(validation_error)}")
        raise HTTPException(
            status_code=422,
            detail=ErrorResponse(
                detail=f"Invalid response format: {str(validation_error)}",
                error_code="VALIDATION_ERROR",
                path="/check-facts",
            ).dict(),
        ) from validation_error


@fact_check_router.post("/check-facts", response_model=FactCheckResponse)
async def check_facts(request: FactCheckRequest):
    """
    Fetch fact check results and generate a comprehensive report.

    Each configured fact-checker site is queried in turn through the Google
    Fact Check API; the first site that returns claims is used to generate the
    report. If no site yields a report, a website search restricted to
    fact-checker sources is used as a fallback.

    Raises:
        HTTPException: 500 (``CONFIGURATION_ERROR``) when the Google API key
            or base URL is missing; 404 (``NOT_FOUND``) when the fallback
            fails with a non-HTTP error. An ``HTTPException`` raised by the
            fallback itself is propagated unchanged.
    """
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="Google API key or base URL is not configured",
                error_code="CONFIGURATION_ERROR",
                path="/check-facts",
            ).dict(),
        )

    headers = {"Content-Type": "application/json"}
    async with httpx.AsyncClient() as client:
        # Get fact checker sources from the centralized configuration
        fact_checker_sources = get_all_sources()

        for source in fact_checker_sources:
            params = {
                "key": GOOGLE_API_KEY,
                "query": request.query,
                "languageCode": "en-US",
                "reviewPublisherSiteFilter": source.domain,
                "pageSize": 10,
            }

            try:
                response = await client.get(
                    GOOGLE_FACT_CHECK_BASE_URL, params=params, headers=headers
                )
                response.raise_for_status()
                json_response = response.json()

                if json_response.get("claims"):
                    return await generate_fact_report(request.query, json_response)

            except httpx.HTTPStatusError as e:
                # Log only the status code: the exception message embeds the
                # full request URL, which carries the API key as a query
                # parameter.
                print(
                    f"Error fetching results for site {source.domain}: "
                    f"HTTP {e.response.status_code}"
                )
                continue
            except httpx.RequestError as e:
                print(f"Error fetching results for site {source.domain}: {str(e)}")
                continue
            except Exception as e:
                # Best effort: any other failure for this site (including a
                # failed report generation) moves on to the next site.
                print(f"Unexpected error for site {source.domain}: {str(e)}")
                continue

    # The fallback does not use the HTTP client, so it runs after the client
    # has been closed.
    try:
        search_request = SearchRequest(
            search_text=request.query, source_types=["fact_checkers"]
        )

        ai_response = await search_websites(search_request)
        return await generate_fact_report(request.query, ai_response)

    except HTTPException:
        # Keep the status and error code chosen downstream instead of
        # masking them as "not found".
        raise
    except Exception as e:
        print(f"Error in AI fact check: {str(e)}")
        raise HTTPException(
            status_code=404,
            detail=ErrorResponse(
                detail="No fact check results found",
                error_code="NOT_FOUND",
                path="/check-facts",
            ).dict(),
        ) from e