fact-checker-backend/app/api/fact_check.py

from fastapi import APIRouter, HTTPException
import httpx
from typing import Union
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
from app.api.scrap_websites import search_websites, SearchRequest
from app.services.openai_client import OpenAIClient
from app.models.fact_check_models import (
    FactCheckRequest,
    FactCheckResponse,
    UnverifiedFactCheckResponse,
    ErrorResponse,
    Source,
    VerdictEnum,
    ConfidenceEnum
)
from app.websites.fact_checker_website import get_all_sources

fact_check_router = APIRouter()
openai_client = OpenAIClient(OPENAI_API_KEY)


async def generate_fact_report(query: str, fact_check_data: dict) -> Union[FactCheckResponse, UnverifiedFactCheckResponse]:
    """Generate a fact check report using OpenAI based on the fact check results."""
    try:
        base_system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.

Rules:
1. Include all source URLs and names in the sources list
2. Keep the explanation focused on verifiable facts
3. Include dates when available
4. Maintain objectivity in the report
5. If no reliable sources are found, provide a clear explanation why"""

        # If no sources were found, return an unverified response
        if not fact_check_data.get("claims") and (
            not fact_check_data.get("urls_found") or
            fact_check_data.get("status") == "no_results" or
            fact_check_data.get("verification_result", {}).get("no_sources_found")
        ):
            return UnverifiedFactCheckResponse(
                claim=query,
                verdict=VerdictEnum.UNVERIFIED,
                confidence=ConfidenceEnum.LOW,
                sources=[],
                evidence="No fact-checking sources have verified this claim yet.",
                explanation="Our search across reputable fact-checking websites did not find any formal verification of this claim. This doesn't mean the claim is false - just that it hasn't been formally fact-checked yet.",
                additional_context="The claim may be too recent for fact-checkers to have investigated, or it may not have been widely circulated enough to warrant formal fact-checking."
            )

        base_user_prompt = """Generate a comprehensive fact check report in this exact JSON format:
{
    "claim": "Write the exact claim being verified",
    "verdict": "One of: True/False/Partially True/Unverified",
    "confidence": "One of: High/Medium/Low",
    "sources": [
        {
            "url": "Full URL of the source",
            "name": "Name of the source organization"
        }
    ],
    "evidence": "A concise summary of the key evidence (1-2 sentences)",
    "explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
    "additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
}"""

        if "claims" in fact_check_data:
            system_prompt = base_system_prompt
            user_prompt = f"""Query: {query}
            Fact Check Results: {fact_check_data}

            {base_user_prompt}

            The report should:
            1. Include ALL source URLs and organization names
            2. Specify verification dates when available
            3. Name the fact-checking organizations involved
            4. Describe the verification process"""

        else:
            system_prompt = base_system_prompt
            user_prompt = f"""Query: {query}
            Fact Check Results: {fact_check_data}

            {base_user_prompt}

            The report should:
            1. Include ALL source URLs and names from both verification_result and sources fields
            2. Mention all fact-checking organizations involved
            3. Describe the verification process
            4. Note any conflicting information between sources"""

        response = await openai_client.generate_text_response(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            max_tokens=1000
        )

        try:
            response_data = response["response"]

            if isinstance(response_data.get("sources"), list):
                cleaned_sources = []
                for source in response_data["sources"]:
                    if isinstance(source, str):
                        url = source if source.startswith("http") else f"https://{source}"
                        cleaned_sources.append({"url": url, "name": source})
                    elif isinstance(source, dict):
                        url = source.get("url", "")
                        if url and not url.startswith("http"):
                            source["url"] = f"https://{url}"
                        cleaned_sources.append(source)
                response_data["sources"] = cleaned_sources

            if response_data["verdict"] == "Unverified" or not response_data.get("sources"):
                return UnverifiedFactCheckResponse(**response_data)
            return FactCheckResponse(**response_data)

        except Exception as validation_error:
            print(f"Response validation error: {str(validation_error)}")
            return UnverifiedFactCheckResponse(
                claim=query,
                verdict=VerdictEnum.UNVERIFIED,
                confidence=ConfidenceEnum.LOW,
                sources=[],
                evidence="An error occurred while processing the fact check results.",
                explanation="The system encountered an error while validating the fact check results.",
                additional_context="This is a technical error and does not reflect on the truthfulness of the claim."
            )

    except Exception as e:
        print(f"Error generating fact report: {str(e)}")
        return UnverifiedFactCheckResponse(
            claim=query,
            verdict=VerdictEnum.UNVERIFIED,
            confidence=ConfidenceEnum.LOW,
            sources=[],
            evidence="An error occurred while generating the fact check report.",
            explanation="The system encountered an unexpected error while processing the fact check request.",
            additional_context="This is a technical error and does not reflect on the truthfulness of the claim."
        )


@fact_check_router.post("/check-facts", response_model=Union[FactCheckResponse, UnverifiedFactCheckResponse])
async def check_facts(request: FactCheckRequest):
    """
    Fetch fact check results and generate a comprehensive report.
    """
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        return UnverifiedFactCheckResponse(
            claim=request.query,
            verdict=VerdictEnum.UNVERIFIED,
            confidence=ConfidenceEnum.LOW,
            sources=[],
            evidence="The fact-checking service is not properly configured.",
            explanation="The system is missing required API configuration for fact-checking services.",
            additional_context="This is a temporary system configuration issue."
        )

    headers = {"Content-Type": "application/json"}
    async with httpx.AsyncClient() as client:
        fact_checker_sources = get_all_sources()

        for source in fact_checker_sources:
            params = {
                "key": GOOGLE_API_KEY,
                "query": request.query,
                "languageCode": "en-US",
                "reviewPublisherSiteFilter": source.domain,
                "pageSize": 10,
            }

            try:
                response = await client.get(
                    GOOGLE_FACT_CHECK_BASE_URL, params=params, headers=headers
                )
                response.raise_for_status()
                json_response = response.json()

                if json_response.get("claims"):
                    return await generate_fact_report(request.query, json_response)

            except httpx.RequestError as e:
                print(f"Error fetching results for site {source.domain}: {str(e)}")
                continue
            except Exception as e:
                print(f"Unexpected error for site {source.domain}: {str(e)}")
                continue

        try:
            search_request = SearchRequest(
                search_text=request.query,
                source_types=["fact_checkers"]
            )

            ai_response = await search_websites(search_request)
            return await generate_fact_report(request.query, ai_response)

        except Exception as e:
            print(f"Error in AI fact check: {str(e)}")
            return await generate_fact_report(request.query, {
                "status": "no_results",
                "verification_result": {
                    "no_sources_found": True,
                    "reason": str(e)
                }
            })