"""Fact-check API route: query fact-check sources and summarise the result with OpenAI."""

import logging

import httpx
from fastapi import APIRouter, HTTPException

from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
from app.api.scrap_websites import search_websites, SearchRequest
from app.services.openai_client import OpenAIClient
from app.models.fact_check_models import (
    FactCheckRequest,
    FactCheckResponse,
    ErrorResponse,
    Source
)
from app.websites.fact_checker_website import get_all_sources

logger = logging.getLogger(__name__)

fact_check_router = APIRouter()
openai_client = OpenAIClient(OPENAI_API_KEY)

# Path reported in every ErrorResponse produced by this module.
_ENDPOINT_PATH = "/check-facts"

# Only these prefixes count as "already has a scheme"; a bare "http" prefix
# would wrongly accept hosts such as "httpbin.org".
_URL_SCHEMES = ("http://", "https://")

_SYSTEM_PROMPT = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.

Rules:
1. Include all source URLs and names in the sources list
2. Keep the explanation focused on verifiable facts
3. Include dates when available
4. Maintain objectivity in the report"""

_REPORT_FORMAT_PROMPT = """Generate a comprehensive fact check report in this exact JSON format:
{
    "claim": "Write the exact claim being verified",
    "verdict": "One of: True/False/Partially True/Unverified",
    "confidence": "One of: High/Medium/Low",
    "sources": [
        {
            "url": "Full URL of the source",
            "name": "Name of the source organization"
        }
    ],
    "evidence": "A concise summary of the key evidence (1-2 sentences)",
    "explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
    "additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
}

Ensure all URLs in sources are complete (including https:// if missing) and each source has both a URL and name."""

# Closing instructions used when the data contains a "claims" entry.
_CLAIMS_REPORT_REQUIREMENTS = """The report should:
1. Include ALL source URLs and organization names
2. Specify verification dates when available
3. Name the fact-checking organizations involved
4. Describe the verification process"""

# Closing instructions used for any other data (the website-search fallback).
_SEARCH_REPORT_REQUIREMENTS = """The report should:
1. Include ALL source URLs and names from both verification_result and sources fields
2. Mention all fact-checking organizations involved
3. Describe the verification process
4. Note any conflicting information between sources"""


def _http_error(status_code: int, detail: str, error_code: str) -> HTTPException:
    """Build an HTTPException whose detail is a serialised ErrorResponse."""
    return HTTPException(
        status_code=status_code,
        detail=ErrorResponse(
            detail=detail,
            error_code=error_code,
            path=_ENDPOINT_PATH
        ).dict()
    )


def _ensure_url_scheme(url: str) -> str:
    """Return *url* unchanged if it starts with http(s)://, else prefix ``https://``."""
    if url.lower().startswith(_URL_SCHEMES):
        return url
    return f"https://{url}"


def _normalize_sources(sources: list) -> list:
    """Convert the model's ``sources`` list into a list of ``{"url", "name"}``-style dicts.

    String entries become ``{"url": <with scheme>, "name": <original string>}``.
    Dict entries are copied, with a non-empty ``url`` given a scheme if it lacks one.
    Entries of any other type are dropped.
    """
    normalized = []
    for entry in sources:
        if isinstance(entry, str):
            normalized.append({"url": _ensure_url_scheme(entry), "name": entry})
        elif isinstance(entry, dict):
            url = entry.get("url", "")
            if url:
                entry = {**entry, "url": _ensure_url_scheme(url)}
            normalized.append(entry)
    return normalized


def _build_user_prompt(query: str, fact_check_data: dict) -> str:
    """Assemble the user prompt from the query, the raw data and the format instructions."""
    requirements = (
        _CLAIMS_REPORT_REQUIREMENTS
        if "claims" in fact_check_data
        else _SEARCH_REPORT_REQUIREMENTS
    )
    return (
        f"Query: {query}\n"
        f"Fact Check Results: {fact_check_data}\n\n"
        f"{_REPORT_FORMAT_PROMPT}\n\n"
        f"{requirements}"
    )


async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse:
    """Generate a fact check report using OpenAI based on the fact check results.

    Args:
        query: The claim or question submitted for checking.
        fact_check_data: Raw fact-check data to summarise. When it contains a
            ``"claims"`` entry the claims-oriented instructions are used,
            otherwise the instructions for website-search results.

    Returns:
        The validated ``FactCheckResponse`` built from the model's output.

    Raises:
        HTTPException: 500 (``FACT_CHECK_ERROR``) if building the prompt or
            calling OpenAI fails; 422 (``VALIDATION_ERROR``) if the model's
            output cannot be turned into a ``FactCheckResponse``.
    """
    try:
        user_prompt = _build_user_prompt(query, fact_check_data)
        response = await openai_client.generate_text_response(
            system_prompt=_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            max_tokens=1000
        )
    except Exception as exc:
        logger.exception("Error generating fact report: %s", exc)
        raise _http_error(
            500, "Error generating fact report", "FACT_CHECK_ERROR"
        ) from exc

    # Validation runs in its own try block, after the generation one. Nesting it
    # inside the block above would let that block's broad handler catch the 422
    # raised here and turn it into a generic 500.
    try:
        response_data = response["response"]
        if isinstance(response_data.get("sources"), list):
            response_data["sources"] = _normalize_sources(response_data["sources"])
        return FactCheckResponse(**response_data)
    except Exception as exc:
        logger.exception("Response validation error: %s", exc)
        raise _http_error(
            422, f"Invalid response format: {str(exc)}", "VALIDATION_ERROR"
        ) from exc


@fact_check_router.post("/check-facts", response_model=FactCheckResponse)
async def check_facts(request: FactCheckRequest):
    """
    Fetch fact check results and generate a comprehensive report.

    Each configured fact-checker source is queried in turn through the
    configured Google fact-check URL; the first response containing claims is
    turned into a report. If no source yields a report, a website search
    restricted to fact checkers is used as the input instead.

    Raises:
        HTTPException: 500 (``CONFIGURATION_ERROR``) if the Google API key or
            base URL is missing; 404 (``NOT_FOUND``) if the fallback website
            search fails; 422/500 from ``generate_fact_report`` if the report
            for the fallback results cannot be produced.
    """
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        raise _http_error(
            500, "Google API key or base URL is not configured", "CONFIGURATION_ERROR"
        )

    headers = {"Content-Type": "application/json"}

    async with httpx.AsyncClient() as client:
        # Fact checker sources come from the centralized configuration.
        for source in get_all_sources():
            params = {
                "key": GOOGLE_API_KEY,
                "query": request.query,
                "languageCode": "en-US",
                "reviewPublisherSiteFilter": source.domain,
                "pageSize": 10
            }
            # Best effort per source: any failure (including report generation)
            # is logged and the next source is tried.
            try:
                response = await client.get(
                    GOOGLE_FACT_CHECK_BASE_URL,
                    params=params,
                    headers=headers
                )
                response.raise_for_status()
                json_response = response.json()
                if json_response.get("claims"):
                    return await generate_fact_report(request.query, json_response)
            except httpx.HTTPError as exc:
                # Covers transport errors and the HTTPStatusError raised by
                # raise_for_status().
                logger.warning(
                    "Error fetching results for site %s: %s", source.domain, exc
                )
                continue
            except Exception as exc:
                logger.warning(
                    "Unexpected error for site %s: %s", source.domain, exc
                )
                continue

    # Fallback: only a failed search means "nothing found". Report generation
    # happens outside the try so its own 422/500 is not relabelled as 404.
    try:
        search_request = SearchRequest(
            search_text=request.query,
            source_types=["fact_checkers"]
        )
        ai_response = await search_websites(search_request)
    except Exception as exc:
        logger.exception("Error in AI fact check: %s", exc)
        raise _http_error(404, "No fact check results found", "NOT_FOUND") from exc

    return await generate_fact_report(request.query, ai_response)