188 lines
7.3 KiB
Python
188 lines
7.3 KiB
Python
from fastapi import APIRouter, HTTPException
|
|
import httpx
|
|
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
|
|
from app.api.scrap_websites import search_websites, SearchRequest
|
|
from app.services.openai_client import OpenAIClient
|
|
from app.models.fact_check_models import (
|
|
FactCheckRequest,
|
|
FactCheckResponse,
|
|
ErrorResponse,
|
|
Source,
|
|
)
|
|
from app.websites.fact_checker_website import get_all_sources
|
|
|
|
# Router that the /check-facts endpoint in this module is registered on.
fact_check_router = APIRouter()

# Module-level OpenAI client, constructed once at import time from OPENAI_API_KEY.
openai_client = OpenAIClient(OPENAI_API_KEY)
|
|
|
|
|
|
def _build_report_prompts(query: str, fact_check_data: dict) -> tuple:
    """Return the ``(system_prompt, user_prompt)`` pair sent to the OpenAI client.

    The user prompt embeds *query* and *fact_check_data* verbatim, followed by
    the JSON report template and a list of report requirements. Which list is
    used depends on whether *fact_check_data* contains a ``"claims"`` key.
    """
    system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.

Rules:
1. Include all source URLs and names in the sources list
2. Keep the explanation focused on verifiable facts
3. Include dates when available
4. Maintain objectivity in the report"""

    report_format = """Generate a comprehensive fact check report in this exact JSON format:
{
"claim": "Write the exact claim being verified",
"verdict": "One of: True/False/Partially True/Unverified",
"confidence": "One of: High/Medium/Low",
"sources": [
{
"url": "Full URL of the source",
"name": "Name of the source organization"
}
],
"evidence": "A concise summary of the key evidence (1-2 sentences)",
"explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
"additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
}

Ensure all URLs in sources are complete (including https:// if missing) and each source has both a URL and name."""

    # Only the trailing requirements differ between the two kinds of input.
    if "claims" in fact_check_data:
        requirements = """1. Include ALL source URLs and organization names
2. Specify verification dates when available
3. Name the fact-checking organizations involved
4. Describe the verification process"""
    else:
        requirements = """1. Include ALL source URLs and names from both verification_result and sources fields
2. Mention all fact-checking organizations involved
3. Describe the verification process
4. Note any conflicting information between sources"""

    user_prompt = f"""Query: {query}
Fact Check Results: {fact_check_data}

{report_format}

The report should:
{requirements}"""

    return system_prompt, user_prompt


def _normalize_sources(sources: list) -> list:
    """Return *sources* as a list of dicts whose URLs carry an explicit scheme.

    String entries become ``{"url": ..., "name": <original string>}``. Dict
    entries are kept, with ``https://`` prepended to a non-empty ``"url"`` that
    lacks a scheme. Entries of any other type are dropped. Input dicts are not
    mutated.
    """
    # Test for a real scheme prefix; a bare "http" prefix would also match
    # hosts such as "httpbin.org" and leave them without a scheme.
    schemes = ("http://", "https://")
    normalized = []
    for source in sources:
        if isinstance(source, str):
            url = source if source.startswith(schemes) else f"https://{source}"
            normalized.append({"url": url, "name": source})
        elif isinstance(source, dict):
            url = source.get("url", "")
            if url and not url.startswith(schemes):
                source = {**source, "url": f"https://{url}"}
            normalized.append(source)
    return normalized


async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse:
    """Generate a fact check report using OpenAI based on the fact check results.

    Args:
        query: The claim text being checked; embedded verbatim in the prompt.
        fact_check_data: Raw fact-check results; embedded verbatim in the
            prompt. The presence of a ``"claims"`` key selects which report
            requirements are appended.

    Returns:
        A ``FactCheckResponse`` built from the ``"response"`` entry of the
        OpenAI client's result, after source URLs have been normalized.

    Raises:
        HTTPException: 500 (``FACT_CHECK_ERROR``) if building the prompts or
            calling the OpenAI client fails; 422 (``VALIDATION_ERROR``) if the
            returned payload cannot be turned into a ``FactCheckResponse``.
    """
    # Stage 1: obtain the model output. Any failure here is a server error.
    try:
        system_prompt, user_prompt = _build_report_prompts(query, fact_check_data)
        response = await openai_client.generate_text_response(
            system_prompt=system_prompt, user_prompt=user_prompt, max_tokens=1000
        )
    except Exception as e:
        print(f"Error generating fact report: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="Error generating fact report",
                error_code="FACT_CHECK_ERROR",
                path="/check-facts",
            ).dict(),
        ) from e

    # Stage 2: validate the payload. This stage is deliberately outside the
    # try block above so the 422 raised here is not caught by a broad
    # ``except Exception`` and rewritten into a generic 500.
    try:
        response_data = response["response"]

        # Clean up sources before validation
        if isinstance(response_data.get("sources"), list):
            response_data["sources"] = _normalize_sources(response_data["sources"])

        return FactCheckResponse(**response_data)
    except Exception as validation_error:
        print(f"Response validation error: {str(validation_error)}")
        raise HTTPException(
            status_code=422,
            detail=ErrorResponse(
                detail=f"Invalid response format: {str(validation_error)}",
                error_code="VALIDATION_ERROR",
                path="/check-facts",
            ).dict(),
        ) from validation_error
|
|
|
|
|
|
@fact_check_router.post("/check-facts", response_model=FactCheckResponse)
async def check_facts(request: FactCheckRequest):
    """
    Fetch fact check results and generate a comprehensive report.

    The Google fact-check endpoint (``GOOGLE_FACT_CHECK_BASE_URL``) is queried
    once per source returned by ``get_all_sources()``, filtered by that
    source's ``domain``. The first response containing a non-empty ``"claims"``
    entry is turned into a report via ``generate_fact_report``. If no source
    yields claims, the query is passed to ``search_websites`` restricted to
    ``"fact_checkers"`` sources and a report is generated from that result.

    Raises:
        HTTPException: 500 (``CONFIGURATION_ERROR``) when the Google API key
            or base URL is not configured; 404 (``NOT_FOUND``) when the
            fallback search fails. Errors raised by ``generate_fact_report``
            on the fallback result propagate with their own status code.
    """
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="Google API key or base URL is not configured",
                error_code="CONFIGURATION_ERROR",
                path="/check-facts",
            ).dict(),
        )

    headers = {"Content-Type": "application/json"}
    async with httpx.AsyncClient() as client:
        # Get fact checker sources from the centralized configuration
        fact_checker_sources = get_all_sources()

        for source in fact_checker_sources:
            params = {
                "key": GOOGLE_API_KEY,
                "query": request.query,
                "languageCode": "en-US",
                "reviewPublisherSiteFilter": source.domain,
                "pageSize": 10,
            }

            # Best effort per source: any failure is logged and the loop moves
            # on to the next source. This includes an HTTPException raised by
            # generate_fact_report, which the generic handler also catches.
            try:
                response = await client.get(
                    GOOGLE_FACT_CHECK_BASE_URL, params=params, headers=headers
                )
                response.raise_for_status()
                json_response = response.json()

                if json_response.get("claims"):
                    return await generate_fact_report(request.query, json_response)
            except httpx.RequestError as e:
                print(f"Error fetching results for site {source.domain}: {str(e)}")
            except Exception as e:
                print(f"Unexpected error for site {source.domain}: {str(e)}")

    # Fallback: no source returned claims. Only the search step is guarded, so
    # a failure to find results is reported as 404.
    try:
        search_request = SearchRequest(
            search_text=request.query, source_types=["fact_checkers"]
        )
        ai_response = await search_websites(search_request)
    except Exception as e:
        print(f"Error in AI fact check: {str(e)}")
        raise HTTPException(
            status_code=404,
            detail=ErrorResponse(
                detail="No fact check results found",
                error_code="NOT_FOUND",
                path="/check-facts",
            ).dict(),
        ) from e

    # Report generation sits outside the try block so its own HTTPException
    # (500/422) reaches the client instead of being masked as "not found".
    return await generate_fact_report(request.query, ai_response)
|