fact-checker-backend/app/api/fact_check.py

from fastapi import APIRouter, HTTPException
import httpx
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
from app.api.scrap_websites import search_websites, SearchRequest
from app.services.openai_client import OpenAIClient
from app.models.fact_check_models import (
    FactCheckRequest,
    FactCheckResponse,
    ErrorResponse,
    Source
)
from app.websites.fact_checker_website import get_all_sources

fact_check_router = APIRouter()
openai_client = OpenAIClient(OPENAI_API_KEY)


async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse:
    """Generate a fact check report using OpenAI based on the fact check results."""
    try:
        base_system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.

Rules:
1. Include all source URLs and names in the sources list
2. Keep the explanation focused on verifiable facts
3. Include dates when available
4. Maintain objectivity in the report"""

        base_user_prompt = """Generate a comprehensive fact check report in this exact JSON format:
{
    "claim": "Write the exact claim being verified",
    "verdict": "One of: True/False/Partially True/Unverified",
    "confidence": "One of: High/Medium/Low",
    "sources": [
        {
            "url": "Full URL of the source",
            "name": "Name of the source organization"
        }
    ],
    "evidence": "A concise summary of the key evidence (1-2 sentences)",
    "explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
    "additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
}

Ensure all URLs in sources are complete (including https:// if missing) and each source has both a URL and name."""

        if "claims" in fact_check_data:
            system_prompt = base_system_prompt
            user_prompt = f"""Query: {query}
Fact Check Results: {fact_check_data}

{base_user_prompt}

The report should:
1. Include ALL source URLs and organization names
2. Specify verification dates when available
3. Name the fact-checking organizations involved
4. Describe the verification process"""
        else:
            system_prompt = base_system_prompt
            user_prompt = f"""Query: {query}
Fact Check Results: {fact_check_data}

{base_user_prompt}

The report should:
1. Include ALL source URLs and names from both verification_result and sources fields
2. Mention all fact-checking organizations involved
3. Describe the verification process
4. Note any conflicting information between sources"""

        response = await openai_client.generate_text_response(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            max_tokens=1000
        )

        try:
            # generate_text_response is expected to return the parsed JSON report under "response"
            response_data = response["response"]

            # Clean up sources before validation
            if isinstance(response_data.get('sources'), list):
                cleaned_sources = []
                for source in response_data['sources']:
                    if isinstance(source, str):
                        # Convert string sources to Source objects
                        url = source if source.startswith('http') else f"https://{source}"
                        cleaned_sources.append({
                            "url": url,
                            "name": source
                        })
                    elif isinstance(source, dict):
                        # Ensure URL has proper scheme
                        url = source.get('url', '')
                        if url and not url.startswith('http'):
                            source['url'] = f"https://{url}"
                        cleaned_sources.append(source)
                response_data['sources'] = cleaned_sources

            fact_check_response = FactCheckResponse(**response_data)
            return fact_check_response
        except Exception as validation_error:
            print(f"Response validation error: {str(validation_error)}")
            raise HTTPException(
                status_code=422,
                detail=ErrorResponse(
                    detail=f"Invalid response format: {str(validation_error)}",
                    error_code="VALIDATION_ERROR",
                    path="/check-facts"
                ).dict()
            )
    except HTTPException:
        # Propagate HTTP errors (e.g. the 422 above) instead of re-wrapping them as a 500
        raise
    except Exception as e:
        print(f"Error generating fact report: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="Error generating fact report",
                error_code="FACT_CHECK_ERROR",
                path="/check-facts"
            ).dict()
        )
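
# Illustrative note on the source clean-up above (hypothetical model output):
#   "sources": ["reuters.com/fact-check", {"url": "apnews.com", "name": "AP News"}]
# would be normalized to
#   [{"url": "https://reuters.com/fact-check", "name": "reuters.com/fact-check"},
#    {"url": "https://apnews.com", "name": "AP News"}]
# before FactCheckResponse validation.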


@fact_check_router.post("/check-facts", response_model=FactCheckResponse)
async def check_facts(request: FactCheckRequest):
    """
    Fetch fact check results and generate a comprehensive report.

    Queries the configured Google Fact Check endpoint for each fact-checker
    domain first; if no claims are found, falls back to the AI-assisted
    website search defined in app.api.scrap_websites.
    """
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="Google API key or base URL is not configured",
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )

    headers = {"Content-Type": "application/json"}

    async with httpx.AsyncClient() as client:
        # Get fact checker sources from the centralized configuration
        fact_checker_sources = get_all_sources()

        for source in fact_checker_sources:
            params = {
                "key": GOOGLE_API_KEY,
                "query": request.query,
                "languageCode": "en-US",
                "reviewPublisherSiteFilter": source.domain,
                "pageSize": 10
            }

            try:
                response = await client.get(
                    GOOGLE_FACT_CHECK_BASE_URL,
                    params=params,
                    headers=headers
                )
                response.raise_for_status()
                json_response = response.json()

                if json_response.get("claims"):
                    return await generate_fact_report(request.query, json_response)
            except httpx.RequestError as e:
                print(f"Error fetching results for site {source.domain}: {str(e)}")
                continue
            except Exception as e:
                print(f"Unexpected error for site {source.domain}: {str(e)}")
                continue

    try:
        search_request = SearchRequest(
            search_text=request.query,
            source_types=["fact_checkers"]
        )
        ai_response = await search_websites(search_request)
        return await generate_fact_report(request.query, ai_response)
    except HTTPException:
        # Preserve errors raised while generating the report instead of masking them as a 404
        raise
    except Exception as e:
        print(f"Error in AI fact check: {str(e)}")
        raise HTTPException(
            status_code=404,
            detail=ErrorResponse(
                detail="No fact check results found",
                error_code="NOT_FOUND",
                path="/check-facts"
            ).dict()
        )
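

# Example usage (a minimal sketch, not part of the module itself): assuming the app
# is served locally on the default uvicorn port 8000 and that FactCheckRequest exposes
# the single `query` field used above, the endpoint can be exercised with httpx:
#
#   import asyncio
#   import httpx
#
#   async def demo():
#       async with httpx.AsyncClient() as client:
#           resp = await client.post(
#               "http://localhost:8000/check-facts",
#               json={"query": "The Eiffel Tower was completed in 1889"},
#           )
#           print(resp.status_code, resp.json())
#
#   asyncio.run(demo())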