from fastapi import APIRouter, HTTPException
|
|
import httpx
|
|
from typing import Union
|
|
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
|
|
from app.api.scrap_websites import search_websites, SearchRequest
|
|
from app.services.openai_client import OpenAIClient
|
|
from app.models.fact_check_models import (
|
|
FactCheckRequest,
|
|
FactCheckResponse,
|
|
UnverifiedFactCheckResponse,
|
|
ErrorResponse,
|
|
Source,
|
|
VerdictEnum,
|
|
ConfidenceEnum
|
|
)
|
|
from app.websites.fact_checker_website import get_all_sources
|
|
|
|
# Router for the fact-check endpoints defined in this module.
fact_check_router = APIRouter()

# Shared OpenAI client used by generate_fact_report for all requests.
openai_client = OpenAIClient(OPENAI_API_KEY)
|
|
|
|
|
|
def _normalize_sources(raw_sources: list) -> list:
    """Coerce model-returned "sources" entries into {"url", "name"} dicts.

    String entries are used for both fields; dict entries get "https://"
    prefixed onto schemeless URLs. Entries of any other type are silently
    dropped, matching the model's loosely-typed output.
    """
    cleaned_sources = []
    for source in raw_sources:
        if isinstance(source, str):
            url = source if source.startswith("http") else f"https://{source}"
            cleaned_sources.append({"url": url, "name": source})
        elif isinstance(source, dict):
            url = source.get("url", "")
            if url and not url.startswith("http"):
                source["url"] = f"https://{url}"
            cleaned_sources.append(source)
    return cleaned_sources


def _technical_error_response(query: str, evidence: str, explanation: str) -> UnverifiedFactCheckResponse:
    """Build the unverified fallback response used when report generation itself fails."""
    return UnverifiedFactCheckResponse(
        claim=query,
        verdict=VerdictEnum.UNVERIFIED,
        confidence=ConfidenceEnum.LOW,
        sources=[],
        evidence=evidence,
        explanation=explanation,
        additional_context="This is a technical error and does not reflect on the truthfulness of the claim."
    )


async def generate_fact_report(query: str, fact_check_data: dict) -> Union[FactCheckResponse, UnverifiedFactCheckResponse]:
    """Generate a fact check report using OpenAI based on the fact check results.

    Args:
        query: The claim being fact-checked.
        fact_check_data: Results from either the Google Fact Check API (has a
            "claims" key) or the website-search fallback (different shape).

    Returns:
        A FactCheckResponse when the model produced a sourced verdict,
        otherwise an UnverifiedFactCheckResponse. Every failure path is
        caught and reported as an unverified response; this never raises.
    """
    try:
        base_system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.

Rules:
1. Include all source URLs and names in the sources list
2. Keep the explanation focused on verifiable facts
3. Include dates when available
4. Maintain objectivity in the report
5. If no reliable sources are found, provide a clear explanation why"""

        # If no sources were found, return an unverified response immediately
        # instead of asking the model to write a report from nothing.
        if not fact_check_data.get("claims") and (
            not fact_check_data.get("urls_found")
            or fact_check_data.get("status") == "no_results"
            or fact_check_data.get("verification_result", {}).get("no_sources_found")
        ):
            return UnverifiedFactCheckResponse(
                claim=query,
                verdict=VerdictEnum.UNVERIFIED,
                confidence=ConfidenceEnum.LOW,
                sources=[],
                evidence="No fact-checking sources have verified this claim yet.",
                explanation="Our search across reputable fact-checking websites did not find any formal verification of this claim. This doesn't mean the claim is false - just that it hasn't been formally fact-checked yet.",
                additional_context="The claim may be too recent for fact-checkers to have investigated, or it may not have been widely circulated enough to warrant formal fact-checking."
            )

        base_user_prompt = """Generate a comprehensive fact check report in this exact JSON format:
{
    "claim": "Write the exact claim being verified",
    "verdict": "One of: True/False/Partially True/Unverified",
    "confidence": "One of: High/Medium/Low",
    "sources": [
        {
            "url": "Full URL of the source",
            "name": "Name of the source organization"
        }
    ],
    "evidence": "A concise summary of the key evidence (1-2 sentences)",
    "explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
    "additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
}"""

        # The two input shapes (Google API vs. website search) share the
        # system prompt and most of the user prompt; only the closing
        # checklist differs, so build just that piece conditionally.
        if "claims" in fact_check_data:
            report_checklist = """1. Include ALL source URLs and organization names
2. Specify verification dates when available
3. Name the fact-checking organizations involved
4. Describe the verification process"""
        else:
            report_checklist = """1. Include ALL source URLs and names from both verification_result and sources fields
2. Mention all fact-checking organizations involved
3. Describe the verification process
4. Note any conflicting information between sources"""

        user_prompt = f"""Query: {query}
Fact Check Results: {fact_check_data}

{base_user_prompt}

The report should:
{report_checklist}"""

        response = await openai_client.generate_text_response(
            system_prompt=base_system_prompt,
            user_prompt=user_prompt,
            max_tokens=1000
        )

        try:
            response_data = response["response"]

            # Normalize the loosely-typed "sources" list before handing it
            # to the pydantic response models.
            if isinstance(response_data.get("sources"), list):
                response_data["sources"] = _normalize_sources(response_data["sources"])

            # Use .get so a missing "verdict" key flows into pydantic
            # validation below instead of raising a bare KeyError here.
            if response_data.get("verdict") == "Unverified" or not response_data.get("sources"):
                return UnverifiedFactCheckResponse(**response_data)
            return FactCheckResponse(**response_data)

        except Exception as validation_error:
            print(f"Response validation error: {str(validation_error)}")
            return _technical_error_response(
                query,
                evidence="An error occurred while processing the fact check results.",
                explanation="The system encountered an error while validating the fact check results.",
            )

    except Exception as e:
        print(f"Error generating fact report: {str(e)}")
        return _technical_error_response(
            query,
            evidence="An error occurred while generating the fact check report.",
            explanation="The system encountered an unexpected error while processing the fact check request.",
        )
|
|
|
|
|
|
@fact_check_router.post("/check-facts", response_model=Union[FactCheckResponse, UnverifiedFactCheckResponse])
async def check_facts(request: FactCheckRequest):
    """
    Fetch fact check results and generate a comprehensive report.
    """
    # Without API credentials there is no way to reach the fact-check API.
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        return UnverifiedFactCheckResponse(
            claim=request.query,
            verdict=VerdictEnum.UNVERIFIED,
            confidence=ConfidenceEnum.LOW,
            sources=[],
            evidence="The fact-checking service is not properly configured.",
            explanation="The system is missing required API configuration for fact-checking services.",
            additional_context="This is a temporary system configuration issue."
        )

    headers = {"Content-Type": "application/json"}
    async with httpx.AsyncClient() as client:
        # Query Google's Fact Check API once per known fact-checker domain;
        # the first domain that returns any claims produces the report.
        for checker in get_all_sources():
            query_params = {
                "key": GOOGLE_API_KEY,
                "query": request.query,
                "languageCode": "en-US",
                "reviewPublisherSiteFilter": checker.domain,
                "pageSize": 10,
            }

            try:
                api_response = await client.get(
                    GOOGLE_FACT_CHECK_BASE_URL, params=query_params, headers=headers
                )
                api_response.raise_for_status()
                payload = api_response.json()

                if payload.get("claims"):
                    return await generate_fact_report(request.query, payload)

            except httpx.RequestError as e:
                # Network-level failure for this domain: log and move on.
                print(f"Error fetching results for site {checker.domain}: {str(e)}")
                continue
            except Exception as e:
                # Anything else (bad JSON, HTTP error status): log and move on.
                print(f"Unexpected error for site {checker.domain}: {str(e)}")
                continue

    # No Google results at all: fall back to searching fact-checker
    # websites directly, and degrade to a "no results" report on failure.
    try:
        fallback_results = await search_websites(
            SearchRequest(
                search_text=request.query,
                source_types=["fact_checkers"]
            )
        )
        return await generate_fact_report(request.query, fallback_results)

    except Exception as e:
        print(f"Error in AI fact check: {str(e)}")
        return await generate_fact_report(request.query, {
            "status": "no_results",
            "verification_result": {
                "no_sources_found": True,
                "reason": str(e)
            }
        })