base code added updated

commit 790d58402a (parent d5b977e1d4)
Author: Utsho Dey
Date:   2024-12-15 18:22:04 +06:00

7 changed files with 390 additions and 311 deletions

View file

@@ -2,7 +2,7 @@ from fastapi import APIRouter, HTTPException
 import json
 from datetime import datetime
 from typing import Dict, List
+import httpx
 from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
 from app.models.fact_check_models import (
     GoogleFactCheckRequest as FactCheckRequest,
@@ -12,7 +12,6 @@ from app.models.fact_check_models import (
     TokenUsage
 )
 from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources
-from app.api.scrap_websites import SearchRequest, search_websites
 
 fact_check_router = APIRouter()
 
@@ -22,6 +21,39 @@ class CustomJSONEncoder(json.JSONEncoder):
             return obj.isoformat()
         return super().default(obj)
 
+async def validate_api_key():
+    """Validate the Google API key with a test request"""
+    async with httpx.AsyncClient() as client:
+        try:
+            test_url = f"{GOOGLE_FACT_CHECK_BASE_URL}claims:search"
+            params = {
+                "key": GOOGLE_API_KEY,
+                "query": "test",
+                "languageCode": "en-US",
+                "pageSize": 1
+            }
+            response = await client.get(test_url, params=params)
+            response.raise_for_status()
+            return True
+        except httpx.HTTPStatusError as e:
+            if e.response.status_code == 403:
+                raise HTTPException(
+                    status_code=503,
+                    detail=ErrorResponse(
+                        detail="Invalid or expired API key",
+                        error_code="INVALID_API_KEY",
+                        path="/check-facts"
+                    ).dict()
+                )
+            raise HTTPException(
+                status_code=503,
+                detail=ErrorResponse(
+                    detail=f"API validation failed: {str(e)}",
+                    error_code="API_VALIDATION_ERROR",
+                    path="/check-facts"
+                ).dict()
+            )
+
 @fact_check_router.post(
     "/check-facts",
     response_model=FactCheckResponse,
@@ -34,7 +66,7 @@ class CustomJSONEncoder(json.JSONEncoder):
 )
 async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
     """
-    Check facts using multiple fact-checking sources and fallback to web search
+    Check facts using multiple fact-checking sources
     """
     all_results = []
     verified_results = []
@@ -50,10 +82,14 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
             ).dict()
         )
 
+    # Validate API key before proceeding
+    await validate_api_key()
+
     # Get all sources in priority order
     all_sources = get_all_sources()
     all_sources_list = []  # To store source URLs
     contexts_used = []  # To store context snippets
+    failed_sources = []  # Track failed sources
 
     for source in all_sources:
         try:
@@ -78,75 +114,39 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
                     if "textualRating" in review:
                         contexts_used.append(review["textualRating"])
 
-        except HTTPException:
+        except HTTPException as http_err:
+            failed_sources.append({
+                "source": source.domain,
+                "error": str(http_err.detail)
+            })
             continue
         except Exception as e:
-            # Log the error but continue with other sources
-            print(f"Error processing {source.domain}: {str(e)}")
+            failed_sources.append({
+                "source": source.domain,
+                "error": str(e)
+            })
             continue
 
-    # If no results found, try searching websites
-    if not all_results:
-        try:
-            # Create search request
-            search_request = SearchRequest(
-                search_text=request.content,
-                source_types=["fact_checkers"]
-            )
-
-            # Perform website search
-            search_response = await search_websites(search_request)
-
-            # If AI fact check results are available, use them
-            if search_response.ai_fact_check_result:
-                # Create a claim from AI fact check result
-                ai_claim = {
-                    "text": request.content,
-                    "claimant": "AI Analysis",
-                    "claimDate": datetime.now().isoformat(),
-                    "claimReview": [{
-                        "publisher": {
-                            "name": "AI Fact Checker",
-                            "site": "ai-fact-check"
-                        },
-                        "textualRating": search_response.ai_fact_check_result.verification_result["verdict"],
-                        "title": "AI Fact Check Analysis",
-                        "reviewDate": datetime.now().isoformat(),
-                        "url": ""
-                    }]
-                }
-
-                validated_claim = Claim(**ai_claim).dict()
-                all_results.append(validated_claim)
-
-                # Add sources and contexts
-                all_sources_list.extend(search_response.results.keys())
-                if search_response.ai_fact_check_result.verification_result["evidence"]:
-                    contexts_used.extend(search_response.ai_fact_check_result.verification_result["evidence"])
-
-        except Exception as e:
-            print(f"Error during website search: {str(e)}")
-
-    # If still no results found after searching websites
-    if not all_results:
+    # Return partial results if some sources failed but we have data
+    if all_results:
+        verification_result = {
+            "verdict": "Partial Results Available" if failed_sources else "Complete Results",
+            "confidence": "Medium" if failed_sources else "High",
+            "evidence": contexts_used,
+            "reasoning": "Based on available fact checks",
+            "missing_info": f"{len(failed_sources)} sources failed" if failed_sources else None
+        }
+    else:
         raise HTTPException(
             status_code=404,
             detail=ErrorResponse(
-                detail="No fact check results found",
+                detail="No fact check results found. Failed sources: " +
+                       ", ".join([f"{f['source']}: {f['error']}" for f in failed_sources]),
                 error_code="NO_RESULTS_FOUND",
                 path="/check-facts"
            ).dict()
         )
 
-    # Prepare the verification result
-    verification_result = {
-        "verdict": "Insufficient Information",  # Default verdict
-        "confidence": "Low",
-        "evidence": contexts_used,
-        "reasoning": "Based on available fact checks and web search results",
-        "missing_info": "Additional verification may be needed"
-    }
-
     # Create token usage information
     token_usage = TokenUsage(
         prompt_tokens=0,
@@ -161,10 +161,12 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
         results=all_results,
         verification_result=verification_result,
         sources=list(set(all_sources_list)),
-        context_used=contexts_used,
         token_usage=token_usage,
         summary={
             "total_sources": len(set(all_sources_list)),
-            "fact_checking_sites_queried": len(all_sources)
+            "fact_checking_sites_queried": len(all_sources),
+            "failed_sources": failed_sources
         }
     )
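
Reviewer note (not part of the commit): a minimal client sketch of how the reworked /check-facts endpoint behaves after this change. The base URL, port, and example claim are assumptions for illustration only.

import asyncio
import httpx

async def demo_check_facts():
    # Hypothetical local address; adjust to wherever the FastAPI app is served.
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.post("/check-facts", json={"content": "Example claim to verify"})
        body = resp.json()
        if resp.status_code == 503:
            # validate_api_key() now fails fast with INVALID_API_KEY or API_VALIDATION_ERROR
            print("API key problem:", body["detail"])
        elif resp.status_code == 404:
            # NO_RESULTS_FOUND now lists the sources that failed
            print("No results:", body["detail"])
        else:
            # The summary now carries failed_sources instead of silently printing per-source errors
            print(body["verification_result"]["verdict"], body["summary"]["failed_sources"])

asyncio.run(demo_check_facts())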

View file

@@ -1,309 +1,342 @@
 from fastapi import APIRouter, HTTPException
-from pydantic import BaseModel
-from typing import List, Dict, Optional
-from urllib.parse import urlencode, urlparse
-import urllib.parse
-import numpy as np
-from time import sleep
+import httpx
 import logging
-import requests
-from bs4 import BeautifulSoup
-import re
+from urllib.parse import urlparse
+import json
 from app.services.openai_client import OpenAIClient
-from app.config import OPENAI_API_KEY
+from app.config import OPENAI_API_KEY, GOOGLE_API_KEY, GOOGLE_ENGINE_ID
 from app.websites.fact_checker_website import SOURCES, get_all_sources
 from app.api.ai_fact_check import ai_fact_check
+from typing import List, Dict, Optional
+from pydantic import BaseModel
 from app.models.fact_check_models import (
     AIFactCheckRequest,
-    AIFactCheckResponse,
-    VerificationResult,
-    TokenUsage
+    FactCheckSource,
+    SourceType
 )
+
+# Define Pydantic models
+class Publisher(BaseModel):
+    name: str
+    site: str
+
+class ClaimReview(BaseModel):
+    publisher: Publisher
+    textualRating: str
+
+class Claim(BaseModel):
+    claimReview: List[ClaimReview]
+    claimant: str
+    text: str
+
+class Summary(BaseModel):
+    fact_checking_sites_queried: int
+    total_sources: int
+
+class VerificationResult(BaseModel):
+    verdict: str
+    confidence: str
+    evidence: List[str]
+    reasoning: str
+    fact_check_type: str
+
+class SearchRequest(BaseModel):
+    search_text: str
+    source_types: List[str]
+
+class EnhancedFactCheckResponse(BaseModel):
+    query: str
+    results: List[Dict]
+    sources: List
+    summary: Summary
+    token_usage: Dict[str, int]
+    total_claims_found: int
+    verification_result: VerificationResult
 
 # Configure logging
 logging.basicConfig(
-    level=logging.INFO,
+    level=logging.INFO,  # Changed back to INFO from DEBUG
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 
 scrap_websites_router = APIRouter()
 
-# Configuration for scraping
-MAX_RETRIES = 2
-RETRY_DELAY = 2
-
-class SearchRequest(BaseModel):
-    search_text: str
-    source_types: List[str] = ["fact_checkers"]
-
-class UrlSimilarityInfo(BaseModel):
-    url: str
-    similarity: float
-    extracted_text: str
-
-class SearchResponse(BaseModel):
-    results: Dict[str, List[str]]
-    error_messages: Dict[str, str]
-    ai_fact_check_result: Optional[Dict] = None
-
-def extract_url_text(url: str) -> str:
-    """Extract and process meaningful text from URL path with improved cleaning"""
-    logger.debug(f"Extracting text from URL: {url}")
-    try:
-        parsed = urllib.parse.urlparse(url)
-        path = parsed.path
-        path = path.replace('.html', '').replace('/index', '').replace('.php', '')
-        segments = [seg for seg in path.split('/') if seg]
-        cleaned_segments = []
-        for segment in segments:
-            segment = segment.replace('-', ' ').replace('_', ' ')
-            if not (segment.replace(' ', '').isdigit() or
-                    all(part.isdigit() for part in segment.split() if part)):
-                cleaned_segments.append(segment)
-
-        common_words = {
-            'www', 'live', 'news', 'intl', 'index', 'world', 'us', 'uk',
-            'updates', 'update', 'latest', 'breaking', 'new', 'article'
-        }
-
-        text = ' '.join(cleaned_segments)
-        words = [word.lower() for word in text.split()
-                 if word.lower() not in common_words and len(word) > 1]
-
-        result = ' '.join(words)
-        logger.debug(f"Extracted text: {result}")
-        return result
-    except Exception as e:
-        logger.error(f"Error extracting text from URL {url}: {str(e)}")
-        return ''
-
-def extract_search_results(html_content):
-    """Extract URLs using multiple selectors and patterns"""
-    soup = BeautifulSoup(html_content, 'html.parser')
-    urls = set()  # Using set to avoid duplicates
-
-    # Multiple CSS selectors to try
-    selectors = [
-        'div.g div.yuRUbf > a',  # Main result links
-        'div.g a.l',  # Alternative link format
-        'div.rc a',  # Another possible format
-        'div[class*="g"] > div > div > div > a',  # Broader match
-        'a[href^="http"]'  # Any http link
-    ]
-
-    for selector in selectors:
-        try:
-            elements = soup.select(selector)
-            for element in elements:
-                url = element.get('href')
-                if url and url.startswith('http') and not url.startswith('https://www.google.com'):
-                    urls.add(url)
-        except Exception as e:
-            logger.debug(f"Error with selector {selector}: {str(e)}")
-
-    # Also try finding URLs in the raw HTML using regex
-    url_pattern = r'href="(https?://[^"]+)"'
-    raw_urls = re.findall(url_pattern, html_content)
-    for url in raw_urls:
-        if not url.startswith('https://www.google.com'):
-            urls.add(url)
-
-    return list(urls)
-
-def google_search_scraper(search_text: str, site_domain: str, retry_count: int = 0) -> List[str]:
-    """Scrape Google search results with multiple query formats"""
-    logger.info(f"Searching for '{search_text}' on domain: {site_domain}")
-
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-        'Accept-Language': 'en-US,en;q=0.5',
-        'Referer': 'https://www.google.com/',
-        'DNT': '1'
-    }
-
-    # Try different query formats
-    query_formats = [
-        f"{search_text} site:{site_domain}",
-        f"site:{site_domain} {search_text}",
-        f"\"{search_text}\" site:{site_domain}"
-    ]
-
-    all_urls = set()
-
-    for query in query_formats:
-        try:
-            google_url = f"https://www.google.com/search?q={urlencode({'q': query})}"
-            logger.debug(f"Trying query format: {query}")
-
-            response = requests.get(google_url, headers=headers)
-
-            if response.status_code == 200:
-                urls = extract_search_results(response.text)
-                domain_urls = [url for url in urls if site_domain in urlparse(url).netloc]
-                all_urls.update(domain_urls)
-            else:
-                logger.warning(f"Received status code {response.status_code} for query format: {query}")
-
-            sleep(2)  # Delay between requests
-
-        except Exception as e:
-            logger.error(f"Error processing query format '{query}': {str(e)}")
-            if retry_count < MAX_RETRIES:
-                sleep(RETRY_DELAY)
-                return google_search_scraper(search_text, site_domain, retry_count + 1)
-
-    valid_urls = list(all_urls)
-    logger.info(f"Found {len(valid_urls)} unique URLs for domain: {site_domain}")
-    return valid_urls[:5]  # Return up to 5 URLs
-
-def calculate_similarity(query_embedding: List[float], url_embedding: List[float]) -> float:
-    """Calculate cosine similarity between two embeddings"""
-    query_array = np.array(query_embedding)
-    url_array = np.array(url_embedding)
-
-    similarity = np.dot(url_array, query_array) / (
-        np.linalg.norm(url_array) * np.linalg.norm(query_array)
-    )
-    return float(similarity)
-
-@scrap_websites_router.post("/search", response_model=SearchResponse)
+# Constants
+RESULTS_PER_PAGE = 10
+MAX_PAGES = 5
+MAX_URLS_PER_DOMAIN = 5
+GOOGLE_SEARCH_URL = "https://www.googleapis.com/customsearch/v1"
+
+def get_domain_from_url(url: str) -> str:
+    """Extract domain from URL with improved handling."""
+    try:
+        parsed = urlparse(url)
+        domain = parsed.netloc.lower()
+        # Remove 'www.' if present
+        if domain.startswith('www.'):
+            domain = domain[4:]
+        return domain
+    except Exception as e:
+        logger.error(f"Error extracting domain from URL {url}: {str(e)}")
+        return ""
+
+def is_valid_source_domain(domain: str, sources: List[FactCheckSource]) -> bool:
+    """Check if domain matches any source with improved matching logic."""
+    if not domain:
+        return False
+
+    domain = domain.lower()
+    if domain.startswith('www.'):
+        domain = domain[4:]
+
+    for source in sources:
+        source_domain = source.domain.lower()
+        if source_domain.startswith('www.'):
+            source_domain = source_domain[4:]
+
+        # Check exact match
+        if domain == source_domain:
+            logger.debug(f"Exact domain match found: {domain} = {source_domain}")
+            return True
+
+        # Check if domain ends with source domain
+        if domain.endswith('.' + source_domain):
+            logger.debug(f"Subdomain match found: {domain} ends with {source_domain}")
+            return True
+
+    logger.debug(f"No match found for domain: {domain}")
+    return False
+
+async def build_enhanced_search_query(query: str, sources: List[FactCheckSource]) -> str:
+    """Build search query with site restrictions."""
+    site_queries = [f"site:{source.domain}" for source in sources]
+    site_restriction = " OR ".join(site_queries)
+    enhanced_query = f"({query}) ({site_restriction})"
+    logger.debug(f"Enhanced search query: {enhanced_query}")
+    return enhanced_query
+
+async def google_custom_search(query: str, sources: List[FactCheckSource], page: int = 1) -> Optional[Dict]:
+    """Perform Google Custom Search with enhanced query."""
+    enhanced_query = await build_enhanced_search_query(query, sources)
+    start_index = ((page - 1) * RESULTS_PER_PAGE) + 1
+
+    params = {
+        "key": GOOGLE_API_KEY,
+        "cx": GOOGLE_ENGINE_ID,
+        "q": enhanced_query,
+        "num": RESULTS_PER_PAGE,
+        "start": start_index
+    }
+
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        try:
+            logger.info(f"Making API request to Google Custom Search with params: {params}")
+            response = await client.get(GOOGLE_SEARCH_URL, params=params)
+            response.raise_for_status()
+
+            data = response.json()
+
+            search_info = data.get('searchInformation', {})
+            logger.info(f"Search info: Total results: {search_info.get('totalResults', 0)}, "
+                        f"Time taken: {search_info.get('searchTime', 0)}s")
+
+            if 'error' in data:
+                error_details = data['error']
+                logger.error(f"API Error: {error_details}")
+                raise HTTPException(
+                    status_code=response.status_code,
+                    detail=f"Google API Error: {error_details.get('message')}"
+                )
+
+            return data
+
+        except Exception as e:
+            logger.error(f"Search error: {str(e)}", exc_info=True)
+            raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
+
+async def analyze_fact_check_results(openai_client: OpenAIClient, original_response: Dict) -> Dict:
+    """Analyze fact check results using OpenAI to generate a consolidated verdict."""
+    # Extract verification results from sources
+    verification_results = []
+    for url, result in original_response.get('verification_result', {}).items():
+        verification_results.append(f"""
+        Source: {url}
+        Verdict: {result.get('verdict')}
+        Confidence: {result.get('confidence')}
+        Evidence: {result.get('evidence')}
+        Reasoning: {result.get('reasoning')}
+        """)
+
+    system_prompt = """You are a professional fact-checking analyzer. Your task is to analyze multiple fact-checking results
+    and provide a consolidated verdict. Respond with a valid JSON object containing your analysis."""
+
+    user_prompt = f"""
+    Analyze these fact-checking results and provide a final verdict.
+
+    Query: {original_response.get('query', '')}
+
+    Fact Check Results:
+    {'\n'.join(verification_results)}"""
+
+    try:
+        logger.info("Generating AI analysis of fact check results")
+        response = await openai_client.generate_text_response(
+            system_prompt=system_prompt,
+            user_prompt=user_prompt,
+            max_tokens=2000
+        )
+
+        # Create the enhanced result structure
+        enhanced_result = {
+            "query": original_response.get('query', ''),
+            "results": [
+                {
+                    "claimReview": [
+                        {
+                            "publisher": {
+                                "name": source,
+                                "site": source
+                            },
+                            "textualRating": result.get('verdict', '')
+                        } for source in original_response.get('sources', [])
+                    ],
+                    "claimant": "source",
+                    "text": original_response.get('query', '')
+                }
+            ],
+            "sources": original_response.get('sources', []),
+            "summary": {
+                "fact_checking_sites_queried": len(original_response.get('sources', [])),
+                "total_sources": len(original_response.get('verification_result', {}))
+            },
+            "verification_result": {
+                "verdict": next(iter(original_response.get('verification_result', {}).values()), {}).get('verdict', ''),
+                "confidence": next(iter(original_response.get('verification_result', {}).values()), {}).get('confidence', ''),
+                "evidence": [next(iter(original_response.get('verification_result', {}).values()), {}).get('evidence', '')],
+                "reasoning": next(iter(original_response.get('verification_result', {}).values()), {}).get('reasoning', ''),
+                "fact_check_type": "ai fact checker"
+            },
+            "token_usage": original_response.get('token_usage', {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0
+            })
+        }
+
+        enhanced_result["total_claims_found"] = len(enhanced_result.get("results", []))
+
+        logger.info("Successfully generated AI analysis")
+        return enhanced_result
+
+    except Exception as e:
+        logger.error(f"Error in OpenAI analysis: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error in fact check analysis: {str(e)}")
+
+@scrap_websites_router.post("/search", response_model=EnhancedFactCheckResponse)
 async def search_websites(request: SearchRequest):
     logger.info(f"Starting search with query: {request.search_text}")
     logger.info(f"Source types requested: {request.source_types}")
 
-    results = {}
-    error_messages = {}
-
-    # Initialize OpenAI client
-    logger.debug("Initializing OpenAI client")
-    openai_client = OpenAIClient(OPENAI_API_KEY)
-
-    # Get domains based on requested source types
-    domains = []
+    # Get sources for requested types
+    selected_sources = []
     for source_type in request.source_types:
         if source_type in SOURCES:
-            domains.extend([source.domain for source in SOURCES[source_type]])
-
-    if not domains:
-        logger.warning("No valid source types provided. Using all available domains.")
-        domains = [source.domain for source in get_all_sources()]
-
-    logger.info(f"Processing {len(domains)} domains")
-
-    # Enhance search text with key terms
-    search_context = request.search_text
-    logger.debug("Getting query embedding from OpenAI")
-    query_embedding = openai_client.get_embeddings([search_context])[0]
-
-    # Higher similarity threshold for better filtering
-    SIMILARITY_THRESHOLD = 0.75
-    MAX_URLS_PER_DOMAIN = 2  # Adjusted to ensure total stays under 5
-    TOTAL_MAX_URLS = 5  # Maximum URLs allowed for AIFactCheckRequest
-    total_urls_collected = 0
-
-    for domain in domains[:3]:  # Limit to 3 domains for testing
-        if total_urls_collected >= TOTAL_MAX_URLS:
-            break
-
-        logger.info(f"Processing domain: {domain}")
-        try:
-            urls = google_search_scraper(request.search_text, domain)
-            valid_urls = []
-
-            logger.debug(f"Found {len(urls)} URLs for domain {domain}")
-
-            for url in urls:
-                if len(valid_urls) >= MAX_URLS_PER_DOMAIN or total_urls_collected >= TOTAL_MAX_URLS:
-                    break
-
-                url_text = extract_url_text(url)
-
-                if not url_text:
-                    logger.debug(f"No meaningful text extracted from URL: {url}")
-                    continue
-
-                logger.debug("Getting URL embedding from OpenAI")
-                url_embedding = openai_client.get_embeddings([url_text])[0]
-                similarity = calculate_similarity(query_embedding, url_embedding)
-                logger.debug(f"Similarity score for {url}: {similarity}")
-
-                if similarity >= SIMILARITY_THRESHOLD:
-                    valid_urls.append(url)
-                    total_urls_collected += 1
-
-            results[domain] = valid_urls
-            logger.info(f"Successfully processed domain {domain}. Found {len(valid_urls)} valid URLs")
-
-        except HTTPException as e:
-            logger.error(f"HTTP Exception for domain {domain}: {str(e.detail)}")
-            error_messages[domain] = str(e.detail)
-        except Exception as e:
-            logger.error(f"Unexpected error for domain {domain}: {str(e)}")
-            error_messages[domain] = f"Unexpected error for {domain}: {str(e)}"
-
-        sleep(1)  # Add delay between processing different domains
-
-    logger.info("Search completed")
-    logger.debug(f"Results found for {len(results)} domains")
-    logger.debug(f"Errors encountered for {len(error_messages)} domains")
-
-    # Collect all valid URLs from results
-    all_valid_urls = []
-    for domain_urls in results.values():
-        all_valid_urls.extend(domain_urls)
-
-    logger.info(f"Total valid URLs collected: {len(all_valid_urls)}")
-
-    # Create request body for AI fact check
-    if all_valid_urls:
+            selected_sources.extend(SOURCES[source_type])
+
+    if not selected_sources:
+        logger.warning("No valid source types provided. Using all available sources.")
+        selected_sources = get_all_sources()
+
+    logger.info(f"Selected sources: {[source.domain for source in selected_sources]}")
+
+    # Initialize collections for URLs
+    all_urls = []
+    domain_results = {}
+
+    try:
+        # Search and collect URLs
+        for page in range(1, MAX_PAGES + 1):
+            if len(all_urls) >= 50:
+                logger.info("Reached maximum URL limit of 50")
+                break
+
+            logger.info(f"Fetching page {page} of search results")
+            search_response = await google_custom_search(request.search_text, selected_sources, page)
+
+            if not search_response or not search_response.get("items"):
+                logger.warning(f"No results found on page {page}")
+                break
+
+            for item in search_response.get("items", []):
+                url = item.get("link")
+                if not url:
+                    continue
+
+                domain = get_domain_from_url(url)
+                logger.debug(f"Processing URL: {url} with domain: {domain}")
+
+                if is_valid_source_domain(domain, selected_sources):
+                    if domain not in domain_results:
+                        domain_results[domain] = []
+                    if len(domain_results[domain]) < MAX_URLS_PER_DOMAIN:
+                        domain_results[domain].append({
+                            "url": url,
+                            "title": item.get("title", ""),
+                            "snippet": item.get("snippet", "")
+                        })
+                        all_urls.append(url)
+                else:
+                    logger.debug(f"Skipping URL {url} - domain not in allowed list")
+
+            if len(all_urls) >= 50:
+                break
+
+        logger.info(f"Total URLs collected: {len(all_urls)}")
+
+        if not all_urls:
+            return EnhancedFactCheckResponse(
+                query=request.search_text,
+                results=[],
+                sources=[],
+                summary=Summary(
+                    fact_checking_sites_queried=len(selected_sources),
+                    total_sources=0
+                ),
+                token_usage={
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "total_tokens": 0
+                },
+                total_claims_found=0,
+                verification_result=VerificationResult(
+                    verdict="Insufficient Evidence",
+                    confidence="Low",
+                    evidence=["No relevant sources found"],
+                    reasoning="No fact-checking sources were found for this claim",
+                    fact_check_type="ai fact checker"
+                )
+            )
+
+        # Perform fact check with collected URLs
         fact_check_request = AIFactCheckRequest(
             content=request.search_text,
-            urls=all_valid_urls[:TOTAL_MAX_URLS]  # Ensure we don't exceed the limit
+            urls=all_urls[:5]  # Limit to 5 URLs
         )
 
-        logger.info("Calling AI fact check service")
-        try:
-            ai_response = await ai_fact_check(fact_check_request)
-            logger.info("AI fact check completed successfully")
-
-            # Format AI fact check response
-            formatted_response = {
-                "query": ai_response.query,
-                "token_usage": {
-                    "prompt_tokens": ai_response.token_usage.prompt_tokens,
-                    "completion_tokens": ai_response.token_usage.completion_tokens,
-                    "total_tokens": ai_response.token_usage.total_tokens
-                },
-                "sources": ai_response.sources,
-                "verification_result": {
-                    url: {
-                        "verdict": result.verdict,
-                        "confidence": result.confidence,
-                        "evidence": result.evidence,
-                        "reasoning": result.reasoning,
-                        "missing_info": result.missing_info
-                    } for url, result in ai_response.verification_result.items()
-                }
-            }
-
-            # Return response with AI fact check results
-            return SearchResponse(
-                results=results,
-                error_messages=error_messages,
-                ai_fact_check_result=formatted_response
-            )
-
-        except Exception as e:
-            logger.error(f"Error during AI fact check: {str(e)}")
-            error_messages["ai_fact_check"] = f"Error during fact checking: {str(e)}"
-
-    # Return response without AI fact check if no valid URLs or error occurred
-    return SearchResponse(
-        results=results,
-        error_messages=error_messages,
-        ai_fact_check_result=None
-    )
+        logger.info(f"Performing fact check with {len(fact_check_request.urls)} URLs")
+        fact_check_response = await ai_fact_check(fact_check_request)
+
+        # Get enhanced analysis
+        openai_client = OpenAIClient(OPENAI_API_KEY)
+        enhanced_response = await analyze_fact_check_results(
+            openai_client,
+            fact_check_response.dict()
+        )
+
+        return EnhancedFactCheckResponse(**enhanced_response)
+
+    except Exception as e:
+        logger.error(f"Error during search/fact-check process: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
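
Reviewer note (not part of the commit): a standalone sketch of the site-restriction string that build_enhanced_search_query() produces before google_custom_search() sends it to the Custom Search API. The domains and query below are placeholders, not the project's real SOURCES configuration.

domains = ["snopes.com", "politifact.com", "factcheck.org"]
site_restriction = " OR ".join(f"site:{d}" for d in domains)
enhanced_query = f"(who won the 2024 election) ({site_restriction})"
print(enhanced_query)
# (who won the 2024 election) (site:snopes.com OR site:politifact.com OR site:factcheck.org)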

View file

@@ -5,6 +5,7 @@ load_dotenv()
 
 GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
 GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
+GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"]
 OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
 FRONTEND_URL = os.environ["FRONTEND_URL"]

View file

@@ -0,0 +1,43 @@
+from pydantic import BaseModel
+from typing import List, Dict
+
+class SearchRequest(BaseModel):
+    search_text: str
+    source_types: List[str] = ["fact_checkers"]
+
+class Publisher(BaseModel):
+    name: str
+    site: str
+
+class ClaimReview(BaseModel):
+    publisher: Publisher
+    textualRating: str
+
+class Claim(BaseModel):
+    claimReview: List[ClaimReview]
+    claimant: str
+    text: str
+
+class Summary(BaseModel):
+    fact_checking_sites_queried: int
+    total_sources: int
+
+class TokenUsage(BaseModel):
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+class VerificationResult(BaseModel):
+    verdict: str
+    confidence: str
+    evidence: List[str]
+    reasoning: str
+
+class EnhancedFactCheckResponse(BaseModel):
+    query: str
+    results: List[Claim]
+    sources: List[str]
+    summary: Summary
+    token_usage: Dict[str, int]
+    total_claims_found: int
+    verification_result: VerificationResult
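
Reviewer note (not part of the commit): a quick sketch showing how the new response models nest together. The import path is hypothetical (the new file's name is not visible in this view) and the field values are illustrative only.

# Hypothetical module path for the new models file added in this commit.
from app.models.search_models import (
    Claim, ClaimReview, EnhancedFactCheckResponse, Publisher, Summary, VerificationResult
)

example = EnhancedFactCheckResponse(
    query="Example claim",
    results=[Claim(
        claimReview=[ClaimReview(
            publisher=Publisher(name="snopes.com", site="snopes.com"),
            textualRating="False"
        )],
        claimant="source",
        text="Example claim"
    )],
    sources=["snopes.com"],
    summary=Summary(fact_checking_sites_queried=1, total_sources=1),
    token_usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    total_claims_found=1,
    verification_result=VerificationResult(
        verdict="False",
        confidence="High",
        evidence=["Rated False by the publisher"],
        reasoning="Single source rating"
    )
)
print(example.dict())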