diff --git a/app/__pycache__/__init__.cpython-312.pyc b/app/__pycache__/__init__.cpython-312.pyc index ba12e2f..477fd12 100644 Binary files a/app/__pycache__/__init__.cpython-312.pyc and b/app/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc index f94cc0b..538cf22 100644 Binary files a/app/__pycache__/config.cpython-312.pyc and b/app/__pycache__/config.cpython-312.pyc differ diff --git a/app/api/__pycache__/__init__.cpython-312.pyc b/app/api/__pycache__/__init__.cpython-312.pyc index 3e21318..3c5c13c 100644 Binary files a/app/api/__pycache__/__init__.cpython-312.pyc and b/app/api/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc index 39e026b..c3f0bdb 100644 Binary files a/app/api/__pycache__/fact_check.cpython-312.pyc and b/app/api/__pycache__/fact_check.cpython-312.pyc differ diff --git a/app/api/fact_check.py b/app/api/fact_check.py index ee94bd6..7aa8ff1 100644 --- a/app/api/fact_check.py +++ b/app/api/fact_check.py @@ -1,9 +1,12 @@ from fastapi import APIRouter, HTTPException -import httpx import asyncio import logging +import httpx +import json +import re from typing import Union, Optional, Dict, Any -from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY +from datetime import datetime +from app.config import OPENAI_API_KEY,PERPLEXITY_API_KEY from app.api.scrap_websites import search_websites, SearchRequest from app.services.openai_client import OpenAIClient, AIFactChecker from app.services.image_text_extractor import ImageTextExtractor @@ -12,12 +15,10 @@ from app.models.fact_check_models import ( FactCheckRequest, FactCheckResponse, UnverifiedFactCheckResponse, - ErrorResponse, Source, VerdictEnum, ConfidenceEnum -) -from app.websites.fact_checker_website import get_all_sources +) # Setup logging logger = logging.getLogger(__name__) @@ -42,10 
+43,11 @@ async def process_url_content(url: str) -> Optional[str]: logger.error(f"Error extracting text from URL: {str(e)}") return None +# Assuming the enums and models like FactCheckResponse, VerdictEnum, etc., are already imported async def process_fact_check(query: str) -> Union[FactCheckResponse, UnverifiedFactCheckResponse]: - """Process a single fact check query.""" - if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: + if not PERPLEXITY_API_KEY: + logger.error("Perplexity API key not configured") return UnverifiedFactCheckResponse( claim=query, verdict=VerdictEnum.UNVERIFIED, @@ -56,51 +58,229 @@ async def process_fact_check(query: str) -> Union[FactCheckResponse, UnverifiedF additional_context="This is a temporary system configuration issue." ) - headers = {"Content-Type": "application/json"} - async with httpx.AsyncClient() as client: - fact_checker_sources = get_all_sources() + url = "https://api.perplexity.ai/chat/completions" + headers = { + "accept": "application/json", + "content-type": "application/json", + "Authorization": f"Bearer {PERPLEXITY_API_KEY}" + } - for source in fact_checker_sources: - params = { - "key": GOOGLE_API_KEY, - "query": query, - "languageCode": "en-US", - "reviewPublisherSiteFilter": source.domain, - "pageSize": 10, + payload = { + "model": "sonar", + "messages": [ + { + "role": "system", + "content": ( + "You are a precise fact checker. Analyze the following claim and determine if it's true, false, or partially true. " + "Provide a clear verdict, confidence level (HIGH, MEDIUM, LOW), and cite reliable sources. " + "Format your response as JSON with fields: verdict, confidence, sources (array of URLs), " + "evidence (key facts as a string), and explanation (detailed reasoning as a string)." 
+ ) + }, + { + "role": "user", + "content": f"Fact check this claim: {query}" + } + ] + } + + try: + async with httpx.AsyncClient(timeout=30) as client: + response = await client.post(url, headers=headers, json=payload) + response.raise_for_status() + result = response.json() + perplexity_response = result["choices"][0]["message"]["content"] + + # Attempt to extract JSON + try: + parsed_data = json.loads(perplexity_response) + except json.JSONDecodeError: + match = re.search(r'\{.*\}', perplexity_response, re.DOTALL) + if match: + parsed_data = json.loads(match.group(0)) + else: + parsed_data = extract_fact_check_info(perplexity_response) + + verdict_mapping = { + "true": VerdictEnum.TRUE, + "false": VerdictEnum.FALSE, + "partially true": VerdictEnum.PARTIALLY_TRUE, + "partially false": VerdictEnum.PARTIALLY_TRUE, + "unverified": VerdictEnum.UNVERIFIED } - try: - response = await client.get( - GOOGLE_FACT_CHECK_BASE_URL, params=params, headers=headers + confidence_mapping = { + "high": ConfidenceEnum.HIGH, + "medium": ConfidenceEnum.MEDIUM, + "low": ConfidenceEnum.LOW + } + + raw_verdict = parsed_data.get("verdict", "").lower() + verdict = verdict_mapping.get(raw_verdict, VerdictEnum.UNVERIFIED) + + raw_confidence = parsed_data.get("confidence", "").lower() + confidence = confidence_mapping.get(raw_confidence, ConfidenceEnum.MEDIUM) + + sources = [ + Source( + url=url, + domain=extract_domain(url), + title=f"Source from {extract_domain(url)}", + publisher=extract_domain(url), + date_published=None, + snippet="Source cited by Perplexity AI" ) - response.raise_for_status() - json_response = response.json() + for url in parsed_data.get("sources", []) + ] - if json_response.get("claims"): - return await generate_fact_report(query, json_response) + # Convert evidence to string if it's not already + evidence = parsed_data.get("evidence", "") + if isinstance(evidence, dict): + # Convert dictionary evidence to string format + evidence_str = "" + for key, value in 
evidence.items(): + evidence_str += f"{key}: {value}\n" + evidence = evidence_str.strip() + + # Convert explanation to string if it's not already + explanation = parsed_data.get("explanation", "") + if isinstance(explanation, dict): + explanation_str = "" + for key, value in explanation.items(): + explanation_str += f"{key}: {value}\n" + explanation = explanation_str.strip() - except Exception as e: - logger.error(f"Error with source {source.domain}: {str(e)}") - continue - - try: - search_request = SearchRequest( - search_text=query, - source_types=["fact_checkers"] + return FactCheckResponse( + claim=query, + verdict=verdict, + confidence=confidence, + sources=sources, + evidence=evidence, + explanation=explanation, + additional_context=f"Fact checked using PlanPost AI on {datetime.now().strftime('%Y-%m-%d')}" ) - ai_response = await search_websites(search_request) - return await generate_fact_report(query, ai_response) + except Exception as e: + logger.error(f"Fact check error: {str(e)}") + return UnverifiedFactCheckResponse( + claim=query, + verdict=VerdictEnum.UNVERIFIED, + confidence=ConfidenceEnum.LOW, + sources=[], + evidence=str(e), + explanation="Failed to contact Perplexity AI or parse its response.", + additional_context="Possible API issue or malformed response." + ) - except Exception as e: - logger.error(f"Error in AI fact check: {str(e)}") - return await generate_fact_report(query, { - "status": "no_results", - "verification_result": { - "no_sources_found": True, - "reason": str(e) - } - }) + + +def extract_domain(url: str) -> str: + """Extract domain from URL. 
+ + Args: + url: The URL to extract domain from + + Returns: + The domain name or "unknown" if parsing fails + """ + try: + from urllib.parse import urlparse + parsed_url = urlparse(url) + domain = parsed_url.netloc + return domain if domain else "unknown" + except Exception as e: + logger.warning(f"Failed to extract domain from URL {url}: {str(e)}") + return "unknown" + + +def extract_fact_check_info(text_response: str) -> Dict[str, Any]: + """Extract fact-checking information from a text response when JSON parsing fails. + + Args: + text_response: The text response from Perplexity AI + + Returns: + A dictionary with fact-checking information extracted from the text + """ + import re + + result = { + "verdict": "unverified", + "confidence": "medium", + "sources": [], + "evidence": "", + "explanation": "" + } + + # Try to extract verdict with more comprehensive pattern matching + verdict_patterns = [ + r'verdict[:\s]+(true|false|partially true|partially false|inconclusive|unverified)', + r'(true|false|partially true|partially false|inconclusive|unverified)[:\s]+verdict', + r'claim is (true|false|partially true|partially false|inconclusive|unverified)', + r'statement is (true|false|partially true|partially false|inconclusive|unverified)' + ] + + for pattern in verdict_patterns: + verdict_match = re.search(pattern, text_response.lower(), re.IGNORECASE) + if verdict_match: + result["verdict"] = verdict_match.group(1) + break + + # Try to extract confidence with multiple patterns + confidence_patterns = [ + r'confidence[:\s]+(high|medium|low)', + r'(high|medium|low)[:\s]+confidence', + r'confidence level[:\s]+(high|medium|low)', + r'(high|medium|low)[:\s]+confidence level' + ] + + for pattern in confidence_patterns: + confidence_match = re.search(pattern, text_response.lower(), re.IGNORECASE) + if confidence_match: + result["confidence"] = confidence_match.group(1) + break + + # Try to extract URLs as sources - more robust pattern + urls = 
re.findall(r'https?://[^\s"\'\]\)]+', text_response) + # Filter out any malformed URLs + valid_urls = [] + for url in urls: + if '.' in url and len(url) > 10: # Basic validation + valid_urls.append(url) + result["sources"] = valid_urls + + # Try to extract evidence and explanation with multiple patterns + evidence_patterns = [ + r'evidence[:\s]+(.*?)(?=explanation|\Z)', + r'key facts[:\s]+(.*?)(?=explanation|\Z)', + r'facts[:\s]+(.*?)(?=explanation|\Z)' + ] + + for pattern in evidence_patterns: + evidence_match = re.search(pattern, text_response, re.IGNORECASE | re.DOTALL) + if evidence_match: + result["evidence"] = evidence_match.group(1).strip() + break + + explanation_patterns = [ + r'explanation[:\s]+(.*?)(?=\Z)', + r'reasoning[:\s]+(.*?)(?=\Z)', + r'analysis[:\s]+(.*?)(?=\Z)' + ] + + for pattern in explanation_patterns: + explanation_match = re.search(pattern, text_response, re.IGNORECASE | re.DOTALL) + if explanation_match: + result["explanation"] = explanation_match.group(1).strip() + break + + # If no structured information found, use the whole response as evidence + if not result["evidence"] and not result["explanation"]: + result["evidence"] = text_response + # Generate a minimal explanation if none was found + result["explanation"] = "The fact-checking service provided information about this claim but did not structure it in the expected format. The full response has been included as evidence for you to review." 
+ + return result async def generate_fact_report(query: str, fact_check_data: dict | AIFactCheckResponse) -> Union[FactCheckResponse, UnverifiedFactCheckResponse]: diff --git a/app/config.py b/app/config.py index 6e7437c..6a54faa 100644 --- a/app/config.py +++ b/app/config.py @@ -7,6 +7,7 @@ GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"] GOOGLE_FACT_CHECK_BASE_URL = os.environ["GOOGLE_FACT_CHECK_BASE_URL"] GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"] GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"] +PERPLEXITY_API_KEY = os.environ["PERPLEXITY_API_KEY"] OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] FRONTEND_URL = os.environ["FRONTEND_URL"] diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc index 64a9403..23d388c 100644 Binary files a/app/models/__pycache__/fact_check_models.cpython-312.pyc and b/app/models/__pycache__/fact_check_models.cpython-312.pyc differ diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc index c943a2c..68d90e7 100644 Binary files a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc and b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc differ diff --git a/requirements.txt b/requirements.txt index d4bd3bf..9a9117f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,7 +43,7 @@ mdurl==0.1.2 multidict==6.1.0 mypy-extensions==1.0.0 numpy==1.26.4 -openai==0.28.0 +openai==1.23.6 orjson==3.10.12 packaging==24.2 pathspec==0.12.1