diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc index 53d89a6..267bba1 100644 Binary files a/app/__pycache__/config.cpython-312.pyc and b/app/__pycache__/config.cpython-312.pyc differ diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc index daa45a8..56cc898 100644 Binary files a/app/api/__pycache__/fact_check.cpython-312.pyc and b/app/api/__pycache__/fact_check.cpython-312.pyc differ diff --git a/app/api/fact_check.py b/app/api/fact_check.py index 3e7a12d..dcd30de 100644 --- a/app/api/fact_check.py +++ b/app/api/fact_check.py @@ -1,11 +1,13 @@ from fastapi import APIRouter, HTTPException -from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict -from typing import Dict, List, Optional, Union -import requests -from enum import Enum -from datetime import datetime import json -from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL +from datetime import datetime +from typing import Dict + +from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL +from app.models.fact_check_models import ( + FactCheckResponse, FactCheckRequest, Claim, ErrorResponse +) +from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources fact_check_router = APIRouter() @@ -15,199 +17,6 @@ class CustomJSONEncoder(json.JSONEncoder): return obj.isoformat() return super().default(obj) -class ErrorResponse(BaseModel): - detail: str - error_code: str = Field(..., description="Unique error code for this type of error") - timestamp: str = Field(default_factory=lambda: datetime.now().isoformat()) - path: Optional[str] = Field(None, description="The endpoint path where error occurred") - - model_config = ConfigDict(json_schema_extra={ - "example": { - "detail": "Error description", - "error_code": "ERROR_CODE", - "timestamp": "2024-12-09T16:49:30.905765", - "path": "/check-facts" - } - }) - -class RequestValidationError(BaseModel): - loc: List[str] - msg: str - type: str - -class Publisher(BaseModel): - name: str - site: Optional[str] = Field(None, description="Publisher's website") - - @validator('site') - def validate_site(cls, v): - if v and not (v.startswith('http://') or v.startswith('https://')): - return f"https://{v}" - return v - -class ClaimReview(BaseModel): - publisher: Publisher - url: Optional[HttpUrl] = None - title: Optional[str] = None - reviewDate: Optional[str] = None - textualRating: Optional[str] = None - languageCode: str = Field(default="en-US") - -class Claim(BaseModel): - text: str - claimant: Optional[str] = None - claimDate: Optional[str] = None - claimReview: List[ClaimReview] - -class FactCheckResponse(BaseModel): - query: str = Field(..., description="Original query that was fact-checked") - total_claims_found: int = Field(..., ge=0) - results: List[Claim] = Field(default_factory=list) - summary: Dict[str, int] = Field(...) - - model_config = ConfigDict(json_schema_extra={ - "example": { - "query": "Example claim", - "total_claims_found": 1, - "results": [{ - "text": "Example claim text", - "claimant": "Source name", - "claimReview": [{ - "publisher": { - "name": "Fact Checker", - "site": "factchecker.com" - }, - "textualRating": "True" - }] - }], - "summary": { - "total_sources": 1, - "fact_checking_sites_queried": 10 - } - } - }) - -class SourceType(str, Enum): - FACT_CHECKER = "fact_checker" - NEWS_SITE = "news_site" - -class FactCheckSource(BaseModel): - domain: str - type: SourceType - priority: int = Field(default=1, ge=1, le=10) - - model_config = ConfigDict(json_schema_extra={ - "example": { - "domain": "factcheck.org", - "type": "fact_checker", - "priority": 1 - } - }) - -# Sources configuration with validation -SOURCES = { - "fact_checkers": [ - FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1) - for domain in [ - "factcheck.org", - "snopes.com", - "politifact.com", - "reuters.com", - "bbc.com", - "apnews.com", - "usatoday.com", - "nytimes.com", - "washingtonpost.com", - "afp.com", - "fullfact.org", - "truthorfiction.com", - "leadstories.com", - "altnews.in", - "boomlive.in", - "en.prothomalo.com" - ] - ], - "news_sites": [ - FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2) - for domain in [ - "www.thedailystar.net", - "www.thefinancialexpress.com.bd", - "www.theindependentbd.com", - "www.dhakatribune.com", - "www.newagebd.net", - "www.observerbd.com", - "www.daily-sun.com", - "www.tbsnews.net", - "www.businesspostbd.com", - "www.banglanews24.com/english", - "www.bdnews24.com/english", - "www.risingbd.com/english", - "www.dailyindustry.news", - "www.bangladeshpost.net", - "www.daily-bangladesh.com/english" - ] - ] -} - -class FactCheckRequest(BaseModel): - content: str = Field( - ..., - min_length=10, - max_length=1000, - description="The claim to be fact-checked" - ) - language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") - max_results_per_source: int = Field(default=10, ge=1, le=50) - - @validator('content') - def validate_content(cls, v): - if not v.strip(): - raise ValueError("Content cannot be empty or just whitespace") - return v.strip() - -async def fetch_fact_checks( - api_key: str, - base_url: str, - query: str, - site: FactCheckSource -) -> Dict: - """ - Fetch fact checks from a specific site using the Google Fact Check API - """ - try: - if not api_key or not base_url: - raise ValueError("API key or base URL not configured") - - params = { - "key": api_key, - "query": query, - "languageCode": "en-US", - "reviewPublisherSiteFilter": site.domain, - "pageSize": 10 - } - - response = requests.get(base_url, params=params) - response.raise_for_status() - return response.json() - except requests.RequestException as e: - raise HTTPException( - status_code=503, - detail=ErrorResponse( - detail=f"Error fetching from {site.domain}: {str(e)}", - error_code="FACT_CHECK_SERVICE_ERROR", - path="/check-facts" - ).dict() - ) - except ValueError as e: - raise HTTPException( - status_code=500, - detail=ErrorResponse( - detail=str(e), - error_code="CONFIGURATION_ERROR", - path="/check-facts" - ).dict() - ) - @fact_check_router.post( "/check-facts", response_model=FactCheckResponse, @@ -225,7 +34,7 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: all_results = [] # Validate configuration - if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: + if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: raise HTTPException( status_code=500, detail=ErrorResponse( @@ -235,17 +44,13 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: ).dict() ) - # Check all sources in priority order - all_sources = ( - SOURCES["fact_checkers"] + - SOURCES["news_sites"] - ) - all_sources.sort(key=lambda x: x.priority) + # Get all sources in priority order + all_sources = get_all_sources() for source in all_sources: try: result = await fetch_fact_checks( - GOOGLE_FACT_CHECK_API_KEY, + GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, request.content, source diff --git a/app/config.py b/app/config.py index d9de9e9..a13fd4d 100644 --- a/app/config.py +++ b/app/config.py @@ -3,7 +3,7 @@ from dotenv import load_dotenv load_dotenv() -GOOGLE_FACT_CHECK_API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"] +GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"] GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"] OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc new file mode 100644 index 0000000..e2c8b7c Binary files /dev/null and b/app/models/__pycache__/fact_check_models.cpython-312.pyc differ diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py new file mode 100644 index 0000000..6c85771 --- /dev/null +++ b/app/models/fact_check_models.py @@ -0,0 +1,109 @@ +from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict +from typing import Dict, List, Optional +from enum import Enum +from datetime import datetime + +class ErrorResponse(BaseModel): + detail: str + error_code: str = Field(..., description="Unique error code for this type of error") + timestamp: str = Field(default_factory=lambda: datetime.now().isoformat()) + path: Optional[str] = Field(None, description="The endpoint path where error occurred") + + model_config = ConfigDict(json_schema_extra={ + "example": { + "detail": "Error description", + "error_code": "ERROR_CODE", + "timestamp": "2024-12-09T16:49:30.905765", + "path": "/check-facts" + } + }) + +class RequestValidationError(BaseModel): + loc: List[str] + msg: str + type: str + +class Publisher(BaseModel): + name: str + site: Optional[str] = Field(None, description="Publisher's website") + + @validator('site') + def validate_site(cls, v): + if v and not (v.startswith('http://') or v.startswith('https://')): + return f"https://{v}" + return v + +class ClaimReview(BaseModel): + publisher: Publisher + url: Optional[HttpUrl] = None + title: Optional[str] = None + reviewDate: Optional[str] = None + textualRating: Optional[str] = None + languageCode: str = Field(default="en-US") + +class Claim(BaseModel): + text: str + claimant: Optional[str] = None + claimDate: Optional[str] = None + claimReview: List[ClaimReview] + +class FactCheckResponse(BaseModel): + query: str = Field(..., description="Original query that was fact-checked") + total_claims_found: int = Field(..., ge=0) + results: List[Claim] = Field(default_factory=list) + summary: Dict[str, int] = Field(...) + + model_config = ConfigDict(json_schema_extra={ + "example": { + "query": "Example claim", + "total_claims_found": 1, + "results": [{ + "text": "Example claim text", + "claimant": "Source name", + "claimReview": [{ + "publisher": { + "name": "Fact Checker", + "site": "factchecker.com" + }, + "textualRating": "True" + }] + }], + "summary": { + "total_sources": 1, + "fact_checking_sites_queried": 10 + } + } + }) + +class SourceType(str, Enum): + FACT_CHECKER = "fact_checker" + NEWS_SITE = "news_site" + +class FactCheckSource(BaseModel): + domain: str + type: SourceType + priority: int = Field(default=1, ge=1, le=10) + + model_config = ConfigDict(json_schema_extra={ + "example": { + "domain": "factcheck.org", + "type": "fact_checker", + "priority": 1 + } + }) + +class FactCheckRequest(BaseModel): + content: str = Field( + ..., + min_length=10, + max_length=1000, + description="The claim to be fact-checked" + ) + language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") + max_results_per_source: int = Field(default=10, ge=1, le=50) + + @validator('content') + def validate_content(cls, v): + if not v.strip(): + raise ValueError("Content cannot be empty or just whitespace") + return v.strip() \ No newline at end of file diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc new file mode 100644 index 0000000..5fe153f Binary files /dev/null and b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc differ diff --git a/app/websites/fact_checker_website.py b/app/websites/fact_checker_website.py new file mode 100644 index 0000000..d6fae44 --- /dev/null +++ b/app/websites/fact_checker_website.py @@ -0,0 +1,99 @@ +from typing import Dict, List +import requests +from fastapi import HTTPException +from app.models.fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType + +# Sources configuration with validation +SOURCES = { + "fact_checkers": [ + FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1) + for domain in [ + "factcheck.org", + "snopes.com", + "politifact.com", + "reuters.com", + "bbc.com", + "apnews.com", + "usatoday.com", + "nytimes.com", + "washingtonpost.com", + "afp.com", + "fullfact.org", + "truthorfiction.com", + "leadstories.com", + "altnews.in", + "boomlive.in", + "en.prothomalo.com" + ] + ], + "news_sites": [ + FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2) + for domain in [ + "www.thedailystar.net", + "www.thefinancialexpress.com.bd", + "www.theindependentbd.com", + "www.dhakatribune.com", + "www.newagebd.net", + "www.observerbd.com", + "www.daily-sun.com", + "www.tbsnews.net", + "www.businesspostbd.com", + "www.banglanews24.com/english", + "www.bdnews24.com/english", + "www.risingbd.com/english", + "www.dailyindustry.news", + "www.bangladeshpost.net", + "www.daily-bangladesh.com/english" + ] + ] +} + +async def fetch_fact_checks( + api_key: str, + base_url: str, + query: str, + site: FactCheckSource +) -> Dict: + """ + Fetch fact checks from a specific site using the Google Fact Check API + """ + try: + if not api_key or not base_url: + raise ValueError("API key or base URL not configured") + + params = { + "key": api_key, + "query": query, + "languageCode": "en-US", + "reviewPublisherSiteFilter": site.domain, + "pageSize": 10 + } + + response = requests.get(base_url, params=params) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + raise HTTPException( + status_code=503, + detail=ErrorResponse( + detail=f"Error fetching from {site.domain}: {str(e)}", + error_code="FACT_CHECK_SERVICE_ERROR", + path="/check-facts" + ).dict() + ) + except ValueError as e: + raise HTTPException( + status_code=500, + detail=ErrorResponse( + detail=str(e), + error_code="CONFIGURATION_ERROR", + path="/check-facts" + ).dict() + ) + +def get_all_sources() -> List[FactCheckSource]: + """ + Get all sources sorted by priority + """ + all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"] + return sorted(all_sources, key=lambda x: x.priority) \ No newline at end of file