commit 9f87639b51d9f3846795d9d3c0b65a2a8b0b7500 Author: utshodeytech Date: Mon Dec 9 17:18:09 2024 +0600 base code added diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..21d6e87 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +env +.env +test.py +/__pycache__/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e69de29 diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/__pycache__/__init__.cpython-312.pyc b/app/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..ba12e2f Binary files /dev/null and b/app/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000..53d89a6 Binary files /dev/null and b/app/__pycache__/config.cpython-312.pyc differ diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/api/__pycache__/__init__.cpython-312.pyc b/app/api/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..3e21318 Binary files /dev/null and b/app/api/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc new file mode 100644 index 0000000..daa45a8 Binary files /dev/null and b/app/api/__pycache__/fact_check.cpython-312.pyc differ diff --git a/app/api/fact_check.py b/app/api/fact_check.py new file mode 100644 index 0000000..3e7a12d --- /dev/null +++ b/app/api/fact_check.py @@ -0,0 +1,291 @@ +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict +from typing import Dict, List, Optional, Union +import requests +from enum import Enum +from datetime import datetime +import json +from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL + +fact_check_router = APIRouter() + +class CustomJSONEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, datetime): + return obj.isoformat() + return super().default(obj) + +class ErrorResponse(BaseModel): + detail: str + error_code: str = Field(..., description="Unique error code for this type of error") + timestamp: str = Field(default_factory=lambda: datetime.now().isoformat()) + path: Optional[str] = Field(None, description="The endpoint path where error occurred") + + model_config = ConfigDict(json_schema_extra={ + "example": { + "detail": "Error description", + "error_code": "ERROR_CODE", + "timestamp": "2024-12-09T16:49:30.905765", + "path": "/check-facts" + } + }) + +class RequestValidationError(BaseModel): + loc: List[str] + msg: str + type: str + +class Publisher(BaseModel): + name: str + site: Optional[str] = Field(None, description="Publisher's website") + + @validator('site') + def validate_site(cls, v): + if v and not (v.startswith('http://') or v.startswith('https://')): + return f"https://{v}" + return v + +class ClaimReview(BaseModel): + publisher: Publisher + url: Optional[HttpUrl] = None + title: Optional[str] = None + reviewDate: Optional[str] = None + textualRating: Optional[str] = None + languageCode: str = Field(default="en-US") + +class Claim(BaseModel): + text: str + claimant: Optional[str] = None + claimDate: Optional[str] = None + claimReview: List[ClaimReview] + +class FactCheckResponse(BaseModel): + query: str = Field(..., description="Original query that was fact-checked") + total_claims_found: int = Field(..., ge=0) + results: List[Claim] = Field(default_factory=list) + summary: Dict[str, int] = Field(...) + + model_config = ConfigDict(json_schema_extra={ + "example": { + "query": "Example claim", + "total_claims_found": 1, + "results": [{ + "text": "Example claim text", + "claimant": "Source name", + "claimReview": [{ + "publisher": { + "name": "Fact Checker", + "site": "factchecker.com" + }, + "textualRating": "True" + }] + }], + "summary": { + "total_sources": 1, + "fact_checking_sites_queried": 10 + } + } + }) + +class SourceType(str, Enum): + FACT_CHECKER = "fact_checker" + NEWS_SITE = "news_site" + +class FactCheckSource(BaseModel): + domain: str + type: SourceType + priority: int = Field(default=1, ge=1, le=10) + + model_config = ConfigDict(json_schema_extra={ + "example": { + "domain": "factcheck.org", + "type": "fact_checker", + "priority": 1 + } + }) + +# Sources configuration with validation +SOURCES = { + "fact_checkers": [ + FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1) + for domain in [ + "factcheck.org", + "snopes.com", + "politifact.com", + "reuters.com", + "bbc.com", + "apnews.com", + "usatoday.com", + "nytimes.com", + "washingtonpost.com", + "afp.com", + "fullfact.org", + "truthorfiction.com", + "leadstories.com", + "altnews.in", + "boomlive.in", + "en.prothomalo.com" + ] + ], + "news_sites": [ + FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2) + for domain in [ + "www.thedailystar.net", + "www.thefinancialexpress.com.bd", + "www.theindependentbd.com", + "www.dhakatribune.com", + "www.newagebd.net", + "www.observerbd.com", + "www.daily-sun.com", + "www.tbsnews.net", + "www.businesspostbd.com", + "www.banglanews24.com/english", + "www.bdnews24.com/english", + "www.risingbd.com/english", + "www.dailyindustry.news", + "www.bangladeshpost.net", + "www.daily-bangladesh.com/english" + ] + ] +} + +class FactCheckRequest(BaseModel): + content: str = Field( + ..., + min_length=10, + max_length=1000, + description="The claim to be fact-checked" + ) + language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") + max_results_per_source: int = Field(default=10, ge=1, le=50) + + @validator('content') + def validate_content(cls, v): + if not v.strip(): + raise ValueError("Content cannot be empty or just whitespace") + return v.strip() + +async def fetch_fact_checks( + api_key: str, + base_url: str, + query: str, + site: FactCheckSource +) -> Dict: + """ + Fetch fact checks from a specific site using the Google Fact Check API + """ + try: + if not api_key or not base_url: + raise ValueError("API key or base URL not configured") + + params = { + "key": api_key, + "query": query, + "languageCode": "en-US", + "reviewPublisherSiteFilter": site.domain, + "pageSize": 10 + } + + response = requests.get(base_url, params=params) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + raise HTTPException( + status_code=503, + detail=ErrorResponse( + detail=f"Error fetching from {site.domain}: {str(e)}", + error_code="FACT_CHECK_SERVICE_ERROR", + path="/check-facts" + ).dict() + ) + except ValueError as e: + raise HTTPException( + status_code=500, + detail=ErrorResponse( + detail=str(e), + error_code="CONFIGURATION_ERROR", + path="/check-facts" + ).dict() + ) + +@fact_check_router.post( + "/check-facts", + response_model=FactCheckResponse, + responses={ + 400: {"model": ErrorResponse}, + 404: {"model": ErrorResponse}, + 500: {"model": ErrorResponse}, + 503: {"model": ErrorResponse} + } +) +async def check_facts(request: FactCheckRequest) -> FactCheckResponse: + """ + Check facts using multiple fact-checking sources + """ + all_results = [] + + # Validate configuration + if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: + raise HTTPException( + status_code=500, + detail=ErrorResponse( + detail="API configuration is missing", + error_code="CONFIGURATION_ERROR", + path="/check-facts" + ).dict() + ) + + # Check all sources in priority order + all_sources = ( + SOURCES["fact_checkers"] + + SOURCES["news_sites"] + ) + all_sources.sort(key=lambda x: x.priority) + + for source in all_sources: + try: + result = await fetch_fact_checks( + GOOGLE_FACT_CHECK_API_KEY, + GOOGLE_FACT_CHECK_BASE_URL, + request.content, + source + ) + + if "claims" in result: + # Validate each claim through Pydantic + validated_claims = [ + Claim(**claim).dict() + for claim in result["claims"] + ] + all_results.extend(validated_claims) + + except HTTPException: + raise + except Exception as e: + # Log the error but continue with other sources + print(f"Error processing {source.domain}: {str(e)}") + continue + + if not all_results: + raise HTTPException( + status_code=404, + detail=ErrorResponse( + detail="No fact check results found", + error_code="NO_RESULTS_FOUND", + path="/check-facts" + ).dict() + ) + + # Create the response using Pydantic model + response = FactCheckResponse( + query=request.content, + total_claims_found=len(all_results), + results=all_results, + summary={ + "total_sources": len(set(claim.get("claimReview", [{}])[0].get("publisher", {}).get("site", "") + for claim in all_results if claim.get("claimReview"))), + "fact_checking_sites_queried": len(all_sources) + } + ) + + return response \ No newline at end of file diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..d9de9e9 --- /dev/null +++ b/app/config.py @@ -0,0 +1,10 @@ +import os +from dotenv import load_dotenv + +load_dotenv() + +GOOGLE_FACT_CHECK_API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"] +GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"] + +OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] +FRONTEND_URL = os.environ["FRONTEND_URL"] \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..6b79e28 --- /dev/null +++ b/main.py @@ -0,0 +1,49 @@ +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from app.api.fact_check import fact_check_router +from app.config import FRONTEND_URL + +# Initialize FastAPI app +app = FastAPI( + title="Your API Title", + description="Your API Description", + version="1.0.0" +) + +# CORS configuration +origins = [ + FRONTEND_URL, + "http://localhost", + "http://localhost:5173", + "http://0.0.0.0", + "http://0.0.0.0:5173", +] + + +app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Basic root endpoint +@app.get("/") +async def root(): + return {"message": "Welcome to your FastAPI application"} + +# Health check endpoint +@app.get("/health") +async def health_check(): + return {"status": "healthy"} + +app.include_router(fact_check_router, prefix="") + +# Include routers (uncomment and modify as needed) +# from routes import some_router +# app.include_router(some_router, prefix="/your-prefix", tags=["your-tag"]) + +if __name__ == "__main__": + import uvicorn + uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..418141f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +certifi==2024.8.30 +charset-normalizer==3.4.0 +idna==3.10 +python-dotenv==1.0.1 +requests==2.32.3 +urllib3==2.2.3