Dev #1

Merged
utshodey merged 5 commits from dev into master 2024-12-17 11:33:44 +00:00
8 changed files with 221 additions and 208 deletions
Showing only changes of commit 83a886960b - Show all commits

View file

@ -1,11 +1,13 @@
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
from typing import Dict, List, Optional, Union
import requests
from enum import Enum
from datetime import datetime
import json
from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL from datetime import datetime
from typing import Dict
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
from app.models.fact_check_models import (
FactCheckResponse, FactCheckRequest, Claim, ErrorResponse
)
from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources
fact_check_router = APIRouter()
@ -15,199 +17,6 @@ class CustomJSONEncoder(json.JSONEncoder):
return obj.isoformat() return obj.isoformat()
return super().default(obj) return super().default(obj)
class ErrorResponse(BaseModel):
    """Standard error payload embedded in HTTPException detail bodies."""

    detail: str  # human-readable description of the failure
    error_code: str = Field(..., description="Unique error code for this type of error")
    # NOTE(review): naive local time, not UTC — confirm consumers expect local time
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    path: Optional[str] = Field(None, description="The endpoint path where error occurred")

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "detail": "Error description",
            "error_code": "ERROR_CODE",
            "timestamp": "2024-12-09T16:49:30.905765",
            "path": "/check-facts"
        }
    })
class RequestValidationError(BaseModel):
    """One validation-error entry; appears to mirror FastAPI's error items — confirm."""

    loc: List[str]  # location path of the offending field
    msg: str        # human-readable message
    type: str       # error type identifier (field name, not the builtin)
class Publisher(BaseModel):
    """Fact-check publisher; scheme-less site values are normalized to https:// URLs."""

    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    # NOTE(review): @validator is the pydantic v1 API, deprecated under v2
    # (this file already uses ConfigDict) — consider @field_validator.
    @validator('site')
    def validate_site(cls, v):
        """Prefix bare domains with https://; pass None and full URLs through."""
        if v and not (v.startswith('http://') or v.startswith('https://')):
            return f"https://{v}"
        return v
class ClaimReview(BaseModel):
    """One publisher's review of a claim, as returned by the fact-check API."""

    publisher: Publisher
    url: Optional[HttpUrl] = None
    title: Optional[str] = None
    reviewDate: Optional[str] = None      # date string from the API, kept verbatim
    textualRating: Optional[str] = None   # e.g. "True" (see FactCheckResponse example)
    languageCode: str = Field(default="en-US")
class Claim(BaseModel):
    """A fact-checked claim together with every review found for it."""

    text: str
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]
class FactCheckResponse(BaseModel):
    """Aggregated result returned by /check-facts for a single query."""

    query: str = Field(..., description="Original query that was fact-checked")
    total_claims_found: int = Field(..., ge=0)
    results: List[Claim] = Field(default_factory=list)
    # Counters such as total_sources / fact_checking_sites_queried (see example).
    summary: Dict[str, int] = Field(...)

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example claim",
            "total_claims_found": 1,
            "results": [{
                "text": "Example claim text",
                "claimant": "Source name",
                "claimReview": [{
                    "publisher": {
                        "name": "Fact Checker",
                        "site": "factchecker.com"
                    },
                    "textualRating": "True"
                }]
            }],
            "summary": {
                "total_sources": 1,
                "fact_checking_sites_queried": 10
            }
        }
    })
class SourceType(str, Enum):
    """Category of a configured source: dedicated fact-checker vs general news site."""

    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"
class FactCheckSource(BaseModel):
    """A single domain queried through the fact-check API, with lookup priority."""

    domain: str  # bare domain passed as reviewPublisherSiteFilter
    type: SourceType
    priority: int = Field(default=1, ge=1, le=10)  # lower number = queried earlier

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "domain": "factcheck.org",
            "type": "fact_checker",
            "priority": 1
        }
    })
# Sources configuration with validation.
# Priority 1: dedicated fact-checking organizations; priority 2: general
# (largely Bangladeshi) news sites. Lower priority values are queried first
# (the check-facts handler sorts by priority before iterating).
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in [
            "factcheck.org",
            "snopes.com",
            "politifact.com",
            "reuters.com",
            "bbc.com",
            "apnews.com",
            "usatoday.com",
            "nytimes.com",
            "washingtonpost.com",
            "afp.com",
            "fullfact.org",
            "truthorfiction.com",
            "leadstories.com",
            "altnews.in",
            "boomlive.in",
            "en.prothomalo.com"
        ]
    ],
    "news_sites": [
        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
        for domain in [
            "www.thedailystar.net",
            "www.thefinancialexpress.com.bd",
            "www.theindependentbd.com",
            "www.dhakatribune.com",
            "www.newagebd.net",
            "www.observerbd.com",
            "www.daily-sun.com",
            "www.tbsnews.net",
            "www.businesspostbd.com",
            "www.banglanews24.com/english",
            "www.bdnews24.com/english",
            "www.risingbd.com/english",
            "www.dailyindustry.news",
            "www.bangladeshpost.net",
            "www.daily-bangladesh.com/english"
        ]
    ]
}
class FactCheckRequest(BaseModel):
    """Validated request body for the /check-facts endpoint."""

    content: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked"
    )
    # Locale code such as "en-US"
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(default=10, ge=1, le=50)

    # NOTE(review): @validator is the pydantic v1 API, deprecated under v2
    # (this file already uses ConfigDict) — consider @field_validator.
    @validator('content')
    def validate_content(cls, v):
        """Reject whitespace-only content and strip surrounding whitespace."""
        if not v.strip():
            raise ValueError("Content cannot be empty or just whitespace")
        return v.strip()
async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource,
    *,
    language_code: str = "en-US",
    page_size: int = 10,
    timeout: float = 10.0,
) -> Dict:
    """
    Fetch fact checks for *query* from one publisher via the Google Fact Check API.

    Args:
        api_key: Google Fact Check Tools API key.
        base_url: claims:search endpoint URL.
        query: the claim text to search for.
        site: publisher whose reviews should be returned.
        language_code: language filter (previously hard-coded to "en-US").
        page_size: maximum results per call (previously hard-coded to 10).
        timeout: per-request timeout in seconds — without one, requests.get
            can block a worker forever on an unresponsive upstream.

    Raises:
        HTTPException: 503 on upstream/network failure, 500 on missing config.
    """
    try:
        if not api_key or not base_url:
            raise ValueError("API key or base URL not configured")
        params = {
            "key": api_key,
            "query": query,
            "languageCode": language_code,
            "reviewPublisherSiteFilter": site.domain,
            "pageSize": page_size
        }
        # NOTE(review): requests is a blocking call inside an async def, so it
        # stalls the event loop for the whole request; an async HTTP client
        # would fix this properly.
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()  # surfaces 4xx/5xx as RequestException
        return response.json()
    except requests.RequestException as e:
        # Timeouts and HTTP errors are both RequestException subclasses.
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts"
            ).dict()
        )
    except ValueError as e:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail=str(e),
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )
@fact_check_router.post( @fact_check_router.post(
"/check-facts", "/check-facts",
response_model=FactCheckResponse, response_model=FactCheckResponse,
@ -225,7 +34,7 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
all_results = [] all_results = []
# Validate configuration # Validate configuration
if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
raise HTTPException( raise HTTPException(
status_code=500, status_code=500,
detail=ErrorResponse( detail=ErrorResponse(
@ -235,17 +44,13 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
).dict() ).dict()
) )
# Check all sources in priority order # Get all sources in priority order
all_sources = ( all_sources = get_all_sources()
SOURCES["fact_checkers"] +
SOURCES["news_sites"]
)
all_sources.sort(key=lambda x: x.priority)
for source in all_sources: for source in all_sources:
try: try:
result = await fetch_fact_checks( result = await fetch_fact_checks(
GOOGLE_FACT_CHECK_API_KEY, GOOGLE_API_KEY,
GOOGLE_FACT_CHECK_BASE_URL, GOOGLE_FACT_CHECK_BASE_URL,
request.content, request.content,
source source

View file

@ -3,7 +3,7 @@ from dotenv import load_dotenv
load_dotenv()
GOOGLE_FACT_CHECK_API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"] GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
GOOGLE_FACT_CHECK_BASE_URL = os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

View file

@ -0,0 +1,109 @@
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator, validator
class ErrorResponse(BaseModel):
    """Standard error payload embedded in HTTPException detail bodies."""

    detail: str  # human-readable description of the failure
    error_code: str = Field(..., description="Unique error code for this type of error")
    # NOTE(review): naive local time, not UTC — confirm consumers expect local time
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    path: Optional[str] = Field(None, description="The endpoint path where error occurred")

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "detail": "Error description",
            "error_code": "ERROR_CODE",
            "timestamp": "2024-12-09T16:49:30.905765",
            "path": "/check-facts"
        }
    })
class RequestValidationError(BaseModel):
    """One validation-error entry; appears to mirror FastAPI's error items — confirm."""

    loc: List[str]  # location path of the offending field
    msg: str        # human-readable message
    type: str       # error type identifier (field name, not the builtin)
class Publisher(BaseModel):
    """Fact-check publisher; scheme-less site values are normalized to https:// URLs."""

    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    # pydantic v2 (ConfigDict is already used in this module) deprecates
    # @validator; @field_validator is the supported replacement.
    @field_validator('site')
    @classmethod
    def validate_site(cls, v: Optional[str]) -> Optional[str]:
        """Prefix bare domains with https://; pass None and full URLs through."""
        if v and not (v.startswith('http://') or v.startswith('https://')):
            return f"https://{v}"
        return v
class ClaimReview(BaseModel):
    """One publisher's review of a claim, as returned by the fact-check API."""

    publisher: Publisher
    url: Optional[HttpUrl] = None
    title: Optional[str] = None
    reviewDate: Optional[str] = None      # date string from the API, kept verbatim
    textualRating: Optional[str] = None   # e.g. "True" (see FactCheckResponse example)
    languageCode: str = Field(default="en-US")
class Claim(BaseModel):
    """A fact-checked claim together with every review found for it."""

    text: str
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]
class FactCheckResponse(BaseModel):
    """Aggregated result returned by /check-facts for a single query."""

    query: str = Field(..., description="Original query that was fact-checked")
    total_claims_found: int = Field(..., ge=0)
    results: List[Claim] = Field(default_factory=list)
    # Counters such as total_sources / fact_checking_sites_queried (see example).
    summary: Dict[str, int] = Field(...)

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example claim",
            "total_claims_found": 1,
            "results": [{
                "text": "Example claim text",
                "claimant": "Source name",
                "claimReview": [{
                    "publisher": {
                        "name": "Fact Checker",
                        "site": "factchecker.com"
                    },
                    "textualRating": "True"
                }]
            }],
            "summary": {
                "total_sources": 1,
                "fact_checking_sites_queried": 10
            }
        }
    })
class SourceType(str, Enum):
    """Category of a configured source: dedicated fact-checker vs general news site."""

    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"
class FactCheckSource(BaseModel):
    """A single domain queried through the fact-check API, with lookup priority."""

    domain: str  # bare domain passed as reviewPublisherSiteFilter
    type: SourceType
    priority: int = Field(default=1, ge=1, le=10)  # lower number = queried earlier

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "domain": "factcheck.org",
            "type": "fact_checker",
            "priority": 1
        }
    })
class FactCheckRequest(BaseModel):
    """Validated request body for the /check-facts endpoint."""

    content: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked"
    )
    # Locale code such as "en-US"
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(default=10, ge=1, le=50)

    # pydantic v2 (ConfigDict is already used in this module) deprecates
    # @validator; @field_validator is the supported replacement.
    @field_validator('content')
    @classmethod
    def validate_content(cls, v: str) -> str:
        """Reject whitespace-only content; return it stripped."""
        stripped = v.strip()
        if not stripped:
            raise ValueError("Content cannot be empty or just whitespace")
        return stripped

View file

@ -0,0 +1,99 @@
from typing import Dict, List
import requests
from fastapi import HTTPException
from app.models.fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType
# Sources configuration with validation.
# Priority 1: dedicated fact-checking organizations; priority 2: general
# (largely Bangladeshi) news sites. Lower priority values sort first in
# get_all_sources(), so fact-checkers are queried before news sites.
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in [
            "factcheck.org",
            "snopes.com",
            "politifact.com",
            "reuters.com",
            "bbc.com",
            "apnews.com",
            "usatoday.com",
            "nytimes.com",
            "washingtonpost.com",
            "afp.com",
            "fullfact.org",
            "truthorfiction.com",
            "leadstories.com",
            "altnews.in",
            "boomlive.in",
            "en.prothomalo.com"
        ]
    ],
    "news_sites": [
        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
        for domain in [
            "www.thedailystar.net",
            "www.thefinancialexpress.com.bd",
            "www.theindependentbd.com",
            "www.dhakatribune.com",
            "www.newagebd.net",
            "www.observerbd.com",
            "www.daily-sun.com",
            "www.tbsnews.net",
            "www.businesspostbd.com",
            "www.banglanews24.com/english",
            "www.bdnews24.com/english",
            "www.risingbd.com/english",
            "www.dailyindustry.news",
            "www.bangladeshpost.net",
            "www.daily-bangladesh.com/english"
        ]
    ]
}
async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource,
    *,
    language_code: str = "en-US",
    page_size: int = 10,
    timeout: float = 10.0,
) -> Dict:
    """
    Fetch fact checks for *query* from one publisher via the Google Fact Check API.

    Args:
        api_key: Google Fact Check Tools API key.
        base_url: claims:search endpoint URL.
        query: the claim text to search for.
        site: publisher whose reviews should be returned.
        language_code: language filter (previously hard-coded to "en-US").
        page_size: maximum results per call (previously hard-coded to 10).
        timeout: per-request timeout in seconds — without one, requests.get
            can block a worker forever on an unresponsive upstream.

    Raises:
        HTTPException: 503 on upstream/network failure, 500 on missing config.
    """
    try:
        if not api_key or not base_url:
            raise ValueError("API key or base URL not configured")
        params = {
            "key": api_key,
            "query": query,
            "languageCode": language_code,
            "reviewPublisherSiteFilter": site.domain,
            "pageSize": page_size
        }
        # NOTE(review): requests is a blocking call inside an async def, so it
        # stalls the event loop for the whole request; an async HTTP client
        # would fix this properly.
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()  # surfaces 4xx/5xx as RequestException
        return response.json()
    except requests.RequestException as e:
        # Timeouts and HTTP errors are both RequestException subclasses.
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts"
            ).dict()
        )
    except ValueError as e:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail=str(e),
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )
def get_all_sources() -> List[FactCheckSource]:
    """Return every configured source, ordered by ascending priority."""
    combined = [*SOURCES["fact_checkers"], *SOURCES["news_sites"]]
    combined.sort(key=lambda source: source.priority)
    return combined