Dev #1
8 changed files with 221 additions and 208 deletions
Binary file not shown.
Binary file not shown.
|
|
@ -1,11 +1,13 @@
|
|||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
|
||||
from typing import Dict, List, Optional, Union
|
||||
import requests
|
||||
from enum import Enum
|
||||
from datetime import datetime
|
||||
import json
|
||||
from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
|
||||
from datetime import datetime
|
||||
from typing import Dict
|
||||
|
||||
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
|
||||
from app.models.fact_check_models import (
|
||||
FactCheckResponse, FactCheckRequest, Claim, ErrorResponse
|
||||
)
|
||||
from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources
|
||||
|
||||
fact_check_router = APIRouter()
|
||||
|
||||
|
|
@ -15,199 +17,6 @@ class CustomJSONEncoder(json.JSONEncoder):
|
|||
return obj.isoformat()
|
||||
return super().default(obj)
|
||||
|
||||
class ErrorResponse(BaseModel):
    # Structured error payload. Documented with comments (not a docstring)
    # so the generated JSON schema description stays unchanged.

    # Human-readable description of the failure.
    detail: str
    # Required machine-readable code identifying the error category.
    error_code: str = Field(
        default=...,
        description="Unique error code for this type of error",
    )
    # ISO-8601 string from datetime.now() (naive local time), generated
    # per instance when the caller does not supply one.
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    # Optional endpoint path associated with the error.
    path: Optional[str] = Field(
        default=None,
        description="The endpoint path where error occurred",
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "detail": "Error description",
                "error_code": "ERROR_CODE",
                "timestamp": "2024-12-09T16:49:30.905765",
                "path": "/check-facts",
            },
        },
    )
|
||||
|
||||
class RequestValidationError(BaseModel):
    # Shape of a single validation-error entry: where it occurred (`loc`),
    # the message (`msg`) and the error type identifier (`type`).
    loc: List[str]
    msg: str
    type: str
|
||||
|
||||
class Publisher(BaseModel):
    # Publisher of a claim review: a name plus an optional website.
    name: str
    site: Optional[str] = Field(default=None, description="Publisher's website")

    @validator('site')
    def validate_site(cls, v):
        # Empty values and values that already carry an http(s) scheme are
        # returned untouched; anything else gets an https:// prefix.
        if not v or v.startswith(('http://', 'https://')):
            return v
        return f"https://{v}"
|
||||
|
||||
class ClaimReview(BaseModel):
    # One review of a claim. Only the publisher is required; every other
    # field is optional and defaults to None, except the language code.
    publisher: Publisher
    url: Optional[HttpUrl] = None
    title: Optional[str] = None
    reviewDate: Optional[str] = None
    textualRating: Optional[str] = None
    languageCode: str = "en-US"
|
||||
|
||||
class Claim(BaseModel):
    # A claim's text, optional claimant and claim date, and the (required)
    # list of reviews attached to it.
    text: str
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]
|
||||
|
||||
class FactCheckResponse(BaseModel):
    # Response body of the /check-facts endpoint. Documented with comments
    # (not a docstring) so the generated JSON schema stays unchanged.

    query: str = Field(default=..., description="Original query that was fact-checked")
    # Non-negative count of claims found.
    total_claims_found: int = Field(default=..., ge=0)
    # Matching claims; a fresh empty list per instance when omitted.
    results: List[Claim] = Field(default_factory=list)
    # Required mapping of summary counter names to integer values.
    summary: Dict[str, int] = Field(default=...)

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Example claim",
                "total_claims_found": 1,
                "results": [
                    {
                        "text": "Example claim text",
                        "claimant": "Source name",
                        "claimReview": [
                            {
                                "publisher": {
                                    "name": "Fact Checker",
                                    "site": "factchecker.com",
                                },
                                "textualRating": "True",
                            },
                        ],
                    },
                ],
                "summary": {
                    "total_sources": 1,
                    "fact_checking_sites_queried": 10,
                },
            },
        },
    )
|
||||
|
||||
class SourceType(str, Enum):
    # String-valued enum distinguishing the two kinds of configured source.
    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"
|
||||
|
||||
class FactCheckSource(BaseModel):
    # A site that can be queried for fact checks.
    domain: str
    type: SourceType
    # Integer between 1 and 10 inclusive, defaulting to 1.
    priority: int = Field(default=1, ge=1, le=10)

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "domain": "factcheck.org",
                "type": "fact_checker",
                "priority": 1,
            },
        },
    )
|
||||
|
||||
# Sources configuration with validation
|
||||
# Domains registered as fact checkers (all created with priority 1).
_FACT_CHECKER_DOMAINS = (
    "factcheck.org",
    "snopes.com",
    "politifact.com",
    "reuters.com",
    "bbc.com",
    "apnews.com",
    "usatoday.com",
    "nytimes.com",
    "washingtonpost.com",
    "afp.com",
    "fullfact.org",
    "truthorfiction.com",
    "leadstories.com",
    "altnews.in",
    "boomlive.in",
    "en.prothomalo.com",
)

# Domains registered as news sites (all created with priority 2).
_NEWS_SITE_DOMAINS = (
    "www.thedailystar.net",
    "www.thefinancialexpress.com.bd",
    "www.theindependentbd.com",
    "www.dhakatribune.com",
    "www.newagebd.net",
    "www.observerbd.com",
    "www.daily-sun.com",
    "www.tbsnews.net",
    "www.businesspostbd.com",
    "www.banglanews24.com/english",
    "www.bdnews24.com/english",
    "www.risingbd.com/english",
    "www.dailyindustry.news",
    "www.bangladeshpost.net",
    "www.daily-bangladesh.com/english",
)

# Each entry is built through FactCheckSource, so the model's field
# validation runs when this module is imported.
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=name, type=SourceType.FACT_CHECKER, priority=1)
        for name in _FACT_CHECKER_DOMAINS
    ],
    "news_sites": [
        FactCheckSource(domain=name, type=SourceType.NEWS_SITE, priority=2)
        for name in _NEWS_SITE_DOMAINS
    ],
}
|
||||
|
||||
class FactCheckRequest(BaseModel):
    # Request body for a fact check. Documented with comments (not a
    # docstring) so the generated JSON schema stays unchanged.

    # Required claim text, 10 to 1000 characters long.
    content: str = Field(
        default=...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked",
    )
    # Two lowercase letters, a hyphen, two uppercase letters (e.g. "en-US").
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    # Integer between 1 and 50 inclusive, defaulting to 10.
    max_results_per_source: int = Field(default=10, ge=1, le=50)

    @validator('content')
    def validate_content(cls, v):
        # Reject whitespace-only input; otherwise store the trimmed text.
        trimmed = v.strip()
        if trimmed:
            return trimmed
        raise ValueError("Content cannot be empty or just whitespace")
|
||||
|
||||
async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource,
    *,
    language_code: str = "en-US",
    page_size: int = 10,
    timeout: float = 10.0,
) -> Dict:
    """
    Fetch fact checks from a specific site using the Google Fact Check API.

    Args:
        api_key: API key, sent as the ``key`` query parameter.
        base_url: URL of the fact-check search endpoint.
        query: Claim text to search for.
        site: Source whose ``domain`` is sent as ``reviewPublisherSiteFilter``.
        language_code: Value sent as ``languageCode`` (default ``"en-US"``).
        page_size: Value sent as ``pageSize`` (default ``10``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        HTTPException: 500 with ``CONFIGURATION_ERROR`` when ``api_key`` or
            ``base_url`` is empty; 503 with ``FACT_CHECK_SERVICE_ERROR`` when
            the request fails, times out, returns an error status, or the
            body is not valid JSON.
    """
    # Local import so this block brings its own dependency into scope.
    import asyncio

    # Guard clause: report missing configuration directly instead of raising
    # ValueError and catching it again further down.
    if not api_key or not base_url:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="API key or base URL not configured",
                error_code="CONFIGURATION_ERROR",
                path="/check-facts",
            ).model_dump(),
        )

    params = {
        "key": api_key,
        "query": query,
        "languageCode": language_code,
        "reviewPublisherSiteFilter": site.domain,
        "pageSize": page_size,
    }

    try:
        # requests is blocking: run it in a worker thread so the event loop
        # stays responsive, and always bound the wait with a timeout.
        response = await asyncio.to_thread(
            requests.get, base_url, params=params, timeout=timeout
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException; that is an upstream service
        # problem, not a configuration error.
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts",
            ).model_dump(),
        ) from e
|
||||
|
||||
@fact_check_router.post(
|
||||
"/check-facts",
|
||||
response_model=FactCheckResponse,
|
||||
|
|
@ -225,7 +34,7 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
|
|||
all_results = []
|
||||
|
||||
# Validate configuration
|
||||
if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
|
||||
if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=ErrorResponse(
|
||||
|
|
@ -235,17 +44,13 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
|
|||
).dict()
|
||||
)
|
||||
|
||||
# Check all sources in priority order
|
||||
all_sources = (
|
||||
SOURCES["fact_checkers"] +
|
||||
SOURCES["news_sites"]
|
||||
)
|
||||
all_sources.sort(key=lambda x: x.priority)
|
||||
# Get all sources in priority order
|
||||
all_sources = get_all_sources()
|
||||
|
||||
for source in all_sources:
|
||||
try:
|
||||
result = await fetch_fact_checks(
|
||||
GOOGLE_FACT_CHECK_API_KEY,
|
||||
GOOGLE_API_KEY,
|
||||
GOOGLE_FACT_CHECK_BASE_URL,
|
||||
request.content,
|
||||
source
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from dotenv import load_dotenv
|
|||
|
||||
load_dotenv()
|
||||
|
||||
GOOGLE_FACT_CHECK_API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"]
|
||||
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
|
||||
GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
|
||||
|
||||
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
|
||||
|
|
|
|||
BIN
app/models/__pycache__/fact_check_models.cpython-312.pyc
Normal file
BIN
app/models/__pycache__/fact_check_models.cpython-312.pyc
Normal file
Binary file not shown.
109
app/models/fact_check_models.py
Normal file
109
app/models/fact_check_models.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
|
||||
from typing import Dict, List, Optional
|
||||
from enum import Enum
|
||||
from datetime import datetime
|
||||
|
||||
class ErrorResponse(BaseModel):
    # Structured error payload. Documented with comments (not a docstring)
    # so the generated JSON schema description stays unchanged.

    # Human-readable description of the failure.
    detail: str
    # Required machine-readable code identifying the error category.
    error_code: str = Field(
        default=...,
        description="Unique error code for this type of error",
    )
    # ISO-8601 string from datetime.now() (naive local time), generated
    # per instance when the caller does not supply one.
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    # Optional endpoint path associated with the error.
    path: Optional[str] = Field(
        default=None,
        description="The endpoint path where error occurred",
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "detail": "Error description",
                "error_code": "ERROR_CODE",
                "timestamp": "2024-12-09T16:49:30.905765",
                "path": "/check-facts",
            },
        },
    )
|
||||
|
||||
class RequestValidationError(BaseModel):
    # Shape of a single validation-error entry: where it occurred (`loc`),
    # the message (`msg`) and the error type identifier (`type`).
    loc: List[str]
    msg: str
    type: str
|
||||
|
||||
class Publisher(BaseModel):
    # Publisher of a claim review: a name plus an optional website.
    name: str
    site: Optional[str] = Field(default=None, description="Publisher's website")

    @validator('site')
    def validate_site(cls, v):
        # Empty values and values that already carry an http(s) scheme are
        # returned untouched; anything else gets an https:// prefix.
        if not v or v.startswith(('http://', 'https://')):
            return v
        return f"https://{v}"
|
||||
|
||||
class ClaimReview(BaseModel):
    # One review of a claim. Only the publisher is required; every other
    # field is optional and defaults to None, except the language code.
    publisher: Publisher
    url: Optional[HttpUrl] = None
    title: Optional[str] = None
    reviewDate: Optional[str] = None
    textualRating: Optional[str] = None
    languageCode: str = "en-US"
|
||||
|
||||
class Claim(BaseModel):
    # A claim's text, optional claimant and claim date, and the (required)
    # list of reviews attached to it.
    text: str
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]
|
||||
|
||||
class FactCheckResponse(BaseModel):
    # Response body of the /check-facts endpoint. Documented with comments
    # (not a docstring) so the generated JSON schema stays unchanged.

    query: str = Field(default=..., description="Original query that was fact-checked")
    # Non-negative count of claims found.
    total_claims_found: int = Field(default=..., ge=0)
    # Matching claims; a fresh empty list per instance when omitted.
    results: List[Claim] = Field(default_factory=list)
    # Required mapping of summary counter names to integer values.
    summary: Dict[str, int] = Field(default=...)

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Example claim",
                "total_claims_found": 1,
                "results": [
                    {
                        "text": "Example claim text",
                        "claimant": "Source name",
                        "claimReview": [
                            {
                                "publisher": {
                                    "name": "Fact Checker",
                                    "site": "factchecker.com",
                                },
                                "textualRating": "True",
                            },
                        ],
                    },
                ],
                "summary": {
                    "total_sources": 1,
                    "fact_checking_sites_queried": 10,
                },
            },
        },
    )
|
||||
|
||||
class SourceType(str, Enum):
    # String-valued enum distinguishing the two kinds of configured source.
    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"
|
||||
|
||||
class FactCheckSource(BaseModel):
    # A site that can be queried for fact checks.
    domain: str
    type: SourceType
    # Integer between 1 and 10 inclusive, defaulting to 1.
    priority: int = Field(default=1, ge=1, le=10)

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "domain": "factcheck.org",
                "type": "fact_checker",
                "priority": 1,
            },
        },
    )
|
||||
|
||||
class FactCheckRequest(BaseModel):
    # Request body for a fact check. Documented with comments (not a
    # docstring) so the generated JSON schema stays unchanged.

    # Required claim text, 10 to 1000 characters long.
    content: str = Field(
        default=...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked",
    )
    # Two lowercase letters, a hyphen, two uppercase letters (e.g. "en-US").
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    # Integer between 1 and 50 inclusive, defaulting to 10.
    max_results_per_source: int = Field(default=10, ge=1, le=50)

    @validator('content')
    def validate_content(cls, v):
        # Reject whitespace-only input; otherwise store the trimmed text.
        trimmed = v.strip()
        if trimmed:
            return trimmed
        raise ValueError("Content cannot be empty or just whitespace")
|
||||
BIN
app/websites/__pycache__/fact_checker_website.cpython-312.pyc
Normal file
BIN
app/websites/__pycache__/fact_checker_website.cpython-312.pyc
Normal file
Binary file not shown.
99
app/websites/fact_checker_website.py
Normal file
99
app/websites/fact_checker_website.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
from typing import Dict, List
|
||||
import requests
|
||||
from fastapi import HTTPException
|
||||
from app.models.fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType
|
||||
|
||||
# Sources configuration with validation
|
||||
# Domains registered as fact checkers (all created with priority 1).
_FACT_CHECKER_DOMAINS = (
    "factcheck.org",
    "snopes.com",
    "politifact.com",
    "reuters.com",
    "bbc.com",
    "apnews.com",
    "usatoday.com",
    "nytimes.com",
    "washingtonpost.com",
    "afp.com",
    "fullfact.org",
    "truthorfiction.com",
    "leadstories.com",
    "altnews.in",
    "boomlive.in",
    "en.prothomalo.com",
)

# Domains registered as news sites (all created with priority 2).
_NEWS_SITE_DOMAINS = (
    "www.thedailystar.net",
    "www.thefinancialexpress.com.bd",
    "www.theindependentbd.com",
    "www.dhakatribune.com",
    "www.newagebd.net",
    "www.observerbd.com",
    "www.daily-sun.com",
    "www.tbsnews.net",
    "www.businesspostbd.com",
    "www.banglanews24.com/english",
    "www.bdnews24.com/english",
    "www.risingbd.com/english",
    "www.dailyindustry.news",
    "www.bangladeshpost.net",
    "www.daily-bangladesh.com/english",
)

# Each entry is built through FactCheckSource, so the model's field
# validation runs when this module is imported.
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=name, type=SourceType.FACT_CHECKER, priority=1)
        for name in _FACT_CHECKER_DOMAINS
    ],
    "news_sites": [
        FactCheckSource(domain=name, type=SourceType.NEWS_SITE, priority=2)
        for name in _NEWS_SITE_DOMAINS
    ],
}
|
||||
|
||||
async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource,
    *,
    language_code: str = "en-US",
    page_size: int = 10,
    timeout: float = 10.0,
) -> Dict:
    """
    Fetch fact checks from a specific site using the Google Fact Check API.

    Args:
        api_key: API key, sent as the ``key`` query parameter.
        base_url: URL of the fact-check search endpoint.
        query: Claim text to search for.
        site: Source whose ``domain`` is sent as ``reviewPublisherSiteFilter``.
        language_code: Value sent as ``languageCode`` (default ``"en-US"``).
        page_size: Value sent as ``pageSize`` (default ``10``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        HTTPException: 500 with ``CONFIGURATION_ERROR`` when ``api_key`` or
            ``base_url`` is empty; 503 with ``FACT_CHECK_SERVICE_ERROR`` when
            the request fails, times out, returns an error status, or the
            body is not valid JSON.
    """
    # Local import so this block brings its own dependency into scope.
    import asyncio

    # Guard clause: report missing configuration directly instead of raising
    # ValueError and catching it again further down.
    if not api_key or not base_url:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="API key or base URL not configured",
                error_code="CONFIGURATION_ERROR",
                path="/check-facts",
            ).model_dump(),
        )

    params = {
        "key": api_key,
        "query": query,
        "languageCode": language_code,
        "reviewPublisherSiteFilter": site.domain,
        "pageSize": page_size,
    }

    try:
        # requests is blocking: run it in a worker thread so the event loop
        # stays responsive, and always bound the wait with a timeout.
        response = await asyncio.to_thread(
            requests.get, base_url, params=params, timeout=timeout
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException; that is an upstream service
        # problem, not a configuration error.
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts",
            ).model_dump(),
        ) from e
|
||||
|
||||
def get_all_sources() -> List[FactCheckSource]:
    """
    Return every configured source as a new list ordered by ascending priority.

    Fact checkers are placed before news sites prior to sorting; the sort is
    stable, so sources with equal priority keep that relative order.
    """
    combined = [*SOURCES["fact_checkers"], *SOURCES["news_sites"]]
    combined.sort(key=lambda source: source.priority)
    return combined
|
||||
Loading…
Add table
Reference in a new issue