Dev #1
8 changed files with 221 additions and 208 deletions
2 binary files changed (not shown).
@@ -1,11 +1,13 @@
 from fastapi import APIRouter, HTTPException
-from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
-from typing import Dict, List, Optional, Union
-import requests
-from enum import Enum
-from datetime import datetime
 import json
-from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
+from datetime import datetime
+from typing import Dict
+from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
+from app.models.fact_check_models import (
+    FactCheckResponse, FactCheckRequest, Claim, ErrorResponse
+)
+from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources
 
 fact_check_router = APIRouter()
 
@@ -15,199 +17,6 @@ class CustomJSONEncoder(json.JSONEncoder):
             return obj.isoformat()
         return super().default(obj)
 
-
-class ErrorResponse(BaseModel):
-    detail: str
-    error_code: str = Field(..., description="Unique error code for this type of error")
-    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
-    path: Optional[str] = Field(None, description="The endpoint path where error occurred")
-
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "detail": "Error description",
-            "error_code": "ERROR_CODE",
-            "timestamp": "2024-12-09T16:49:30.905765",
-            "path": "/check-facts"
-        }
-    })
-
-class RequestValidationError(BaseModel):
-    loc: List[str]
-    msg: str
-    type: str
-
-class Publisher(BaseModel):
-    name: str
-    site: Optional[str] = Field(None, description="Publisher's website")
-
-    @validator('site')
-    def validate_site(cls, v):
-        if v and not (v.startswith('http://') or v.startswith('https://')):
-            return f"https://{v}"
-        return v
-
-class ClaimReview(BaseModel):
-    publisher: Publisher
-    url: Optional[HttpUrl] = None
-    title: Optional[str] = None
-    reviewDate: Optional[str] = None
-    textualRating: Optional[str] = None
-    languageCode: str = Field(default="en-US")
-
-class Claim(BaseModel):
-    text: str
-    claimant: Optional[str] = None
-    claimDate: Optional[str] = None
-    claimReview: List[ClaimReview]
-
-class FactCheckResponse(BaseModel):
-    query: str = Field(..., description="Original query that was fact-checked")
-    total_claims_found: int = Field(..., ge=0)
-    results: List[Claim] = Field(default_factory=list)
-    summary: Dict[str, int] = Field(...)
-
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "query": "Example claim",
-            "total_claims_found": 1,
-            "results": [{
-                "text": "Example claim text",
-                "claimant": "Source name",
-                "claimReview": [{
-                    "publisher": {
-                        "name": "Fact Checker",
-                        "site": "factchecker.com"
-                    },
-                    "textualRating": "True"
-                }]
-            }],
-            "summary": {
-                "total_sources": 1,
-                "fact_checking_sites_queried": 10
-            }
-        }
-    })
-
-class SourceType(str, Enum):
-    FACT_CHECKER = "fact_checker"
-    NEWS_SITE = "news_site"
-
-class FactCheckSource(BaseModel):
-    domain: str
-    type: SourceType
-    priority: int = Field(default=1, ge=1, le=10)
-
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "domain": "factcheck.org",
-            "type": "fact_checker",
-            "priority": 1
-        }
-    })
-
-# Sources configuration with validation
-SOURCES = {
-    "fact_checkers": [
-        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
-        for domain in [
-            "factcheck.org",
-            "snopes.com",
-            "politifact.com",
-            "reuters.com",
-            "bbc.com",
-            "apnews.com",
-            "usatoday.com",
-            "nytimes.com",
-            "washingtonpost.com",
-            "afp.com",
-            "fullfact.org",
-            "truthorfiction.com",
-            "leadstories.com",
-            "altnews.in",
-            "boomlive.in",
-            "en.prothomalo.com"
-        ]
-    ],
-    "news_sites": [
-        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
-        for domain in [
-            "www.thedailystar.net",
-            "www.thefinancialexpress.com.bd",
-            "www.theindependentbd.com",
-            "www.dhakatribune.com",
-            "www.newagebd.net",
-            "www.observerbd.com",
-            "www.daily-sun.com",
-            "www.tbsnews.net",
-            "www.businesspostbd.com",
-            "www.banglanews24.com/english",
-            "www.bdnews24.com/english",
-            "www.risingbd.com/english",
-            "www.dailyindustry.news",
-            "www.bangladeshpost.net",
-            "www.daily-bangladesh.com/english"
-        ]
-    ]
-}
-
-class FactCheckRequest(BaseModel):
-    content: str = Field(
-        ...,
-        min_length=10,
-        max_length=1000,
-        description="The claim to be fact-checked"
-    )
-    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
-    max_results_per_source: int = Field(default=10, ge=1, le=50)
-
-    @validator('content')
-    def validate_content(cls, v):
-        if not v.strip():
-            raise ValueError("Content cannot be empty or just whitespace")
-        return v.strip()
-
-async def fetch_fact_checks(
-    api_key: str,
-    base_url: str,
-    query: str,
-    site: FactCheckSource
-) -> Dict:
-    """
-    Fetch fact checks from a specific site using the Google Fact Check API
-    """
-    try:
-        if not api_key or not base_url:
-            raise ValueError("API key or base URL not configured")
-
-        params = {
-            "key": api_key,
-            "query": query,
-            "languageCode": "en-US",
-            "reviewPublisherSiteFilter": site.domain,
-            "pageSize": 10
-        }
-
-        response = requests.get(base_url, params=params)
-        response.raise_for_status()
-        return response.json()
-    except requests.RequestException as e:
-        raise HTTPException(
-            status_code=503,
-            detail=ErrorResponse(
-                detail=f"Error fetching from {site.domain}: {str(e)}",
-                error_code="FACT_CHECK_SERVICE_ERROR",
-                path="/check-facts"
-            ).dict()
-        )
-    except ValueError as e:
-        raise HTTPException(
-            status_code=500,
-            detail=ErrorResponse(
-                detail=str(e),
-                error_code="CONFIGURATION_ERROR",
-                path="/check-facts"
-            ).dict()
-        )
-
 @fact_check_router.post(
     "/check-facts",
     response_model=FactCheckResponse,
@@ -225,7 +34,7 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
     all_results = []
 
     # Validate configuration
-    if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
+    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
         raise HTTPException(
             status_code=500,
             detail=ErrorResponse(
@@ -235,17 +44,13 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
            ).dict()
        )
 
-    # Check all sources in priority order
-    all_sources = (
-        SOURCES["fact_checkers"] +
-        SOURCES["news_sites"]
-    )
-    all_sources.sort(key=lambda x: x.priority)
+    # Get all sources in priority order
+    all_sources = get_all_sources()
 
     for source in all_sources:
        try:
            result = await fetch_fact_checks(
-                GOOGLE_FACT_CHECK_API_KEY,
+                GOOGLE_API_KEY,
                GOOGLE_FACT_CHECK_BASE_URL,
                request.content,
                source
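Taken together, the loop inside check_facts now delegates to the two new modules. Below is a sketch of the shape it takes after this change; only the lines visible in the hunks above are certain, and the gather_claims helper plus the "claims" aggregation and the skip-on-failure behaviour are my illustration, not code from the PR.

from fastapi import HTTPException
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
from app.models.fact_check_models import FactCheckRequest
from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources

async def gather_claims(request: FactCheckRequest) -> list:
    """Hypothetical helper mirroring the refactored loop in check_facts."""
    all_results = []
    for source in get_all_sources():
        try:
            result = await fetch_fact_checks(
                GOOGLE_API_KEY,
                GOOGLE_FACT_CHECK_BASE_URL,
                request.content,
                source,
            )
            # Assumption: the Fact Check Tools response carries a "claims" list.
            all_results.extend(result.get("claims", []))
        except HTTPException:
            # Assumption: a failing source is skipped rather than aborting the whole request.
            continue
    return all_results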
@@ -3,7 +3,7 @@ from dotenv import load_dotenv
 
 load_dotenv()
 
-GOOGLE_FACT_CHECK_API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"]
+GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
 GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
 
 OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
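Because the variable is renamed, any existing .env or deployment environment needs updating as well; app.config reads these names with os.environ[...], so a missing GOOGLE_API_KEY (formerly GOOGLE_FACT_CHECK_API_KEY) now fails with a KeyError at import time. A minimal pre-flight check (my sketch, not part of the PR):

import os
from dotenv import load_dotenv

load_dotenv()
for name in ("GOOGLE_API_KEY", "GOOGLE_FACT_CHECK_BASE_URL", "OPENAI_API_KEY"):
    if name not in os.environ:
        raise RuntimeError(f"missing required environment variable: {name}")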
BIN  app/models/__pycache__/fact_check_models.cpython-312.pyc (new file, binary not shown)
app/models/fact_check_models.py (new file, 109 lines)
@@ -0,0 +1,109 @@
from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
from typing import Dict, List, Optional
from enum import Enum
from datetime import datetime

class ErrorResponse(BaseModel):
    detail: str
    error_code: str = Field(..., description="Unique error code for this type of error")
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    path: Optional[str] = Field(None, description="The endpoint path where error occurred")

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "detail": "Error description",
            "error_code": "ERROR_CODE",
            "timestamp": "2024-12-09T16:49:30.905765",
            "path": "/check-facts"
        }
    })

class RequestValidationError(BaseModel):
    loc: List[str]
    msg: str
    type: str

class Publisher(BaseModel):
    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    @validator('site')
    def validate_site(cls, v):
        if v and not (v.startswith('http://') or v.startswith('https://')):
            return f"https://{v}"
        return v

class ClaimReview(BaseModel):
    publisher: Publisher
    url: Optional[HttpUrl] = None
    title: Optional[str] = None
    reviewDate: Optional[str] = None
    textualRating: Optional[str] = None
    languageCode: str = Field(default="en-US")

class Claim(BaseModel):
    text: str
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]

class FactCheckResponse(BaseModel):
    query: str = Field(..., description="Original query that was fact-checked")
    total_claims_found: int = Field(..., ge=0)
    results: List[Claim] = Field(default_factory=list)
    summary: Dict[str, int] = Field(...)

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example claim",
            "total_claims_found": 1,
            "results": [{
                "text": "Example claim text",
                "claimant": "Source name",
                "claimReview": [{
                    "publisher": {
                        "name": "Fact Checker",
                        "site": "factchecker.com"
                    },
                    "textualRating": "True"
                }]
            }],
            "summary": {
                "total_sources": 1,
                "fact_checking_sites_queried": 10
            }
        }
    })

class SourceType(str, Enum):
    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"

class FactCheckSource(BaseModel):
    domain: str
    type: SourceType
    priority: int = Field(default=1, ge=1, le=10)

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "domain": "factcheck.org",
            "type": "fact_checker",
            "priority": 1
        }
    })

class FactCheckRequest(BaseModel):
    content: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked"
    )
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(default=10, ge=1, le=50)

    @validator('content')
    def validate_content(cls, v):
        if not v.strip():
            raise ValueError("Content cannot be empty or just whitespace")
        return v.strip()
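For reviewers, a quick sketch (not part of the PR) of how the request-model constraints behave; it assumes Pydantic v2, where the v1-style @validator used above still runs but is deprecated:

from pydantic import ValidationError
from app.models.fact_check_models import FactCheckRequest

# Valid: at least 10 characters; validate_content strips surrounding whitespace.
req = FactCheckRequest(content="  The moon landing was staged.  ")
print(req.content)    # "The moon landing was staged."
print(req.language)   # "en-US" (default)

# Invalid: content is shorter than min_length=10 and language fails "^[a-z]{2}-[A-Z]{2}$".
try:
    FactCheckRequest(content="too short", language="english")
except ValidationError as exc:
    print(len(exc.errors()))  # 2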
BIN  app/websites/__pycache__/fact_checker_website.cpython-312.pyc (new file, binary not shown)
app/websites/fact_checker_website.py (new file, 99 lines)
@@ -0,0 +1,99 @@
from typing import Dict, List
import requests
from fastapi import HTTPException
from app.models.fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType

# Sources configuration with validation
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in [
            "factcheck.org",
            "snopes.com",
            "politifact.com",
            "reuters.com",
            "bbc.com",
            "apnews.com",
            "usatoday.com",
            "nytimes.com",
            "washingtonpost.com",
            "afp.com",
            "fullfact.org",
            "truthorfiction.com",
            "leadstories.com",
            "altnews.in",
            "boomlive.in",
            "en.prothomalo.com"
        ]
    ],
    "news_sites": [
        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
        for domain in [
            "www.thedailystar.net",
            "www.thefinancialexpress.com.bd",
            "www.theindependentbd.com",
            "www.dhakatribune.com",
            "www.newagebd.net",
            "www.observerbd.com",
            "www.daily-sun.com",
            "www.tbsnews.net",
            "www.businesspostbd.com",
            "www.banglanews24.com/english",
            "www.bdnews24.com/english",
            "www.risingbd.com/english",
            "www.dailyindustry.news",
            "www.bangladeshpost.net",
            "www.daily-bangladesh.com/english"
        ]
    ]
}

async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource
) -> Dict:
    """
    Fetch fact checks from a specific site using the Google Fact Check API
    """
    try:
        if not api_key or not base_url:
            raise ValueError("API key or base URL not configured")

        params = {
            "key": api_key,
            "query": query,
            "languageCode": "en-US",
            "reviewPublisherSiteFilter": site.domain,
            "pageSize": 10
        }

        response = requests.get(base_url, params=params)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts"
            ).dict()
        )
    except ValueError as e:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail=str(e),
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )

def get_all_sources() -> List[FactCheckSource]:
    """
    Get all sources sorted by priority
    """
    all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"]
    return sorted(all_sources, key=lambda x: x.priority)
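A small check (mine, not in the diff) of the ordering contract get_all_sources() gives the router: fact-checking domains (priority 1) sort ahead of the news sites (priority 2), and Python's stable sort keeps each group in its declared order.

from app.websites.fact_checker_website import SOURCES, get_all_sources

ordered = get_all_sources()
assert len(ordered) == len(SOURCES["fact_checkers"]) + len(SOURCES["news_sites"])  # 16 + 15 sources
assert [s.domain for s in ordered[:3]] == ["factcheck.org", "snopes.com", "politifact.com"]
assert all(s.priority == 2 for s in ordered[16:])  # news sites come last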