diff --git a/.gitignore b/.gitignore
index 21d6e87..cd4609c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
env
.env
test.py
-/__pycache__/
\ No newline at end of file
+__pycache__
\ No newline at end of file
diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc
index 53d89a6..b086fe1 100644
Binary files a/app/__pycache__/config.cpython-312.pyc and b/app/__pycache__/config.cpython-312.pyc differ
diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc
index daa45a8..b5709d2 100644
Binary files a/app/api/__pycache__/fact_check.cpython-312.pyc and b/app/api/__pycache__/fact_check.cpython-312.pyc differ
diff --git a/app/api/ai_fact_check.py b/app/api/ai_fact_check.py
new file mode 100644
index 0000000..6d1f2d7
--- /dev/null
+++ b/app/api/ai_fact_check.py
@@ -0,0 +1,110 @@
+from fastapi import APIRouter, HTTPException
+from app.services.openai_client import OpenAIClient, AIFactChecker
+from app.config import OPENAI_API_KEY
+from app.models.ai_fact_check_models import (
+ AIFactCheckRequest,
+ AIFactCheckResponse,
+ VerificationResult,
+ TokenUsage,
+ ErrorResponse
+)
+from urllib.parse import urlparse
+import asyncio
+
+# Initialize router and OpenAI client
+aifact_check_router = APIRouter()
+openai_client = OpenAIClient(api_key=OPENAI_API_KEY)
+fact_checker = AIFactChecker(openai_client=openai_client)
+
+@aifact_check_router.post(
+ "/aicheck-facts",
+ response_model=AIFactCheckResponse,
+ responses={
+ 400: {"model": ErrorResponse},
+ 500: {"model": ErrorResponse}
+ }
+)
+async def ai_fact_check(request: AIFactCheckRequest):
+ """
+ Endpoint to fact-check a given statement based on multiple webpage URLs.
+ Input:
+ - urls: List of webpage URLs to analyze (with or without http/https)
+ - content: The fact statement to verify
+ Response:
+ - JSON response with verification results per URL, sources, and token usage
+ """
+ try:
+ results = {}
+ all_sources = set()
+ all_contexts = []
+ total_prompt_tokens = 0
+ total_completion_tokens = 0
+ total_tokens = 0
+
+ # Process all URLs concurrently
+ tasks = [
+ fact_checker.check_fact(url=url, query=request.content)
+ for url in request.urls
+ ]
+ fact_check_results = await asyncio.gather(*tasks, return_exceptions=True)
+
+ # Process results
+ for url, result in zip(request.urls, fact_check_results):
+ if isinstance(result, Exception):
+ # Handle failed URL checks
+ results[url] = VerificationResult(
+ verdict="Error",
+ confidence="Low",
+ evidence=f"Error checking URL: {str(result)}",
+ reasoning="URL processing failed",
+ missing_info="Could not access or process the URL"
+ )
+ continue
+
+ verification_result = VerificationResult(
+ verdict=result["verification_result"]["verdict"],
+ confidence=result["verification_result"]["confidence"],
+ evidence=result["verification_result"]["evidence"],
+ reasoning=result["verification_result"]["reasoning"],
+ missing_info=result["verification_result"].get("missing_info", None)
+ )
+
+ results[url] = verification_result
+ all_sources.update(result["sources"])
+
+ # Accumulate token usage
+ total_prompt_tokens += result["token_usage"]["prompt_tokens"]
+ total_completion_tokens += result["token_usage"]["completion_tokens"]
+ total_tokens += result["token_usage"]["total_tokens"]
+
+ token_usage = TokenUsage(
+ prompt_tokens=total_prompt_tokens,
+ completion_tokens=total_completion_tokens,
+ total_tokens=total_tokens
+ )
+
+ return AIFactCheckResponse(
+ query=request.content,
+ verification_result=results,
+ sources=list(all_sources),
+ token_usage=token_usage
+ )
+
+ except ValueError as e:
+ raise HTTPException(
+ status_code=400,
+ detail=ErrorResponse(
+ detail=str(e),
+ error_code="INVALID_URL",
+ path="/aicheck-facts"
+ ).dict()
+ )
+ except Exception as e:
+ raise HTTPException(
+ status_code=500,
+ detail=ErrorResponse(
+ detail=f"Error processing fact-check request: {str(e)}",
+ error_code="PROCESSING_ERROR",
+ path="/aicheck-facts"
+ ).dict()
+ )
\ No newline at end of file
diff --git a/app/api/fact_check.py b/app/api/fact_check.py
index 3e7a12d..b52ef24 100644
--- a/app/api/fact_check.py
+++ b/app/api/fact_check.py
@@ -1,291 +1,192 @@
from fastapi import APIRouter, HTTPException
-from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
-from typing import Dict, List, Optional, Union
-import requests
-from enum import Enum
-from datetime import datetime
-import json
-from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
+import httpx
+from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
+from app.api.scrap_websites import search_websites, SearchRequest
+from app.services.openai_client import OpenAIClient
+from app.models.fact_check_models import (
+ FactCheckRequest,
+ FactCheckResponse,
+ ErrorResponse,
+ Source
+)
+from app.websites.fact_checker_website import get_all_sources
fact_check_router = APIRouter()
+openai_client = OpenAIClient(OPENAI_API_KEY)
-class CustomJSONEncoder(json.JSONEncoder):
- def default(self, obj):
- if isinstance(obj, datetime):
- return obj.isoformat()
- return super().default(obj)
+async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse:
+ """Generate a fact check report using OpenAI based on the fact check results."""
+ try:
+ base_system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.
-class ErrorResponse(BaseModel):
- detail: str
- error_code: str = Field(..., description="Unique error code for this type of error")
- timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
- path: Optional[str] = Field(None, description="The endpoint path where error occurred")
+Rules:
+1. Include all source URLs and names in the sources list
+2. Keep the explanation focused on verifiable facts
+3. Include dates when available
+4. Maintain objectivity in the report"""
- model_config = ConfigDict(json_schema_extra={
- "example": {
- "detail": "Error description",
- "error_code": "ERROR_CODE",
- "timestamp": "2024-12-09T16:49:30.905765",
- "path": "/check-facts"
+ base_user_prompt = """Generate a comprehensive fact check report in this exact JSON format:
+{
+ "claim": "Write the exact claim being verified",
+ "verdict": "One of: True/False/Partially True/Unverified",
+ "confidence": "One of: High/Medium/Low",
+ "sources": [
+ {
+ "url": "Full URL of the source",
+ "name": "Name of the source organization"
}
- })
-
-class RequestValidationError(BaseModel):
- loc: List[str]
- msg: str
- type: str
-
-class Publisher(BaseModel):
- name: str
- site: Optional[str] = Field(None, description="Publisher's website")
-
- @validator('site')
- def validate_site(cls, v):
- if v and not (v.startswith('http://') or v.startswith('https://')):
- return f"https://{v}"
- return v
-
-class ClaimReview(BaseModel):
- publisher: Publisher
- url: Optional[HttpUrl] = None
- title: Optional[str] = None
- reviewDate: Optional[str] = None
- textualRating: Optional[str] = None
- languageCode: str = Field(default="en-US")
-
-class Claim(BaseModel):
- text: str
- claimant: Optional[str] = None
- claimDate: Optional[str] = None
- claimReview: List[ClaimReview]
-
-class FactCheckResponse(BaseModel):
- query: str = Field(..., description="Original query that was fact-checked")
- total_claims_found: int = Field(..., ge=0)
- results: List[Claim] = Field(default_factory=list)
- summary: Dict[str, int] = Field(...)
-
- model_config = ConfigDict(json_schema_extra={
- "example": {
- "query": "Example claim",
- "total_claims_found": 1,
- "results": [{
- "text": "Example claim text",
- "claimant": "Source name",
- "claimReview": [{
- "publisher": {
- "name": "Fact Checker",
- "site": "factchecker.com"
- },
- "textualRating": "True"
- }]
- }],
- "summary": {
- "total_sources": 1,
- "fact_checking_sites_queried": 10
- }
- }
- })
-
-class SourceType(str, Enum):
- FACT_CHECKER = "fact_checker"
- NEWS_SITE = "news_site"
-
-class FactCheckSource(BaseModel):
- domain: str
- type: SourceType
- priority: int = Field(default=1, ge=1, le=10)
-
- model_config = ConfigDict(json_schema_extra={
- "example": {
- "domain": "factcheck.org",
- "type": "fact_checker",
- "priority": 1
- }
- })
-
-# Sources configuration with validation
-SOURCES = {
- "fact_checkers": [
- FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
- for domain in [
- "factcheck.org",
- "snopes.com",
- "politifact.com",
- "reuters.com",
- "bbc.com",
- "apnews.com",
- "usatoday.com",
- "nytimes.com",
- "washingtonpost.com",
- "afp.com",
- "fullfact.org",
- "truthorfiction.com",
- "leadstories.com",
- "altnews.in",
- "boomlive.in",
- "en.prothomalo.com"
- ]
],
- "news_sites": [
- FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
- for domain in [
- "www.thedailystar.net",
- "www.thefinancialexpress.com.bd",
- "www.theindependentbd.com",
- "www.dhakatribune.com",
- "www.newagebd.net",
- "www.observerbd.com",
- "www.daily-sun.com",
- "www.tbsnews.net",
- "www.businesspostbd.com",
- "www.banglanews24.com/english",
- "www.bdnews24.com/english",
- "www.risingbd.com/english",
- "www.dailyindustry.news",
- "www.bangladeshpost.net",
- "www.daily-bangladesh.com/english"
- ]
- ]
+ "evidence": "A concise summary of the key evidence (1-2 sentences)",
+ "explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
+ "additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
}
-class FactCheckRequest(BaseModel):
- content: str = Field(
- ...,
- min_length=10,
- max_length=1000,
- description="The claim to be fact-checked"
- )
- language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
- max_results_per_source: int = Field(default=10, ge=1, le=50)
+Ensure all URLs in sources are complete (including https:// if missing) and each source has both a URL and name."""
- @validator('content')
- def validate_content(cls, v):
- if not v.strip():
- raise ValueError("Content cannot be empty or just whitespace")
- return v.strip()
+ if "claims" in fact_check_data:
+ system_prompt = base_system_prompt
+ user_prompt = f"""Query: {query}
+ Fact Check Results: {fact_check_data}
+
+ {base_user_prompt}
-async def fetch_fact_checks(
- api_key: str,
- base_url: str,
- query: str,
- site: FactCheckSource
-) -> Dict:
- """
- Fetch fact checks from a specific site using the Google Fact Check API
- """
- try:
- if not api_key or not base_url:
- raise ValueError("API key or base URL not configured")
+ The report should:
+ 1. Include ALL source URLs and organization names
+ 2. Specify verification dates when available
+ 3. Name the fact-checking organizations involved
+ 4. Describe the verification process"""
+
+ else:
+ system_prompt = base_system_prompt
+ user_prompt = f"""Query: {query}
+ Fact Check Results: {fact_check_data}
+
+ {base_user_prompt}
- params = {
- "key": api_key,
- "query": query,
- "languageCode": "en-US",
- "reviewPublisherSiteFilter": site.domain,
- "pageSize": 10
- }
+ The report should:
+ 1. Include ALL source URLs and names from both verification_result and sources fields
+ 2. Mention all fact-checking organizations involved
+ 3. Describe the verification process
+ 4. Note any conflicting information between sources"""
+
+ response = await openai_client.generate_text_response(
+ system_prompt=system_prompt,
+ user_prompt=user_prompt,
+ max_tokens=1000
+ )
- response = requests.get(base_url, params=params)
- response.raise_for_status()
- return response.json()
- except requests.RequestException as e:
- raise HTTPException(
- status_code=503,
- detail=ErrorResponse(
- detail=f"Error fetching from {site.domain}: {str(e)}",
- error_code="FACT_CHECK_SERVICE_ERROR",
- path="/check-facts"
- ).dict()
- )
- except ValueError as e:
+ try:
+ # First try to parse the response directly
+ response_data = response["response"]
+
+ # Clean up sources before validation
+ if isinstance(response_data.get('sources'), list):
+ cleaned_sources = []
+ for source in response_data['sources']:
+ if isinstance(source, str):
+ # Convert string sources to Source objects
+ url = source if source.startswith('http') else f"https://{source}"
+ cleaned_sources.append({
+ "url": url,
+ "name": source
+ })
+ elif isinstance(source, dict):
+ # Ensure URL has proper scheme
+ url = source.get('url', '')
+ if url and not url.startswith('http'):
+ source['url'] = f"https://{url}"
+ cleaned_sources.append(source)
+ response_data['sources'] = cleaned_sources
+
+ fact_check_response = FactCheckResponse(**response_data)
+ return fact_check_response
+
+ except Exception as validation_error:
+ print(f"Response validation error: {str(validation_error)}")
+ raise HTTPException(
+ status_code=422,
+ detail=ErrorResponse(
+ detail=f"Invalid response format: {str(validation_error)}",
+ error_code="VALIDATION_ERROR",
+ path="/check-facts"
+ ).dict()
+ )
+
+ except Exception as e:
+ print(f"Error generating fact report: {str(e)}")
raise HTTPException(
status_code=500,
detail=ErrorResponse(
- detail=str(e),
+ detail="Error generating fact report",
+ error_code="FACT_CHECK_ERROR",
+ path="/check-facts"
+ ).dict()
+ )
+
+@fact_check_router.post("/check-facts", response_model=FactCheckResponse)
+async def check_facts(request: FactCheckRequest):
+ """
+ Fetch fact check results and generate a comprehensive report.
+ """
+ if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
+ raise HTTPException(
+ status_code=500,
+ detail=ErrorResponse(
+ detail="Google API key or base URL is not configured",
error_code="CONFIGURATION_ERROR",
path="/check-facts"
).dict()
)
-@fact_check_router.post(
- "/check-facts",
- response_model=FactCheckResponse,
- responses={
- 400: {"model": ErrorResponse},
- 404: {"model": ErrorResponse},
- 500: {"model": ErrorResponse},
- 503: {"model": ErrorResponse}
- }
-)
-async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
- """
- Check facts using multiple fact-checking sources
- """
- all_results = []
-
- # Validate configuration
- if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
- raise HTTPException(
- status_code=500,
- detail=ErrorResponse(
- detail="API configuration is missing",
- error_code="CONFIGURATION_ERROR",
- path="/check-facts"
- ).dict()
- )
-
- # Check all sources in priority order
- all_sources = (
- SOURCES["fact_checkers"] +
- SOURCES["news_sites"]
- )
- all_sources.sort(key=lambda x: x.priority)
-
- for source in all_sources:
+ headers = {"Content-Type": "application/json"}
+ async with httpx.AsyncClient() as client:
+ # Get fact checker sources from the centralized configuration
+ fact_checker_sources = get_all_sources()
+
+ for source in fact_checker_sources:
+ params = {
+ "key": GOOGLE_API_KEY,
+ "query": request.query,
+ "languageCode": "en-US",
+ "reviewPublisherSiteFilter": source.domain,
+ "pageSize": 10
+ }
+
+ try:
+ response = await client.get(
+ GOOGLE_FACT_CHECK_BASE_URL,
+ params=params,
+ headers=headers
+ )
+ response.raise_for_status()
+ json_response = response.json()
+
+ if json_response.get("claims"):
+ return await generate_fact_report(request.query, json_response)
+
+ except httpx.RequestError as e:
+ print(f"Error fetching results for site {source.domain}: {str(e)}")
+ continue
+ except Exception as e:
+ print(f"Unexpected error for site {source.domain}: {str(e)}")
+ continue
+
try:
- result = await fetch_fact_checks(
- GOOGLE_FACT_CHECK_API_KEY,
- GOOGLE_FACT_CHECK_BASE_URL,
- request.content,
- source
+ search_request = SearchRequest(
+ search_text=request.query,
+ source_types=["fact_checkers"]
)
- if "claims" in result:
- # Validate each claim through Pydantic
- validated_claims = [
- Claim(**claim).dict()
- for claim in result["claims"]
- ]
- all_results.extend(validated_claims)
-
- except HTTPException:
- raise
+ ai_response = await search_websites(search_request)
+ return await generate_fact_report(request.query, ai_response)
+
except Exception as e:
- # Log the error but continue with other sources
- print(f"Error processing {source.domain}: {str(e)}")
- continue
-
- if not all_results:
- raise HTTPException(
- status_code=404,
- detail=ErrorResponse(
- detail="No fact check results found",
- error_code="NO_RESULTS_FOUND",
- path="/check-facts"
- ).dict()
- )
-
- # Create the response using Pydantic model
- response = FactCheckResponse(
- query=request.content,
- total_claims_found=len(all_results),
- results=all_results,
- summary={
- "total_sources": len(set(claim.get("claimReview", [{}])[0].get("publisher", {}).get("site", "")
- for claim in all_results if claim.get("claimReview"))),
- "fact_checking_sites_queried": len(all_sources)
- }
- )
-
- return response
\ No newline at end of file
+ print(f"Error in AI fact check: {str(e)}")
+ raise HTTPException(
+ status_code=404,
+ detail=ErrorResponse(
+ detail="No fact check results found",
+ error_code="NOT_FOUND",
+ path="/check-facts"
+ ).dict()
+ )
\ No newline at end of file
diff --git a/app/api/scrap_websites.py b/app/api/scrap_websites.py
new file mode 100644
index 0000000..946ec01
--- /dev/null
+++ b/app/api/scrap_websites.py
@@ -0,0 +1,160 @@
+from fastapi import APIRouter, HTTPException
+import httpx
+import logging
+from urllib.parse import urlparse
+from typing import List, Dict, Optional
+from pydantic import BaseModel
+from app.models.ai_fact_check_models import (
+ AIFactCheckRequest,
+ FactCheckSource,
+ SourceType
+)
+from app.websites.fact_checker_website import SOURCES, get_all_sources
+from app.api.ai_fact_check import ai_fact_check
+from app.config import GOOGLE_API_KEY, GOOGLE_ENGINE_ID, GOOGLE_SEARCH_URL
+
+
+class SearchRequest(BaseModel):
+ search_text: str
+ source_types: List[str] = ["fact_checkers"]
+
+# Configure logging
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+scrap_websites_router = APIRouter()
+
+# Constants
+RESULTS_PER_PAGE = 10
+MAX_PAGES = 5
+MAX_URLS_PER_DOMAIN = 5
+
+
+def get_domain_from_url(url: str) -> str:
+ """Extract domain from URL with improved handling."""
+ try:
+ parsed = urlparse(url)
+ domain = parsed.netloc.lower()
+ if domain.startswith('www.'):
+ domain = domain[4:]
+ return domain
+ except Exception as e:
+ logger.error(f"Error extracting domain from URL {url}: {str(e)}")
+ return ""
+
+def is_valid_source_domain(domain: str, sources: List[FactCheckSource]) -> bool:
+ """Check if domain matches any source with improved matching logic."""
+ if not domain:
+ return False
+
+ domain = domain.lower()
+ if domain.startswith('www.'):
+ domain = domain[4:]
+
+ for source in sources:
+ source_domain = source.domain.lower()
+ if source_domain.startswith('www.'):
+ source_domain = source_domain[4:]
+
+ if domain == source_domain or domain.endswith('.' + source_domain):
+ return True
+
+ return False
+
+async def build_enhanced_search_query(query: str, sources: List[FactCheckSource]) -> str:
+ """Build search query with site restrictions."""
+ site_queries = [f"site:{source.domain}" for source in sources]
+ site_restriction = " OR ".join(site_queries)
+ return f"({query}) ({site_restriction})"
+
+async def google_custom_search(query: str, sources: List[FactCheckSource], page: int = 1) -> Optional[Dict]:
+ """Perform Google Custom Search with enhanced query."""
+ enhanced_query = await build_enhanced_search_query(query, sources)
+ start_index = ((page - 1) * RESULTS_PER_PAGE) + 1
+
+ params = {
+ "key": GOOGLE_API_KEY,
+ "cx": GOOGLE_ENGINE_ID,
+ "q": enhanced_query,
+ "num": RESULTS_PER_PAGE,
+ "start": start_index
+ }
+
+ async with httpx.AsyncClient(timeout=30.0) as client:
+ try:
+ response = await client.get(GOOGLE_SEARCH_URL, params=params)
+ response.raise_for_status()
+ return response.json()
+ except Exception as e:
+ logger.error(f"Search error: {str(e)}")
+ raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
+
+@scrap_websites_router.post("/search")
+async def search_websites(request: SearchRequest):
+ # Get the source types from the request
+ source_types = request.source_types if request.source_types else ["fact_checkers"]
+
+ # Get sources based on requested types
+ selected_sources = []
+ for source_type in source_types:
+ if source_type in SOURCES:
+ selected_sources.extend(SOURCES[source_type])
+
+ # If no valid sources found, use fact checkers as default
+ if not selected_sources:
+ selected_sources = SOURCES["fact_checkers"]
+
+ all_urls = []
+ domain_results = {}
+
+ try:
+ for page in range(1, MAX_PAGES + 1):
+ if len(all_urls) >= 50:
+ break
+
+ search_response = await google_custom_search(request.search_text, selected_sources, page)
+
+ if not search_response or not search_response.get("items"):
+ break
+
+ for item in search_response.get("items", []):
+ url = item.get("link")
+ if not url:
+ continue
+
+ domain = get_domain_from_url(url)
+
+ if is_valid_source_domain(domain, selected_sources):
+ if domain not in domain_results:
+ domain_results[domain] = []
+
+ if len(domain_results[domain]) < MAX_URLS_PER_DOMAIN:
+ domain_results[domain].append({
+ "url": url,
+ "title": item.get("title", ""),
+ "snippet": item.get("snippet", "")
+ })
+ all_urls.append(url)
+
+ if len(all_urls) >= 50:
+ break
+
+ if not all_urls:
+ return {
+ "status": "no_results",
+ "urls_found": 0
+ }
+
+ fact_check_request = AIFactCheckRequest(
+ content=request.search_text,
+ urls=all_urls[:5]
+ )
+
+ return await ai_fact_check(fact_check_request)
+
+ except Exception as e:
+ logger.error(f"Error during search/fact-check process: {str(e)}")
+ raise HTTPException(status_code=500, detail=str(e))
\ No newline at end of file
diff --git a/app/config.py b/app/config.py
index d9de9e9..b890247 100644
--- a/app/config.py
+++ b/app/config.py
@@ -3,8 +3,10 @@ from dotenv import load_dotenv
load_dotenv()
-GOOGLE_FACT_CHECK_API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"]
+GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
+GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"]
+GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
FRONTEND_URL = os.environ["FRONTEND_URL"]
\ No newline at end of file
diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc
new file mode 100644
index 0000000..7cb8e9a
Binary files /dev/null and b/app/models/__pycache__/fact_check_models.cpython-312.pyc differ
diff --git a/app/models/ai_fact_check_models.py b/app/models/ai_fact_check_models.py
new file mode 100644
index 0000000..0949e51
--- /dev/null
+++ b/app/models/ai_fact_check_models.py
@@ -0,0 +1,229 @@
+from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
+from typing import Dict, List, Optional, Any, Union
+from enum import Enum
+from datetime import datetime
+from urllib.parse import urlparse
+
+# Common Models
+class TokenUsage(BaseModel):
+ prompt_tokens: Optional[int] = 0
+ completion_tokens: Optional[int] = 0
+ total_tokens: Optional[int] = 0
+
+class ErrorResponse(BaseModel):
+ detail: str
+ error_code: str = Field(..., description="Unique error code for this type of error")
+ timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
+ path: Optional[str] = Field(None, description="The endpoint path where error occurred")
+
+ model_config = ConfigDict(json_schema_extra={
+ "example": {
+ "detail": "Error description",
+ "error_code": "ERROR_CODE",
+ "timestamp": "2024-12-09T16:49:30.905765",
+ "path": "/check-facts"
+ }
+ })
+
+# Fact Check Models
+class Publisher(BaseModel):
+ name: str
+ site: Optional[str] = Field(None, description="Publisher's website")
+
+ @validator('site')
+ def validate_site(cls, v):
+ if v and not (v.startswith('http://') or v.startswith('https://')):
+ return f"https://{v}"
+ return v
+
+class ClaimReview(BaseModel):
+ publisher: Publisher
+ url: Optional[HttpUrl] = None
+ title: Optional[str] = None
+ reviewDate: Optional[str] = None
+ textualRating: Optional[str] = None
+ languageCode: str = Field(default="en-US")
+
+class Claim(BaseModel):
+ text: str
+ claimant: Optional[str] = None
+ claimDate: Optional[str] = None
+ claimReview: List[ClaimReview]
+
+class SourceType(str, Enum):
+ FACT_CHECKER = "fact_checker"
+ NEWS_SITE = "news_site"
+
+class FactCheckSource(BaseModel):
+ domain: str
+ type: SourceType
+ priority: int = Field(default=1, ge=1, le=10)
+
+# Verification Models
+class VerificationResult(BaseModel):
+ verdict: str = Field(..., description="True/False/Insufficient Information")
+ confidence: str = Field(..., description="High/Medium/Low")
+ evidence: Union[str, List[str]]
+ reasoning: str
+ missing_info: Optional[str] = None
+
+ model_config = ConfigDict(json_schema_extra={
+ "example": {
+ "verdict": "True",
+ "confidence": "High",
+ "evidence": ["Direct quote from source supporting the claim"],
+ "reasoning": "Detailed analysis of why the claim is considered true",
+ "missing_info": "Any caveats or limitations of the verification"
+ }
+ })
+
+# Request Models
+class BaseFactCheckRequest(BaseModel):
+ content: str = Field(
+ ...,
+ min_length=10,
+ max_length=1000,
+ description="The claim to be fact-checked"
+ )
+
+ @validator('content')
+ def validate_content(cls, v):
+ if not v.strip():
+ raise ValueError("Content cannot be empty or just whitespace")
+ return v.strip()
+
+class GoogleFactCheckRequest(BaseFactCheckRequest):
+ language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
+ max_results_per_source: int = Field(default=10, ge=1, le=50)
+
+class AIFactCheckRequest(BaseFactCheckRequest):
+ urls: List[str] = Field(
+ ...,
+ min_items=1,
+ max_items=5,
+ description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing"
+ )
+
+ @validator('urls')
+ def validate_urls(cls, urls):
+ validated_urls = []
+ for url in urls:
+ if not url.strip():
+ raise ValueError("URL cannot be empty")
+
+ # Add https:// if no protocol specified
+ if not url.startswith(('http://', 'https://')):
+ url = f'https://{url}'
+
+ try:
+ result = urlparse(url)
+ if not result.netloc:
+ raise ValueError(f"Invalid URL structure for {url}")
+ validated_urls.append(url)
+ except Exception as e:
+ raise ValueError(f"Invalid URL {url}: {str(e)}")
+
+ return validated_urls
+
+ model_config = ConfigDict(json_schema_extra={
+ "example": {
+ "content": "Indian flag was drawn in BUET campus",
+ "urls": [
+ "www.altnews.in/article-about-flag",
+ "www.another-source.com/related-news"
+ ]
+ }
+ })
+
+# Response Models
+class BaseFactCheckResponse(BaseModel):
+ query: str
+ token_usage: TokenUsage
+ sources: List[str]
+
+ model_config = ConfigDict(json_schema_extra={
+ "example": {
+ "query": "Example statement to verify",
+ "token_usage": {
+ "prompt_tokens": 100,
+ "completion_tokens": 50,
+ "total_tokens": 150
+ },
+ "sources": ["source1.com", "source2.com"],
+ }
+ })
+
+class GoogleFactCheckResponse(BaseFactCheckResponse):
+ total_claims_found: int
+ results: List[Dict[str, Any]]
+ verification_result: Dict[str, Any]
+ summary: Dict[str, int]
+
+ model_config = ConfigDict(json_schema_extra={
+ "example": {
+ "query": "Example claim",
+ "total_claims_found": 1,
+ "results": [{
+ "text": "Example claim text",
+ "claimant": "Source name",
+ "claimReview": [{
+ "publisher": {
+ "name": "Fact Checker",
+ "site": "factchecker.com"
+ },
+ "textualRating": "True"
+ }]
+ }],
+ "verification_result": {
+ "verdict": "True",
+ "confidence": "High",
+ "evidence": ["Supporting evidence"],
+ "reasoning": "Detailed analysis"
+ },
+ "sources": ["factchecker.com"],
+ "token_usage": {
+ "prompt_tokens": 100,
+ "completion_tokens": 50,
+ "total_tokens": 150
+ },
+ "summary": {
+ "total_sources": 1,
+ "fact_checking_sites_queried": 10
+ }
+ }
+ })
+
+class AIFactCheckResponse(BaseFactCheckResponse):
+ verification_result: Dict[str, VerificationResult] # Changed to Dict to store results per URL
+
+ model_config = ConfigDict(json_schema_extra={
+ "example": {
+ "query": "Indian flag was drawn in BUET campus",
+ "verification_result": {
+ "https://www.source1.com": {
+ "verdict": "True",
+ "confidence": "High",
+ "evidence": ["Supporting evidence from source 1"],
+ "reasoning": "Detailed analysis from source 1",
+ "missing_info": None
+ },
+ "https://www.source2.com": {
+ "verdict": "True",
+ "confidence": "Medium",
+ "evidence": ["Supporting evidence from source 2"],
+ "reasoning": "Analysis from source 2",
+ "missing_info": "Additional context needed"
+ }
+ },
+ "sources": ["source1.com", "source2.com"],
+ "token_usage": {
+ "prompt_tokens": 200,
+ "completion_tokens": 100,
+ "total_tokens": 300
+ }
+ }
+ })
+
+# Backwards compatibility aliases
+FactCheckRequest = GoogleFactCheckRequest
+FactCheckResponse = GoogleFactCheckResponse
\ No newline at end of file
diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py
new file mode 100644
index 0000000..1b30511
--- /dev/null
+++ b/app/models/fact_check_models.py
@@ -0,0 +1,101 @@
+from pydantic import BaseModel, Field, HttpUrl, validator
+from typing import List, Literal, Union
+from datetime import datetime
+from enum import Enum
+
+class VerdictEnum(str, Enum):
+ TRUE = "True"
+ FALSE = "False"
+ PARTIALLY_TRUE = "Partially True"
+ UNVERIFIED = "Unverified"
+
+class ConfidenceEnum(str, Enum):
+ HIGH = "High"
+ MEDIUM = "Medium"
+ LOW = "Low"
+
+class FactCheckRequest(BaseModel):
+ query: str = Field(
+ ...,
+ min_length=3,
+ max_length=500,
+ description="The claim or statement to be fact-checked",
+ example="Did NASA confirm finding alien structures on Mars in 2024?"
+ )
+
+class Source(BaseModel):
+ url: str
+ name: str = ""
+
+ @validator('url')
+ def validate_url(cls, v):
+ # Basic URL validation without requiring HTTP/HTTPS
+ if not v or len(v) < 3:
+ raise ValueError("URL must not be empty and must be at least 3 characters")
+ return v
+
+class FactCheckResponse(BaseModel):
+ claim: str = Field(
+ ...,
+ min_length=10,
+ max_length=1000,
+ description="The exact claim being verified"
+ )
+ verdict: VerdictEnum = Field(
+ ...,
+ description="The verification verdict"
+ )
+ confidence: ConfidenceEnum = Field(
+ ...,
+ description="Confidence level in the verdict"
+ )
+ sources: List[Source] = Field(
+ ...,
+ min_items=1,
+ description="List of sources used in verification"
+ )
+ evidence: str = Field(
+ ...,
+ min_length=20,
+ max_length=500,
+ description="Concise summary of key evidence"
+ )
+ explanation: str = Field(
+ ...,
+ min_length=50,
+ max_length=1000,
+ description="Detailed explanation of verification findings"
+ )
+ additional_context: str = Field(
+ ...,
+ min_length=20,
+ max_length=500,
+ description="Important context about the verification"
+ )
+
+ class Config:
+ json_schema_extra = {
+ "example": {
+ "claim": "NASA confirmed finding alien structures on Mars in 2024",
+ "verdict": "False",
+ "confidence": "High",
+ "sources": [
+ {
+ "url": "https://www.nasa.gov/mars-exploration",
+ "name": "NASA Mars Exploration"
+ },
+ {
+ "url": "https://factcheck.org/2024/mars-claims",
+ "name": "FactCheck.org"
+ }
+ ],
+ "evidence": "NASA has made no such announcement. Recent Mars rover images show natural rock formations.",
+ "explanation": "Multiple fact-checking organizations investigated this claim. NASA's official communications and Mars mission reports from 2024 contain no mention of alien structures. The viral images being shared are misidentified natural geological formations.",
+ "additional_context": "Similar false claims about alien structures on Mars have circulated periodically since the first Mars rovers began sending back images."
+ }
+ }
+
+class ErrorResponse(BaseModel):
+ detail: str
+ error_code: str = Field(..., example="VALIDATION_ERROR")
+ path: str = Field(..., example="/check-facts")
\ No newline at end of file
diff --git a/app/models/scrap_websites_models.py b/app/models/scrap_websites_models.py
new file mode 100644
index 0000000..1c629c5
--- /dev/null
+++ b/app/models/scrap_websites_models.py
@@ -0,0 +1,43 @@
+from pydantic import BaseModel
+from typing import List, Dict
+
class SearchRequest(BaseModel):
    """Incoming request body for the website-scraping search endpoint."""
    # Free-text claim/query to search for.
    search_text: str
    # Source categories to query; pydantic deep-copies mutable defaults per
    # instance, so the shared list literal is safe here.
    source_types: List[str] = ["fact_checkers"]
+
class Publisher(BaseModel):
    """Publisher of a fact-check review (appears to mirror the Google Fact
    Check Tools API response shape — confirm against the upstream schema)."""
    name: str
    # Publisher's site/domain, e.g. "snopes.com".
    site: str
+
class ClaimReview(BaseModel):
    """A single review of a claim by one publisher."""
    publisher: Publisher
    # camelCase kept to match the upstream API field name.
    textualRating: str
+
class Claim(BaseModel):
    """A claim together with its published reviews."""
    # camelCase kept to match the upstream API field name.
    claimReview: List[ClaimReview]
    # Who made the claim.
    claimant: str
    # The claim text itself.
    text: str
+
class Summary(BaseModel):
    """Aggregate counts for a fact-check search run."""
    fact_checking_sites_queried: int
    total_sources: int
+
class TokenUsage(BaseModel):
    """Token consumption counters, mirroring the OpenAI API usage block."""
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
+
class VerificationResult(BaseModel):
    """Structured verdict produced by the AI verification step."""
    # e.g. "True" / "False" / "Insufficient Information" (per the prompt in
    # app/services/openai_client.py).
    verdict: str
    # e.g. "High" / "Medium" / "Low".
    confidence: str
    # NOTE(review): the verification prompt in openai_client.py asks for
    # "evidence" as a single string — confirm List[str] matches the payload.
    evidence: List[str]
    reasoning: str
+
class EnhancedFactCheckResponse(BaseModel):
    """Combined response: fact-check claims found plus the AI verification."""
    # The original search text.
    query: str
    results: List[Claim]
    # Distinct source sites consulted.
    sources: List[str]
    summary: Summary
    # NOTE(review): consider reusing the TokenUsage model here instead of a
    # raw dict for a self-documenting schema.
    token_usage: Dict[str, int]
    total_claims_found: int
    verification_result: VerificationResult
\ No newline at end of file
diff --git a/app/services/openai_client.py b/app/services/openai_client.py
new file mode 100644
index 0000000..07b6ae3
--- /dev/null
+++ b/app/services/openai_client.py
@@ -0,0 +1,172 @@
+from langchain_community.document_loaders import AsyncHtmlLoader
+from langchain_community.document_transformers import BeautifulSoupTransformer
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
+from typing import List, Dict, Any
+import numpy as np
+import logging as logger
+import openai
+import json
+
class OpenAIClient:
    """Thin wrapper around the (pre-1.0) `openai` module for chat completions
    and embeddings.

    Note: the key is stored on the module-level `openai.api_key`, so all
    OpenAIClient instances in a process share the last key set.
    """

    def __init__(self, api_key: str):
        """
        Initialize OpenAI client with the provided API key.
        """
        openai.api_key = api_key

    async def generate_text_response(self, system_prompt: str, user_prompt: str, max_tokens: int) -> dict:
        """
        Generate a chat completion and return the parsed JSON payload plus
        token usage.

        Returns:
            dict with keys "response" (parsed dict), "prompt_tokens",
            "completion_tokens" and "total_tokens".

        Raises:
            Exception: if the API call fails or the model's reply is not
                valid JSON.
        """
        import asyncio  # local import: keeps this module's top-level imports unchanged

        try:
            # The pre-1.0 openai client is synchronous; run it in a worker
            # thread so this coroutine does not block the event loop while
            # the request is in flight.
            response = await asyncio.to_thread(
                openai.ChatCompletion.create,
                model="gpt-4",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                max_tokens=max_tokens
            )
            content = response['choices'][0]['message']['content']
            # The prompts instruct the model to answer in JSON; parse the
            # JSON string into a dictionary before returning it.
            parsed_content = json.loads(content)

            return {
                "response": parsed_content,  # dictionary, not raw string
                "prompt_tokens": response['usage']['prompt_tokens'],
                "completion_tokens": response['usage']['completion_tokens'],
                "total_tokens": response['usage']['total_tokens']
            }
        except json.JSONDecodeError as e:
            # Chain the original exception so the root cause stays visible.
            raise Exception(f"Failed to parse OpenAI response as JSON: {str(e)}") from e
        except Exception as e:
            raise Exception(f"OpenAI text generation error: {str(e)}") from e

    def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """
        Retrieve one embedding vector per input text using OpenAI's
        text-embedding-ada-002 model.

        Raises:
            Exception: if the embedding API call fails.
        """
        try:
            response = openai.Embedding.create(
                input=texts,
                model="text-embedding-ada-002"
            )
            return [data['embedding'] for data in response['data']]
        except Exception as e:
            raise Exception(f"OpenAI embedding error: {str(e)}") from e
+
class AIFactChecker:
    """Verifies a factual claim against the content of a web page.

    Pipeline: scrape the page, split it into overlapping chunks, embed the
    chunks and the query, pick the most similar chunks, then ask the chat
    model for a structured verdict.
    """

    def __init__(self, openai_client: OpenAIClient):
        """Initialize the fact checker with an OpenAI client."""
        self.openai_client = openai_client
        # Overlapping chunks keep sentences intact across chunk boundaries.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
        )

    async def scrape_webpage(self, url: str) -> List[Document]:
        """Scrape webpage content and return it split into text chunks."""
        try:
            loader = AsyncHtmlLoader([url])
            docs = await loader.aload()

            bs_transformer = BeautifulSoupTransformer()
            docs_transformed = bs_transformer.transform_documents(docs)
            docs_chunks = self.text_splitter.split_documents(docs_transformed)

            logger.info(f"Successfully scraped webpage | chunks={len(docs_chunks)}")
            return docs_chunks

        except Exception as e:
            logger.error(f"Error scraping webpage | url={url} | error={str(e)}")
            raise

    def find_relevant_chunks(
        self,
        query_embedding: List[float],
        doc_embeddings: List[List[float]],
        docs: List[Document],
        top_k: int = 5
    ) -> List[Document]:
        """Return up to ``top_k`` chunks ranked by cosine similarity.

        Args:
            query_embedding: Embedding vector of the claim being verified.
            doc_embeddings: One embedding per entry in ``docs``.
            docs: Candidate document chunks (indexed positionally).
            top_k: Maximum number of chunks to return, most similar first
                (default 5, matching the previous hard-coded limit).
        """
        try:
            query_array = np.array(query_embedding)
            chunks_array = np.array(doc_embeddings)

            # Guard against zero-length vectors: a zero norm would otherwise
            # divide by zero, yield NaN similarities, and corrupt the ranking.
            chunk_norms = np.linalg.norm(chunks_array, axis=1)
            chunk_norms[chunk_norms == 0] = 1e-12
            query_norm = np.linalg.norm(query_array) or 1e-12

            similarities = np.dot(chunks_array, query_array) / (chunk_norms * query_norm)

            # argsort is ascending; take the last top_k and reverse for best-first.
            top_indices = np.argsort(similarities)[-top_k:][::-1]
            return [docs[i] for i in top_indices]

        except Exception as e:
            logger.error(f"Error finding relevant chunks | error={str(e)}")
            raise

    async def verify_fact(self, query: str, relevant_docs: List[Document]) -> Dict[str, Any]:
        """Verify a fact with the chat model, using the given chunks as context.

        Returns:
            dict with "verification_result" (parsed model verdict), "sources"
            (distinct chunk origins) and "token_usage".
        """
        try:
            context = "\n\n".join([doc.page_content for doc in relevant_docs])

            system_prompt = """You are a professional fact-checking assistant. Analyze the provided context
            and determine if the given statement is true, false, or if there isn't enough information.

            Provide your response in the following JSON format:
            {
                "verdict": "True/False/Insufficient Information",
                "confidence": "High/Medium/Low",
                "evidence": "Direct quotes or evidence from the context",
                "reasoning": "Your detailed analysis and reasoning",
                "missing_info": "Any important missing information (if applicable)"
            }"""

            user_prompt = f"""Context:
            {context}

            Statement to verify: "{query}"

            Analyze the statement based on the provided context and return your response in the specified JSON format."""

            response = await self.openai_client.generate_text_response(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                max_tokens=800
            )

            # Distinct origins of the chunks actually used as evidence.
            sources = list(set([doc.metadata.get('source', 'Unknown source') for doc in relevant_docs]))

            return {
                "verification_result": response["response"],  # parsed dict
                "sources": sources,
                "token_usage": {
                    "prompt_tokens": response["prompt_tokens"],
                    "completion_tokens": response["completion_tokens"],
                    "total_tokens": response["total_tokens"]
                }
            }

        except Exception as e:
            logger.error(f"Error verifying fact | error={str(e)}")
            raise

    async def check_fact(self, url: str, query: str) -> Dict[str, Any]:
        """Main entry point: check a fact against a single webpage."""
        try:
            docs = await self.scrape_webpage(url)

            doc_texts = [doc.page_content for doc in docs]
            # NOTE(review): get_embeddings is a synchronous network call and
            # will block the event loop — consider asyncio.to_thread here.
            doc_embeddings = self.openai_client.get_embeddings(doc_texts)
            query_embedding = self.openai_client.get_embeddings([query])

            relevant_docs = self.find_relevant_chunks(query_embedding[0], doc_embeddings, docs)
            return await self.verify_fact(query, relevant_docs)

        except Exception as e:
            logger.error(f"Error checking fact | error={str(e)}")
            raise
\ No newline at end of file
diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc
new file mode 100644
index 0000000..b0b0fa4
Binary files /dev/null and b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc differ
diff --git a/app/websites/fact_checker_website.py b/app/websites/fact_checker_website.py
new file mode 100644
index 0000000..2e4934b
--- /dev/null
+++ b/app/websites/fact_checker_website.py
@@ -0,0 +1,190 @@
+from typing import Dict, List
+import requests
+from fastapi import HTTPException
+from app.models.ai_fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType
+
# Sources configuration with validation.
# dict.fromkeys() removes duplicate domains (the raw list contained
# "truthorfiction.com", "faktograf.hr" and "factcrescendo.com" twice) while
# preserving insertion order, so each site is queried exactly once.
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in dict.fromkeys([
            "snopes.com",
            "politifact.com",
            "factcheck.org",
            "reuters.com/fact-check",
            "apnews.com/hub/ap-fact-check",
            "bbc.com/news/reality_check",
            "fullfact.org",
            "afp.com/fact-check",
            "truthorfiction.com",
            "leadstories.com",
            "checkyourfact.com",
            "washingtonpost.com/news/fact-checker",
            "factcheck.kz",
            "poynter.org/ifcn",
            "factcheckeu.info",
            "africacheck.org",
            "thequint.com/webqoof",
            "altnews.in",
            "facta.news",
            "factcheckni.org",
            "mythdetector.ge",
            "verificado.mx",
            "euvsdisinfo.eu",
            "factcheck.afp.com",
            "newtral.es",
            "maldita.es",
            "faktograf.hr",
            "demagog.org.pl",
            "factnameh.com",
            "faktiskt.se",
            "teyit.org",
            "factly.in",
            "boom.live",
            "stopfake.org",
            "factcheck.ge",
            "factcheck.kg",
            "factcheck.uz",
            "factcheck.tj",
            "factcheck.az",
            "factcheck.am",
            "factcheck.md",
            "verafiles.org",
            "rappler.com/fact-check",
            "vera.com.gt",
            "chequeado.com",
            "aosfatos.org",
            "lasillavacia.com/detector-mentiras",
            "colombiacheck.com",
            "ecuadorchequea.com",
            "elsurti.com/checado",
            "verificat.cat",
            "mafindo.or.id",
            "tempo.co/cek-fakta",
            "factcheck.mk",
            "raskrinkavanje.ba",
            "demagog.cz",
            "faktabaari.fi",
            "correctiv.org",
            "mimikama.at",
            "factcheck.vlaanderen",
            "factuel.afp.com",
            "nieuwscheckers.nl",
            "faktisk.no",
            "tjekdet.dk",
            "ellinikahoaxes.gr",
            "faktograf.id",
            "stopfake.kz",
            "pesacheck.org",
            "dubawa.org",
            "namibiafactcheck.org.na",
            "zimfact.org",
            "ghanafact.com",
            "factspace.africa",
            "factcrescendo.com",
            "vishvasnews.com",
            "factcheck.lk",
            "newschecker.in",
            "boomlive.in",
            "digiteye.in",
            "indiatoday.in/fact-check",
            "piyasa.com/fact-check",
            "taiwanese.facts.news",
            "taiwanfactcheck.com",
            "mygopen.com",
            "tfc-taiwan.org.tw",
            "cofacts.tw",
            "rumor.taipei",
            "fact.qq.com",
            "factcheck.afp.com/list",
            "acfta.org",
            "crosscheck.firstdraftnews.org",
            "healthfeedback.org",
            "climatefeedback.org",
            "sciencefeedback.co",
            "factcheck.aap.com.au",
            "emergent.info",
            "hoax-slayer.net",
            "factcheck.media",
            "mediawise.org",
            "thejournal.ie/factcheck",
            "journalistsresource.org",
            "metafact.io",
            "reporterslab.org/fact-checking"
        ])
    ],
    # Bangladeshi news sites, lower priority than dedicated fact-checkers.
    "news_sites": [
        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
        for domain in [
            "www.thedailystar.net",
            "www.thefinancialexpress.com.bd",
            "www.theindependentbd.com",
            "www.dhakatribune.com",
            "www.newagebd.net",
            "www.observerbd.com",
            "www.daily-sun.com",
            "www.tbsnews.net",
            "www.businesspostbd.com",
            "www.banglanews24.com/english",
            "www.bdnews24.com/english",
            "www.risingbd.com/english",
            "www.dailyindustry.news",
            "www.bangladeshpost.net",
            "www.daily-bangladesh.com/english"
        ]
    ]
}
+
async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource
) -> Dict:
    """
    Fetch fact checks for ``query`` from one publisher via the Google Fact
    Check API.

    Args:
        api_key: Google API key.
        base_url: Base URL of the claims search endpoint.
        query: Claim text to search for.
        site: Publisher whose reviews should be returned.

    Returns:
        The decoded JSON response from the API.

    Raises:
        HTTPException: 503 on network/HTTP failure, 500 on missing configuration.
    """
    import asyncio  # local import: keeps this module's top-level imports unchanged

    try:
        if not api_key or not base_url:
            raise ValueError("API key or base URL not configured")

        params = {
            "key": api_key,
            "query": query,
            "languageCode": "en-US",
            "reviewPublisherSiteFilter": site.domain,
            "pageSize": 10
        }

        # timeout prevents a stalled upstream request from hanging the
        # endpoint forever; to_thread keeps the blocking requests call off
        # the event loop.
        response = await asyncio.to_thread(
            requests.get, base_url, params=params, timeout=10
        )
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts"
            ).dict()
        ) from e
    except ValueError as e:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail=str(e),
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        ) from e
+
def get_all_sources() -> List[FactCheckSource]:
    """Return every configured source, ordered by ascending priority.

    Only the dedicated fact-checker sources are included; the "news_sites"
    group is deliberately left out of the result.
    """
    fact_checker_sources = list(SOURCES["fact_checkers"])
    fact_checker_sources.sort(key=lambda source: source.priority)
    return fact_checker_sources
\ No newline at end of file
diff --git a/main.py b/main.py
index 6b79e28..25d68c4 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,8 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.fact_check import fact_check_router
+from app.api.ai_fact_check import aifact_check_router
+from app.api.scrap_websites import scrap_websites_router
from app.config import FRONTEND_URL
# Initialize FastAPI app
@@ -39,6 +41,8 @@ async def health_check():
return {"status": "healthy"}
app.include_router(fact_check_router, prefix="")
+app.include_router(aifact_check_router, prefix="")
+app.include_router(scrap_websites_router, prefix="")
# Include routers (uncomment and modify as needed)
# from routes import some_router
diff --git a/search_response_altnews_in.html b/search_response_altnews_in.html
new file mode 100644
index 0000000..a3ee576
--- /dev/null
+++ b/search_response_altnews_in.html
@@ -0,0 +1,28 @@
+
q="Indian flag was drawn in BUET campus" site:altnews.in - Google Search যদি কয়েক সেকেন্ডের মধ্যে আপনি অন্য সাইটে না যান তাহলে
এখানে ক্লিক করুন
অ্যাক্সেসিবিলিটি লিঙ্ক
আপনার সার্চ - q="Indian flag was drawn in BUET campus" site:altnews.in - কোনো পৃষ্ঠাতে পাওয়া যায়নি।
পরামর্শ:
দেখুন যে সব বানান ঠিক আছে কিনা অন্য বিষয়বস্তু ব্যবহার করে চেষ্টা করুন আরও সাধারণ বিষয়বস্তু দিয়ে চেষ্টা করুন স্বল্প বিষয়বস্তু দিয়ে চেষ্টা করুন
\ No newline at end of file
diff --git a/search_response_bbc_com.html b/search_response_bbc_com.html
new file mode 100644
index 0000000..4c6857e
--- /dev/null
+++ b/search_response_bbc_com.html
@@ -0,0 +1,28 @@
+q="Indian flag BUET" site:bbc.com - Google Search যদি কয়েক সেকেন্ডের মধ্যে আপনি অন্য সাইটে না যান তাহলে
এখানে ক্লিক করুন
অ্যাক্সেসিবিলিটি লিঙ্ক
আপনার সার্চ - q="Indian flag BUET" site:bbc.com - কোনো পৃষ্ঠাতে পাওয়া যায়নি।
পরামর্শ:
দেখুন যে সব বানান ঠিক আছে কিনা অন্য বিষয়বস্তু ব্যবহার করে চেষ্টা করুন আরও সাধারণ বিষয়বস্তু দিয়ে চেষ্টা করুন স্বল্প বিষয়বস্তু দিয়ে চেষ্টা করুন
\ No newline at end of file
diff --git a/search_response_en_prothomalo_com.html b/search_response_en_prothomalo_com.html
new file mode 100644
index 0000000..298364a
--- /dev/null
+++ b/search_response_en_prothomalo_com.html
@@ -0,0 +1,28 @@
+q="flag BUET campus" site:en.prothomalo.com - Google Search যদি কয়েক সেকেন্ডের মধ্যে আপনি অন্য সাইটে না যান তাহলে
এখানে ক্লিক করুন
অ্যাক্সেসিবিলিটি লিঙ্ক
আপনার সার্চ - q="flag BUET campus" site:en.prothomalo.com - কোনো পৃষ্ঠাতে পাওয়া যায়নি।
পরামর্শ:
দেখুন যে সব বানান ঠিক আছে কিনা অন্য বিষয়বস্তু ব্যবহার করে চেষ্টা করুন আরও সাধারণ বিষয়বস্তু দিয়ে চেষ্টা করুন স্বল্প বিষয়বস্তু দিয়ে চেষ্টা করুন
\ No newline at end of file