Merge branch 'dev' into 'master'

Dev

See merge request planpostai/fact-checker-backend!1
This commit is contained in:
Utsho Dey 2024-12-17 11:33:43 +00:00
commit 8aae0d18da
18 changed files with 1263 additions and 267 deletions

2
.gitignore vendored
View file

@ -1,4 +1,4 @@
env
.env
test.py
/__pycache__/
__pycache__

110
app/api/ai_fact_check.py Normal file
View file

@ -0,0 +1,110 @@
from fastapi import APIRouter, HTTPException
from app.services.openai_client import OpenAIClient, AIFactChecker
from app.config import OPENAI_API_KEY
from app.models.ai_fact_check_models import (
AIFactCheckRequest,
AIFactCheckResponse,
VerificationResult,
TokenUsage,
ErrorResponse
)
from urllib.parse import urlparse
import asyncio
# Initialize router and OpenAI client
# Module-level singletons: one OpenAI-backed fact checker is shared by all
# requests handled by this router.
aifact_check_router = APIRouter()
openai_client = OpenAIClient(api_key=OPENAI_API_KEY)
fact_checker = AIFactChecker(openai_client=openai_client)
@aifact_check_router.post(
    "/aicheck-facts",
    response_model=AIFactCheckResponse,
    responses={
        400: {"model": ErrorResponse},
        500: {"model": ErrorResponse}
    }
)
async def ai_fact_check(request: AIFactCheckRequest):
    """Fact-check a statement against multiple webpages.

    Request:
        - urls: webpage URLs to analyze (scheme optional; validated upstream)
        - content: the statement to verify

    Response: per-URL verification results, the union of cited sources, and
    aggregated token usage. Failed URLs are reported as "Error" verdicts
    rather than failing the whole request.
    """
    try:
        url_verdicts = {}
        combined_sources = set()
        prompt_total = 0
        completion_total = 0
        grand_total = 0

        # Fan out one fact-check per URL and run them concurrently;
        # return_exceptions=True keeps one bad URL from sinking the batch.
        outcomes = await asyncio.gather(
            *(fact_checker.check_fact(url=url, query=request.content)
              for url in request.urls),
            return_exceptions=True
        )

        for url, outcome in zip(request.urls, outcomes):
            if isinstance(outcome, Exception):
                # Failed URL: record an explicit error verdict for it.
                url_verdicts[url] = VerificationResult(
                    verdict="Error",
                    confidence="Low",
                    evidence=f"Error checking URL: {str(outcome)}",
                    reasoning="URL processing failed",
                    missing_info="Could not access or process the URL"
                )
                continue

            verification = outcome["verification_result"]
            url_verdicts[url] = VerificationResult(
                verdict=verification["verdict"],
                confidence=verification["confidence"],
                evidence=verification["evidence"],
                reasoning=verification["reasoning"],
                missing_info=verification.get("missing_info", None)
            )
            combined_sources.update(outcome["sources"])

            # Accumulate token usage across all successful checks.
            usage = outcome["token_usage"]
            prompt_total += usage["prompt_tokens"]
            completion_total += usage["completion_tokens"]
            grand_total += usage["total_tokens"]

        return AIFactCheckResponse(
            query=request.content,
            verification_result=url_verdicts,
            sources=list(combined_sources),
            token_usage=TokenUsage(
                prompt_tokens=prompt_total,
                completion_tokens=completion_total,
                total_tokens=grand_total
            )
        )
    except ValueError as e:
        raise HTTPException(
            status_code=400,
            detail=ErrorResponse(
                detail=str(e),
                error_code="INVALID_URL",
                path="/aicheck-facts"
            ).dict()
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail=f"Error processing fact-check request: {str(e)}",
                error_code="PROCESSING_ERROR",
                path="/aicheck-facts"
            ).dict()
        )

View file

@ -1,291 +1,192 @@
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
from typing import Dict, List, Optional, Union
import requests
from enum import Enum
from datetime import datetime
import json
from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
import httpx
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
from app.api.scrap_websites import search_websites, SearchRequest
from app.services.openai_client import OpenAIClient
from app.models.fact_check_models import (
FactCheckRequest,
FactCheckResponse,
ErrorResponse,
Source
)
from app.websites.fact_checker_website import get_all_sources
fact_check_router = APIRouter()
openai_client = OpenAIClient(OPENAI_API_KEY)
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes datetime objects as ISO-8601 strings."""

    def default(self, obj):
        # Only datetimes get special handling; everything else defers to the
        # base encoder (which raises TypeError for unserializable types).
        return obj.isoformat() if isinstance(obj, datetime) else super().default(obj)
async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse:
"""Generate a fact check report using OpenAI based on the fact check results."""
try:
base_system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.
class ErrorResponse(BaseModel):
detail: str
error_code: str = Field(..., description="Unique error code for this type of error")
timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
path: Optional[str] = Field(None, description="The endpoint path where error occurred")
Rules:
1. Include all source URLs and names in the sources list
2. Keep the explanation focused on verifiable facts
3. Include dates when available
4. Maintain objectivity in the report"""
model_config = ConfigDict(json_schema_extra={
"example": {
"detail": "Error description",
"error_code": "ERROR_CODE",
"timestamp": "2024-12-09T16:49:30.905765",
"path": "/check-facts"
base_user_prompt = """Generate a comprehensive fact check report in this exact JSON format:
{
"claim": "Write the exact claim being verified",
"verdict": "One of: True/False/Partially True/Unverified",
"confidence": "One of: High/Medium/Low",
"sources": [
{
"url": "Full URL of the source",
"name": "Name of the source organization"
}
})
class RequestValidationError(BaseModel):
loc: List[str]
msg: str
type: str
class Publisher(BaseModel):
name: str
site: Optional[str] = Field(None, description="Publisher's website")
@validator('site')
def validate_site(cls, v):
if v and not (v.startswith('http://') or v.startswith('https://')):
return f"https://{v}"
return v
class ClaimReview(BaseModel):
publisher: Publisher
url: Optional[HttpUrl] = None
title: Optional[str] = None
reviewDate: Optional[str] = None
textualRating: Optional[str] = None
languageCode: str = Field(default="en-US")
class Claim(BaseModel):
text: str
claimant: Optional[str] = None
claimDate: Optional[str] = None
claimReview: List[ClaimReview]
class FactCheckResponse(BaseModel):
query: str = Field(..., description="Original query that was fact-checked")
total_claims_found: int = Field(..., ge=0)
results: List[Claim] = Field(default_factory=list)
summary: Dict[str, int] = Field(...)
model_config = ConfigDict(json_schema_extra={
"example": {
"query": "Example claim",
"total_claims_found": 1,
"results": [{
"text": "Example claim text",
"claimant": "Source name",
"claimReview": [{
"publisher": {
"name": "Fact Checker",
"site": "factchecker.com"
},
"textualRating": "True"
}]
}],
"summary": {
"total_sources": 1,
"fact_checking_sites_queried": 10
}
}
})
class SourceType(str, Enum):
FACT_CHECKER = "fact_checker"
NEWS_SITE = "news_site"
class FactCheckSource(BaseModel):
domain: str
type: SourceType
priority: int = Field(default=1, ge=1, le=10)
model_config = ConfigDict(json_schema_extra={
"example": {
"domain": "factcheck.org",
"type": "fact_checker",
"priority": 1
}
})
# Sources configuration with validation
SOURCES = {
"fact_checkers": [
FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
for domain in [
"factcheck.org",
"snopes.com",
"politifact.com",
"reuters.com",
"bbc.com",
"apnews.com",
"usatoday.com",
"nytimes.com",
"washingtonpost.com",
"afp.com",
"fullfact.org",
"truthorfiction.com",
"leadstories.com",
"altnews.in",
"boomlive.in",
"en.prothomalo.com"
]
],
"news_sites": [
FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
for domain in [
"www.thedailystar.net",
"www.thefinancialexpress.com.bd",
"www.theindependentbd.com",
"www.dhakatribune.com",
"www.newagebd.net",
"www.observerbd.com",
"www.daily-sun.com",
"www.tbsnews.net",
"www.businesspostbd.com",
"www.banglanews24.com/english",
"www.bdnews24.com/english",
"www.risingbd.com/english",
"www.dailyindustry.news",
"www.bangladeshpost.net",
"www.daily-bangladesh.com/english"
]
]
"evidence": "A concise summary of the key evidence (1-2 sentences)",
"explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
"additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
}
class FactCheckRequest(BaseModel):
content: str = Field(
...,
min_length=10,
max_length=1000,
description="The claim to be fact-checked"
)
language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
max_results_per_source: int = Field(default=10, ge=1, le=50)
Ensure all URLs in sources are complete (including https:// if missing) and each source has both a URL and name."""
@validator('content')
def validate_content(cls, v):
if not v.strip():
raise ValueError("Content cannot be empty or just whitespace")
return v.strip()
if "claims" in fact_check_data:
system_prompt = base_system_prompt
user_prompt = f"""Query: {query}
Fact Check Results: {fact_check_data}
{base_user_prompt}
async def fetch_fact_checks(
api_key: str,
base_url: str,
query: str,
site: FactCheckSource
) -> Dict:
"""
Fetch fact checks from a specific site using the Google Fact Check API
"""
try:
if not api_key or not base_url:
raise ValueError("API key or base URL not configured")
The report should:
1. Include ALL source URLs and organization names
2. Specify verification dates when available
3. Name the fact-checking organizations involved
4. Describe the verification process"""
else:
system_prompt = base_system_prompt
user_prompt = f"""Query: {query}
Fact Check Results: {fact_check_data}
{base_user_prompt}
params = {
"key": api_key,
"query": query,
"languageCode": "en-US",
"reviewPublisherSiteFilter": site.domain,
"pageSize": 10
}
The report should:
1. Include ALL source URLs and names from both verification_result and sources fields
2. Mention all fact-checking organizations involved
3. Describe the verification process
4. Note any conflicting information between sources"""
response = await openai_client.generate_text_response(
system_prompt=system_prompt,
user_prompt=user_prompt,
max_tokens=1000
)
response = requests.get(base_url, params=params)
response.raise_for_status()
return response.json()
except requests.RequestException as e:
raise HTTPException(
status_code=503,
detail=ErrorResponse(
detail=f"Error fetching from {site.domain}: {str(e)}",
error_code="FACT_CHECK_SERVICE_ERROR",
path="/check-facts"
).dict()
)
except ValueError as e:
try:
# First try to parse the response directly
response_data = response["response"]
# Clean up sources before validation
if isinstance(response_data.get('sources'), list):
cleaned_sources = []
for source in response_data['sources']:
if isinstance(source, str):
# Convert string sources to Source objects
url = source if source.startswith('http') else f"https://{source}"
cleaned_sources.append({
"url": url,
"name": source
})
elif isinstance(source, dict):
# Ensure URL has proper scheme
url = source.get('url', '')
if url and not url.startswith('http'):
source['url'] = f"https://{url}"
cleaned_sources.append(source)
response_data['sources'] = cleaned_sources
fact_check_response = FactCheckResponse(**response_data)
return fact_check_response
except Exception as validation_error:
print(f"Response validation error: {str(validation_error)}")
raise HTTPException(
status_code=422,
detail=ErrorResponse(
detail=f"Invalid response format: {str(validation_error)}",
error_code="VALIDATION_ERROR",
path="/check-facts"
).dict()
)
except Exception as e:
print(f"Error generating fact report: {str(e)}")
raise HTTPException(
status_code=500,
detail=ErrorResponse(
detail=str(e),
detail="Error generating fact report",
error_code="FACT_CHECK_ERROR",
path="/check-facts"
).dict()
)
@fact_check_router.post("/check-facts", response_model=FactCheckResponse)
async def check_facts(request: FactCheckRequest):
"""
Fetch fact check results and generate a comprehensive report.
"""
if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
raise HTTPException(
status_code=500,
detail=ErrorResponse(
detail="Google API key or base URL is not configured",
error_code="CONFIGURATION_ERROR",
path="/check-facts"
).dict()
)
@fact_check_router.post(
"/check-facts",
response_model=FactCheckResponse,
responses={
400: {"model": ErrorResponse},
404: {"model": ErrorResponse},
500: {"model": ErrorResponse},
503: {"model": ErrorResponse}
}
)
async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
"""
Check facts using multiple fact-checking sources
"""
all_results = []
# Validate configuration
if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
raise HTTPException(
status_code=500,
detail=ErrorResponse(
detail="API configuration is missing",
error_code="CONFIGURATION_ERROR",
path="/check-facts"
).dict()
)
# Check all sources in priority order
all_sources = (
SOURCES["fact_checkers"] +
SOURCES["news_sites"]
)
all_sources.sort(key=lambda x: x.priority)
for source in all_sources:
headers = {"Content-Type": "application/json"}
async with httpx.AsyncClient() as client:
# Get fact checker sources from the centralized configuration
fact_checker_sources = get_all_sources()
for source in fact_checker_sources:
params = {
"key": GOOGLE_API_KEY,
"query": request.query,
"languageCode": "en-US",
"reviewPublisherSiteFilter": source.domain,
"pageSize": 10
}
try:
response = await client.get(
GOOGLE_FACT_CHECK_BASE_URL,
params=params,
headers=headers
)
response.raise_for_status()
json_response = response.json()
if json_response.get("claims"):
return await generate_fact_report(request.query, json_response)
except httpx.RequestError as e:
print(f"Error fetching results for site {source.domain}: {str(e)}")
continue
except Exception as e:
print(f"Unexpected error for site {source.domain}: {str(e)}")
continue
try:
result = await fetch_fact_checks(
GOOGLE_FACT_CHECK_API_KEY,
GOOGLE_FACT_CHECK_BASE_URL,
request.content,
source
search_request = SearchRequest(
search_text=request.query,
source_types=["fact_checkers"]
)
if "claims" in result:
# Validate each claim through Pydantic
validated_claims = [
Claim(**claim).dict()
for claim in result["claims"]
]
all_results.extend(validated_claims)
except HTTPException:
raise
ai_response = await search_websites(search_request)
return await generate_fact_report(request.query, ai_response)
except Exception as e:
# Log the error but continue with other sources
print(f"Error processing {source.domain}: {str(e)}")
continue
if not all_results:
raise HTTPException(
status_code=404,
detail=ErrorResponse(
detail="No fact check results found",
error_code="NO_RESULTS_FOUND",
path="/check-facts"
).dict()
)
# Create the response using Pydantic model
response = FactCheckResponse(
query=request.content,
total_claims_found=len(all_results),
results=all_results,
summary={
"total_sources": len(set(claim.get("claimReview", [{}])[0].get("publisher", {}).get("site", "")
for claim in all_results if claim.get("claimReview"))),
"fact_checking_sites_queried": len(all_sources)
}
)
return response
print(f"Error in AI fact check: {str(e)}")
raise HTTPException(
status_code=404,
detail=ErrorResponse(
detail="No fact check results found",
error_code="NOT_FOUND",
path="/check-facts"
).dict()
)

160
app/api/scrap_websites.py Normal file
View file

@ -0,0 +1,160 @@
from fastapi import APIRouter, HTTPException
import httpx
import logging
from urllib.parse import urlparse
from typing import List, Dict, Optional
from pydantic import BaseModel
from app.models.ai_fact_check_models import (
AIFactCheckRequest,
FactCheckSource,
SourceType
)
from app.websites.fact_checker_website import SOURCES, get_all_sources
from app.api.ai_fact_check import ai_fact_check
from app.config import GOOGLE_API_KEY, GOOGLE_ENGINE_ID, GOOGLE_SEARCH_URL
class SearchRequest(BaseModel):
    # Free-text claim to search for across the configured sites.
    search_text: str
    # Keys into the SOURCES mapping (e.g. "fact_checkers", "news_sites");
    # unknown keys are ignored downstream, and fact checkers are the default.
    source_types: List[str] = ["fact_checkers"]
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

scrap_websites_router = APIRouter()

# Constants
RESULTS_PER_PAGE = 10    # Google Custom Search page size
MAX_PAGES = 5            # upper bound on result pages fetched per query
MAX_URLS_PER_DOMAIN = 5  # cap on collected URLs per source domain
def get_domain_from_url(url: str) -> str:
    """Return the lower-cased host of *url* without a leading 'www.'.

    Falls back to an empty string (after logging) when parsing fails.
    """
    try:
        host = urlparse(url).netloc.lower()
        return host[4:] if host.startswith('www.') else host
    except Exception as e:
        logger.error(f"Error extracting domain from URL {url}: {str(e)}")
        return ""
def is_valid_source_domain(domain: str, sources: List[FactCheckSource]) -> bool:
    """True when *domain* equals, or is a subdomain of, any configured source.

    The comparison is case-insensitive and ignores a leading 'www.' on
    either side.
    """
    if not domain:
        return False

    def _canonical(d: str) -> str:
        # Normalize for comparison: lower-case and strip a leading 'www.'.
        d = d.lower()
        return d[4:] if d.startswith('www.') else d

    target = _canonical(domain)
    return any(
        target == _canonical(source.domain)
        or target.endswith('.' + _canonical(source.domain))
        for source in sources
    )
async def build_enhanced_search_query(query: str, sources: List[FactCheckSource]) -> str:
    """Wrap *query* with Google 'site:' restrictions covering every source domain."""
    restriction = " OR ".join(f"site:{source.domain}" for source in sources)
    return f"({query}) ({restriction})"
async def google_custom_search(query: str, sources: List[FactCheckSource], page: int = 1) -> Optional[Dict]:
    """Fetch one page of Google Custom Search results restricted to *sources*.

    Raises HTTPException(500) when the search request fails for any reason.
    """
    params = {
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_ENGINE_ID,
        "q": await build_enhanced_search_query(query, sources),
        "num": RESULTS_PER_PAGE,
        # Google's result offsets are 1-based.
        "start": ((page - 1) * RESULTS_PER_PAGE) + 1,
    }
    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            response = await client.get(GOOGLE_SEARCH_URL, params=params)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error(f"Search error: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
@scrap_websites_router.post("/search")
async def search_websites(request: SearchRequest):
    # Search the configured sites for the claim, collect matching URLs
    # (capped per domain and overall), then hand the top results to the AI
    # fact-checker endpoint.

    # Get the source types from the request
    source_types = request.source_types if request.source_types else ["fact_checkers"]

    # Get sources based on requested types
    selected_sources = []
    for source_type in source_types:
        if source_type in SOURCES:
            selected_sources.extend(SOURCES[source_type])

    # If no valid sources found, use fact checkers as default
    if not selected_sources:
        selected_sources = SOURCES["fact_checkers"]

    all_urls = []
    domain_results = {}

    try:
        for page in range(1, MAX_PAGES + 1):
            # Stop paging once enough candidate URLs have been collected.
            if len(all_urls) >= 50:
                break

            search_response = await google_custom_search(request.search_text, selected_sources, page)
            if not search_response or not search_response.get("items"):
                break

            for item in search_response.get("items", []):
                url = item.get("link")
                if not url:
                    continue

                domain = get_domain_from_url(url)
                # Keep only URLs whose domain is one of the selected sources,
                # and cap results per domain so one site cannot dominate.
                if is_valid_source_domain(domain, selected_sources):
                    if domain not in domain_results:
                        domain_results[domain] = []
                    if len(domain_results[domain]) < MAX_URLS_PER_DOMAIN:
                        domain_results[domain].append({
                            "url": url,
                            "title": item.get("title", ""),
                            "snippet": item.get("snippet", "")
                        })
                        all_urls.append(url)

                if len(all_urls) >= 50:
                    break

        if not all_urls:
            return {
                "status": "no_results",
                "urls_found": 0
            }

        # AIFactCheckRequest accepts at most 5 URLs; forward the first five.
        fact_check_request = AIFactCheckRequest(
            content=request.search_text,
            urls=all_urls[:5]
        )

        return await ai_fact_check(fact_check_request)

    except Exception as e:
        logger.error(f"Error during search/fact-check process: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

View file

@ -3,8 +3,10 @@ from dotenv import load_dotenv
load_dotenv()

# Required environment configuration. Indexing os.environ (rather than
# .get) makes a missing key raise KeyError at import time, so a
# misconfigured deployment fails fast.
GOOGLE_FACT_CHECK_API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"]
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"]
GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
FRONTEND_URL = os.environ["FRONTEND_URL"]

View file

@ -0,0 +1,229 @@
from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
from typing import Dict, List, Optional, Any, Union
from enum import Enum
from datetime import datetime
from urllib.parse import urlparse
# Common Models
class TokenUsage(BaseModel):
    # Token counts as reported by the OpenAI API; default to 0 when absent.
    prompt_tokens: Optional[int] = 0
    completion_tokens: Optional[int] = 0
    total_tokens: Optional[int] = 0
class ErrorResponse(BaseModel):
    # Standard error payload returned by the fact-check endpoints.
    detail: str
    error_code: str = Field(..., description="Unique error code for this type of error")
    # Captured at model creation time, ISO-8601 local time.
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    path: Optional[str] = Field(None, description="The endpoint path where error occurred")

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "detail": "Error description",
            "error_code": "ERROR_CODE",
            "timestamp": "2024-12-09T16:49:30.905765",
            "path": "/check-facts"
        }
    })
# Fact Check Models
class Publisher(BaseModel):
    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    @validator('site')
    def validate_site(cls, v):
        # Normalize bare domains to https:// URLs; None and already-schemed
        # values pass through unchanged.
        if v and not (v.startswith('http://') or v.startswith('https://')):
            return f"https://{v}"
        return v
class ClaimReview(BaseModel):
    # One review of a claim; field names mirror the Google Fact Check API.
    publisher: Publisher
    url: Optional[HttpUrl] = None
    title: Optional[str] = None
    reviewDate: Optional[str] = None
    textualRating: Optional[str] = None
    languageCode: str = Field(default="en-US")
class Claim(BaseModel):
    # A claim together with the reviews that assessed it.
    text: str
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]
class SourceType(str, Enum):
    # Category of a configured source site.
    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"
class FactCheckSource(BaseModel):
    # A site to query during fact-checking. Priority is bounded 1-10;
    # presumably lower numbers are queried first — confirm against callers.
    domain: str
    type: SourceType
    priority: int = Field(default=1, ge=1, le=10)
# Verification Models
class VerificationResult(BaseModel):
    verdict: str = Field(..., description="True/False/Insufficient Information")
    confidence: str = Field(..., description="High/Medium/Low")
    # Either a single quote or a list of quotes from the source material.
    evidence: Union[str, List[str]]
    reasoning: str
    missing_info: Optional[str] = None

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "verdict": "True",
            "confidence": "High",
            "evidence": ["Direct quote from source supporting the claim"],
            "reasoning": "Detailed analysis of why the claim is considered true",
            "missing_info": "Any caveats or limitations of the verification"
        }
    })
# Request Models
class BaseFactCheckRequest(BaseModel):
    content: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked"
    )

    @validator('content')
    def validate_content(cls, v):
        # Reject whitespace-only claims and strip surrounding whitespace.
        if not v.strip():
            raise ValueError("Content cannot be empty or just whitespace")
        return v.strip()
class GoogleFactCheckRequest(BaseFactCheckRequest):
    # Locale string in ll-CC form, e.g. "en-US" (enforced by the pattern).
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(default=10, ge=1, le=50)
class AIFactCheckRequest(BaseFactCheckRequest):
    urls: List[str] = Field(
        ...,
        min_items=1,
        max_items=5,
        description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing"
    )

    @validator('urls')
    def validate_urls(cls, urls):
        # Normalize each URL (default scheme https) and reject empty or
        # structurally invalid entries.
        validated_urls = []
        for url in urls:
            if not url.strip():
                raise ValueError("URL cannot be empty")

            # Add https:// if no protocol specified
            if not url.startswith(('http://', 'https://')):
                url = f'https://{url}'

            try:
                result = urlparse(url)
                if not result.netloc:
                    raise ValueError(f"Invalid URL structure for {url}")
                validated_urls.append(url)
            except Exception as e:
                # NOTE(review): the ValueError raised just above is caught here
                # and re-wrapped, so its message appears twice in the final
                # error text ("Invalid URL ...: Invalid URL structure for ...").
                raise ValueError(f"Invalid URL {url}: {str(e)}")

        return validated_urls

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "content": "Indian flag was drawn in BUET campus",
            "urls": [
                "www.altnews.in/article-about-flag",
                "www.another-source.com/related-news"
            ]
        }
    })
# Response Models
class BaseFactCheckResponse(BaseModel):
    # Fields shared by every fact-check response variant.
    query: str
    token_usage: TokenUsage
    sources: List[str]

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example statement to verify",
            "token_usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150
            },
            "sources": ["source1.com", "source2.com"],
        }
    })
class GoogleFactCheckResponse(BaseFactCheckResponse):
    # Response shape for the Google Fact Check API flow: raw claims plus a
    # single aggregated verification result and per-run summary counters.
    total_claims_found: int
    results: List[Dict[str, Any]]
    verification_result: Dict[str, Any]
    summary: Dict[str, int]

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example claim",
            "total_claims_found": 1,
            "results": [{
                "text": "Example claim text",
                "claimant": "Source name",
                "claimReview": [{
                    "publisher": {
                        "name": "Fact Checker",
                        "site": "factchecker.com"
                    },
                    "textualRating": "True"
                }]
            }],
            "verification_result": {
                "verdict": "True",
                "confidence": "High",
                "evidence": ["Supporting evidence"],
                "reasoning": "Detailed analysis"
            },
            "sources": ["factchecker.com"],
            "token_usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150
            },
            "summary": {
                "total_sources": 1,
                "fact_checking_sites_queried": 10
            }
        }
    })
class AIFactCheckResponse(BaseFactCheckResponse):
    # One VerificationResult per checked URL, keyed by URL.
    verification_result: Dict[str, VerificationResult]  # Changed to Dict to store results per URL

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Indian flag was drawn in BUET campus",
            "verification_result": {
                "https://www.source1.com": {
                    "verdict": "True",
                    "confidence": "High",
                    "evidence": ["Supporting evidence from source 1"],
                    "reasoning": "Detailed analysis from source 1",
                    "missing_info": None
                },
                "https://www.source2.com": {
                    "verdict": "True",
                    "confidence": "Medium",
                    "evidence": ["Supporting evidence from source 2"],
                    "reasoning": "Analysis from source 2",
                    "missing_info": "Additional context needed"
                }
            },
            "sources": ["source1.com", "source2.com"],
            "token_usage": {
                "prompt_tokens": 200,
                "completion_tokens": 100,
                "total_tokens": 300
            }
        }
    })
# Backwards compatibility aliases
# Existing imports of FactCheckRequest/FactCheckResponse from this module
# resolve to the Google-backed variants.
FactCheckRequest = GoogleFactCheckRequest
FactCheckResponse = GoogleFactCheckResponse

View file

@ -0,0 +1,101 @@
from pydantic import BaseModel, Field, HttpUrl, validator
from typing import List, Literal, Union
from datetime import datetime
from enum import Enum
class VerdictEnum(str, Enum):
    # Closed set of verdicts a fact-check report may carry.
    TRUE = "True"
    FALSE = "False"
    PARTIALLY_TRUE = "Partially True"
    UNVERIFIED = "Unverified"
class ConfidenceEnum(str, Enum):
    # Confidence grades attached to a verdict.
    HIGH = "High"
    MEDIUM = "Medium"
    LOW = "Low"
class FactCheckRequest(BaseModel):
    # The claim to verify; `example` feeds the generated OpenAPI docs.
    query: str = Field(
        ...,
        min_length=3,
        max_length=500,
        description="The claim or statement to be fact-checked",
        example="Did NASA confirm finding alien structures on Mars in 2024?"
    )
class Source(BaseModel):
    url: str
    name: str = ""

    @validator('url')
    def validate_url(cls, v):
        # Basic URL validation without requiring HTTP/HTTPS
        # (deliberately lenient: only rejects empty/too-short values).
        if not v or len(v) < 3:
            raise ValueError("URL must not be empty and must be at least 3 characters")
        return v
class FactCheckResponse(BaseModel):
    # Final AI-generated fact-check report; length bounds keep each section
    # within the sizes the report prompt asks for.
    claim: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The exact claim being verified"
    )
    verdict: VerdictEnum = Field(
        ...,
        description="The verification verdict"
    )
    confidence: ConfidenceEnum = Field(
        ...,
        description="Confidence level in the verdict"
    )
    sources: List[Source] = Field(
        ...,
        min_items=1,
        description="List of sources used in verification"
    )
    evidence: str = Field(
        ...,
        min_length=20,
        max_length=500,
        description="Concise summary of key evidence"
    )
    explanation: str = Field(
        ...,
        min_length=50,
        max_length=1000,
        description="Detailed explanation of verification findings"
    )
    additional_context: str = Field(
        ...,
        min_length=20,
        max_length=500,
        description="Important context about the verification"
    )

    class Config:
        # Example payload surfaced in the OpenAPI schema.
        json_schema_extra = {
            "example": {
                "claim": "NASA confirmed finding alien structures on Mars in 2024",
                "verdict": "False",
                "confidence": "High",
                "sources": [
                    {
                        "url": "https://www.nasa.gov/mars-exploration",
                        "name": "NASA Mars Exploration"
                    },
                    {
                        "url": "https://factcheck.org/2024/mars-claims",
                        "name": "FactCheck.org"
                    }
                ],
                "evidence": "NASA has made no such announcement. Recent Mars rover images show natural rock formations.",
                "explanation": "Multiple fact-checking organizations investigated this claim. NASA's official communications and Mars mission reports from 2024 contain no mention of alien structures. The viral images being shared are misidentified natural geological formations.",
                "additional_context": "Similar false claims about alien structures on Mars have circulated periodically since the first Mars rovers began sending back images."
            }
        }
class ErrorResponse(BaseModel):
    # Error payload for the /check-facts endpoint.
    detail: str
    error_code: str = Field(..., example="VALIDATION_ERROR")
    path: str = Field(..., example="/check-facts")

View file

@ -0,0 +1,43 @@
from pydantic import BaseModel
from typing import List, Dict
class SearchRequest(BaseModel):
    # Claim text plus which source groups to search; fact checkers by default.
    search_text: str
    source_types: List[str] = ["fact_checkers"]
class Publisher(BaseModel):
    # Fact-check publisher identity (name and site).
    name: str
    site: str
class ClaimReview(BaseModel):
    # A single publisher's rating of a claim.
    publisher: Publisher
    textualRating: str
class Claim(BaseModel):
    # A claim with its reviews; all fields required in this variant.
    claimReview: List[ClaimReview]
    claimant: str
    text: str
class Summary(BaseModel):
    # Aggregate counters for one fact-check run.
    fact_checking_sites_queried: int
    total_sources: int
class TokenUsage(BaseModel):
    # OpenAI token accounting; all counts required here (no defaults).
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
class VerificationResult(BaseModel):
    # AI verdict with supporting evidence quotes and reasoning.
    verdict: str
    confidence: str
    evidence: List[str]
    reasoning: str
class EnhancedFactCheckResponse(BaseModel):
    # Combined response: raw claims, AI verification, sources, and usage.
    query: str
    results: List[Claim]
    sources: List[str]
    summary: Summary
    token_usage: Dict[str, int]
    total_claims_found: int
    verification_result: VerificationResult

View file

@ -0,0 +1,172 @@
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_transformers import BeautifulSoupTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from typing import List, Dict, Any
import numpy as np
import logging as logger
import openai
import json
class OpenAIClient:
    """Thin wrapper around the OpenAI chat-completion and embedding APIs."""

    def __init__(self, api_key: str):
        """Configure the module-level OpenAI API key for subsequent calls."""
        openai.api_key = api_key

    async def generate_text_response(self, system_prompt: str, user_prompt: str, max_tokens: int) -> dict:
        """Run a GPT-4 chat completion and return its JSON-parsed reply.

        Returns a dict with the parsed "response" plus prompt/completion/total
        token counts. Raises Exception when the API call fails or the reply is
        not valid JSON.
        """
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                max_tokens=max_tokens
            )
            usage = completion['usage']
            # The caller's prompts ask the model for JSON; surface it as a dict.
            return {
                "response": json.loads(completion['choices'][0]['message']['content']),
                "prompt_tokens": usage['prompt_tokens'],
                "completion_tokens": usage['completion_tokens'],
                "total_tokens": usage['total_tokens']
            }
        except json.JSONDecodeError as e:
            raise Exception(f"Failed to parse OpenAI response as JSON: {str(e)}")
        except Exception as e:
            raise Exception(f"OpenAI text generation error: {str(e)}")

    def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector per input text (text-embedding-ada-002)."""
        try:
            reply = openai.Embedding.create(
                input=texts,
                model="text-embedding-ada-002"
            )
            return [item['embedding'] for item in reply['data']]
        except Exception as e:
            raise Exception(f"OpenAI embedding error: {str(e)}")
class AIFactChecker:
    def __init__(self, openai_client: OpenAIClient):
        """Initialize the fact checker with OpenAI client."""
        self.openai_client = openai_client
        # Chunking for embedding/retrieval: ~1000-char chunks with a 200-char
        # overlap so evidence spanning a boundary is not lost; the splitter
        # prefers the earliest separator in this list that fits.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
        )
async def scrape_webpage(self, url: str) -> List[Document]:
"""Scrape webpage content using LangChain's AsyncHtmlLoader."""
try:
loader = AsyncHtmlLoader([url])
docs = await loader.aload()
bs_transformer = BeautifulSoupTransformer()
docs_transformed = bs_transformer.transform_documents(docs)
docs_chunks = self.text_splitter.split_documents(docs_transformed)
logger.info(f"Successfully scraped webpage | chunks={len(docs_chunks)}")
return docs_chunks
except Exception as e:
logger.error(f"Error scraping webpage | url={url} | error={str(e)}")
raise
def find_relevant_chunks(
self,
query_embedding: List[float],
doc_embeddings: List[List[float]],
docs: List[Document]
) -> List[Document]:
"""Find most relevant document chunks using cosine similarity."""
try:
query_array = np.array(query_embedding)
chunks_array = np.array(doc_embeddings)
similarities = np.dot(chunks_array, query_array) / (
np.linalg.norm(chunks_array, axis=1) * np.linalg.norm(query_array)
)
top_indices = np.argsort(similarities)[-5:][::-1]
return [docs[i] for i in top_indices]
except Exception as e:
logger.error(f"Error finding relevant chunks | error={str(e)}")
raise
async def verify_fact(self, query: str, relevant_docs: List[Document]) -> Dict[str, Any]:
"""Verify fact using OpenAI's API with context from relevant documents."""
try:
context = "\n\n".join([doc.page_content for doc in relevant_docs])
system_prompt = """You are a professional fact-checking assistant. Analyze the provided context
and determine if the given statement is true, false, or if there isn't enough information.
Provide your response in the following JSON format:
{
"verdict": "True/False/Insufficient Information",
"confidence": "High/Medium/Low",
"evidence": "Direct quotes or evidence from the context",
"reasoning": "Your detailed analysis and reasoning",
"missing_info": "Any important missing information (if applicable)"
}"""
user_prompt = f"""Context:
{context}
Statement to verify: "{query}"
Analyze the statement based on the provided context and return your response in the specified JSON format."""
response = await self.openai_client.generate_text_response(
system_prompt=system_prompt,
user_prompt=user_prompt,
max_tokens=800
)
sources = list(set([doc.metadata.get('source', 'Unknown source') for doc in relevant_docs]))
return {
"verification_result": response["response"], # This is now a dictionary
"sources": sources,
"token_usage": {
"prompt_tokens": response["prompt_tokens"],
"completion_tokens": response["completion_tokens"],
"total_tokens": response["total_tokens"]
}
}
except Exception as e:
logger.error(f"Error verifying fact | error={str(e)}")
raise
async def check_fact(self, url: str, query: str) -> Dict[str, Any]:
"""Main method to check a fact against a webpage."""
try:
docs = await self.scrape_webpage(url)
doc_texts = [doc.page_content for doc in docs]
doc_embeddings = self.openai_client.get_embeddings(doc_texts)
query_embedding = self.openai_client.get_embeddings([query])
relevant_docs = self.find_relevant_chunks(query_embedding[0], doc_embeddings, docs)
verification_result = await self.verify_fact(query, relevant_docs)
return verification_result
except Exception as e:
logger.error(f"Error checking fact | error={str(e)}")
raise

View file

@ -0,0 +1,190 @@
from typing import Dict, List
import requests
from fastapi import HTTPException
from app.models.ai_fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType
# Sources configuration with validation.
# NOTE: the raw domain lists contain a few duplicates (truthorfiction.com,
# faktograf.hr, factcrescendo.com); dict.fromkeys() de-duplicates them while
# preserving order, so each domain yields exactly one FactCheckSource and is
# queried only once.
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in dict.fromkeys([
            "snopes.com",
            "politifact.com",
            "factcheck.org",
            "reuters.com/fact-check",
            "apnews.com/hub/ap-fact-check",
            "bbc.com/news/reality_check",
            "fullfact.org",
            "afp.com/fact-check",
            "truthorfiction.com",
            "leadstories.com",
            "checkyourfact.com",
            "washingtonpost.com/news/fact-checker",
            "factcheck.kz",
            "poynter.org/ifcn",
            "factcheckeu.info",
            "africacheck.org",
            "thequint.com/webqoof",
            "altnews.in",
            "facta.news",
            "factcheckni.org",
            "mythdetector.ge",
            "verificado.mx",
            "euvsdisinfo.eu",
            "factcheck.afp.com",
            "newtral.es",
            "maldita.es",
            "faktograf.hr",
            "demagog.org.pl",
            "factnameh.com",
            "faktiskt.se",
            "teyit.org",
            "factly.in",
            "boom.live",
            "stopfake.org",
            "factcheck.ge",
            "factcheck.kg",
            "factcheck.uz",
            "factcheck.tj",
            "factcheck.az",
            "factcheck.am",
            "factcheck.md",
            "verafiles.org",
            "rappler.com/fact-check",
            "vera.com.gt",
            "chequeado.com",
            "aosfatos.org",
            "lasillavacia.com/detector-mentiras",
            "colombiacheck.com",
            "ecuadorchequea.com",
            "elsurti.com/checado",
            "verificat.cat",
            "mafindo.or.id",
            "tempo.co/cek-fakta",
            "factcheck.mk",
            "raskrinkavanje.ba",
            "faktograf.hr",
            "demagog.cz",
            "faktabaari.fi",
            "correctiv.org",
            "mimikama.at",
            "factcheck.vlaanderen",
            "factuel.afp.com",
            "nieuwscheckers.nl",
            "faktisk.no",
            "tjekdet.dk",
            "ellinikahoaxes.gr",
            "faktograf.id",
            "stopfake.kz",
            "pesacheck.org",
            "dubawa.org",
            "namibiafactcheck.org.na",
            "zimfact.org",
            "ghanafact.com",
            "factspace.africa",
            "factcrescendo.com",
            "vishvasnews.com",
            "factcheck.lk",
            "newschecker.in",
            "boomlive.in",
            "digiteye.in",
            "indiatoday.in/fact-check",
            "factcrescendo.com",
            "piyasa.com/fact-check",
            "taiwanese.facts.news",
            "taiwanfactcheck.com",
            "mygopen.com",
            "tfc-taiwan.org.tw",
            "cofacts.tw",
            "rumor.taipei",
            "fact.qq.com",
            "factcheck.afp.com/list",
            "acfta.org",
            "crosscheck.firstdraftnews.org",
            "healthfeedback.org",
            "climatefeedback.org",
            "sciencefeedback.co",
            "factcheck.aap.com.au",
            "emergent.info",
            "hoax-slayer.net",
            "truthorfiction.com",
            "factcheck.media",
            "mediawise.org",
            "thejournal.ie/factcheck",
            "journalistsresource.org",
            "metafact.io",
            "reporterslab.org/fact-checking"
        ])
    ],
    "news_sites": [
        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
        for domain in dict.fromkeys([
            "www.thedailystar.net",
            "www.thefinancialexpress.com.bd",
            "www.theindependentbd.com",
            "www.dhakatribune.com",
            "www.newagebd.net",
            "www.observerbd.com",
            "www.daily-sun.com",
            "www.tbsnews.net",
            "www.businesspostbd.com",
            "www.banglanews24.com/english",
            "www.bdnews24.com/english",
            "www.risingbd.com/english",
            "www.dailyindustry.news",
            "www.bangladeshpost.net",
            "www.daily-bangladesh.com/english"
        ])
    ]
}
async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource
) -> Dict:
    """
    Fetch fact checks for ``query`` from one publisher site via the
    Google Fact Check Tools API.

    Args:
        api_key: Google API key.
        base_url: Claim-search endpoint URL.
        query: The claim text to search for.
        site: The publisher whose reviews should be returned.

    Returns:
        The parsed JSON response from the API.

    Raises:
        HTTPException: 503 when the upstream request fails (including a
            timeout), 500 when the API key / base URL are not configured.
    """
    try:
        if not api_key or not base_url:
            raise ValueError("API key or base URL not configured")
        params = {
            "key": api_key,
            "query": query,
            "languageCode": "en-US",
            # Restrict results to reviews published by this one site.
            "reviewPublisherSiteFilter": site.domain,
            "pageSize": 10
        }
        # NOTE(review): requests is synchronous, so this call blocks the
        # event loop; the timeout bounds that blocking instead of letting
        # a stalled upstream hang the service indefinitely.
        # (requests.Timeout is a RequestException, handled below as 503.)
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        raise HTTPException(
            status_code=503,
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts"
            ).dict()
        )
    except ValueError as e:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail=str(e),
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        )
def get_all_sources() -> List[FactCheckSource]:
    """
    Return every configured source, ordered by ascending priority.

    Note: news sites are currently excluded on purpose — only dedicated
    fact-checker sources are queried. To include them again, extend the
    list with SOURCES["news_sites"].
    """
    fact_checker_sources = list(SOURCES["fact_checkers"])
    fact_checker_sources.sort(key=lambda source: source.priority)
    return fact_checker_sources

View file

@ -1,6 +1,8 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.fact_check import fact_check_router
from app.api.ai_fact_check import aifact_check_router
from app.api.scrap_websites import scrap_websites_router
from app.config import FRONTEND_URL
# Initialize FastAPI app
@ -39,6 +41,8 @@ async def health_check():
return {"status": "healthy"}
app.include_router(fact_check_router, prefix="")
app.include_router(aifact_check_router, prefix="")
app.include_router(scrap_websites_router, prefix="")
# Include routers (uncomment and modify as needed)
# from routes import some_router

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long