Compare commits
11 commits
master ... dev-local_
| Author | SHA1 | Date |
|---|---|---|
| | a754fdc756 | |
| | 56335cbfa7 | |
| | 15a0061a0d | |
| | 9be0343695 | |
| | f32745326b | |
| | b79c746e15 | |
| | 019e07e1b9 | |
| | 9c15f7a59c | |
| | 954c01432b | |
| | 49c9c9c92d | |
| | d59f5c884e | |
26 changed files with 931 additions and 551 deletions
4 .flake8 Normal file
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 100
exclude = .git,__pycache__,dist,*.egg-info,venv
extend-ignore = E203
42 .gitignore vendored
@@ -1,4 +1,42 @@
env
# Environment
env/
.env
venv/
ENV/

# Python
__pycache__/
*.py[cod]
*$py.class
.Python
*.so
.pytest_cache/
.coverage
.coverage.*
coverage.xml
*.cover
htmlcov/

# IDEs and editors
.idea/
.vscode/
*.swp
*.swo
*~

# Project specific
test.py
__pycache__
*.log
.pip-cache/

# Temporary files
*.tmp
.DS_Store

# Distribution / packaging
dist/
build/
*.egg-info/

# Docker
.docker/
52 .gitlab-ci.yml Normal file
@@ -0,0 +1,52 @@
image: python:3.10-slim

variables:
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
  PYTHONPATH: "$CI_PROJECT_DIR"

cache:
  paths:
    - .pip-cache
    - venv/

stages:
  - setup
  - test

before_script:
  - apt-get update
  - apt-get install -y curl
  - python --version
  - pip install virtualenv
  - virtualenv venv
  - source venv/bin/activate

setup:
  stage: setup
  script:
    - pip install --no-cache-dir -r requirements.txt
  artifacts:
    paths:
      - venv/
    expire_in: 1 hour

test:
  stage: test
  needs:
    - setup
  script:
    # Run all tests
    - pytest tests/ -v
    # Start FastAPI server
    - uvicorn main:app --host 0.0.0.0 --port 8000 &
    # Wait for server to start
    - sleep 15
    # Test health endpoint
    - |
      RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/health)
      if [ "$RESPONSE" = "200" ]; then
        echo "✅ Health check passed"
      else
        echo "❌ Health check failed with status $RESPONSE"
        exit 1
      fi
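The health check in the test job can be reproduced outside CI. The sketch below is not part of this diff; it assumes the server is already running locally on port 8000 (httpx is pinned in requirements.txt).

# Minimal local equivalent of the CI health check (a sketch, not repo code).
# Assumes `uvicorn main:app --port 8000` is already running.
import httpx

def check_health(base_url: str = "http://localhost:8000") -> None:
    response = httpx.get(f"{base_url}/health", timeout=10.0)
    if response.status_code == 200:
        print("Health check passed")
    else:
        raise SystemExit(f"Health check failed with status {response.status_code}")

if __name__ == "__main__":
    check_health()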
Binary file not shown.
app/api/ai_fact_check.py
@@ -6,7 +6,7 @@ from app.models.ai_fact_check_models import (
    AIFactCheckResponse,
    VerificationResult,
    TokenUsage,
    ErrorResponse
    ErrorResponse,
)
from urllib.parse import urlparse
import asyncio
@@ -16,13 +16,11 @@ aifact_check_router = APIRouter()
openai_client = OpenAIClient(api_key=OPENAI_API_KEY)
fact_checker = AIFactChecker(openai_client=openai_client)


@aifact_check_router.post(
    "/aicheck-facts",
    response_model=AIFactCheckResponse,
    responses={
        400: {"model": ErrorResponse},
        500: {"model": ErrorResponse}
    }
    responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
)
async def ai_fact_check(request: AIFactCheckRequest):
    """
@@ -57,7 +55,7 @@ async def ai_fact_check(request: AIFactCheckRequest):
                    confidence="Low",
                    evidence=f"Error checking URL: {str(result)}",
                    reasoning="URL processing failed",
                    missing_info="Could not access or process the URL"
                    missing_info="Could not access or process the URL",
                )
                continue
@@ -66,7 +64,7 @@ async def ai_fact_check(request: AIFactCheckRequest):
                confidence=result["verification_result"]["confidence"],
                evidence=result["verification_result"]["evidence"],
                reasoning=result["verification_result"]["reasoning"],
                missing_info=result["verification_result"].get("missing_info", None)
                missing_info=result["verification_result"].get("missing_info", None),
            )

            results[url] = verification_result
@@ -80,24 +78,22 @@ async def ai_fact_check(request: AIFactCheckRequest):
        token_usage = TokenUsage(
            prompt_tokens=total_prompt_tokens,
            completion_tokens=total_completion_tokens,
            total_tokens=total_tokens
            total_tokens=total_tokens,
        )

        return AIFactCheckResponse(
            query=request.content,
            verification_result=results,
            sources=list(all_sources),
            token_usage=token_usage
            token_usage=token_usage,
        )

    except ValueError as e:
        raise HTTPException(
            status_code=400,
            detail=ErrorResponse(
                detail=str(e),
                error_code="INVALID_URL",
                path="/aicheck-facts"
            ).dict()
                detail=str(e), error_code="INVALID_URL", path="/aicheck-facts"
            ).dict(),
        )
    except Exception as e:
        raise HTTPException(
@@ -105,6 +101,6 @@ async def ai_fact_check(request: AIFactCheckRequest):
            detail=ErrorResponse(
                detail=f"Error processing fact-check request: {str(e)}",
                error_code="PROCESSING_ERROR",
                path="/aicheck-facts"
            ).dict()
                path="/aicheck-facts",
            ).dict(),
        )
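A request against the /aicheck-facts endpoint above might look like the following sketch (not part of this diff); the payload fields follow AIFactCheckRequest, and the local base URL is an assumption.

# Hypothetical client call for /aicheck-facts; payload fields follow AIFactCheckRequest.
import httpx

payload = {
    "content": "Indian flag was drawn in BUET campus",  # example taken from the model's schema
    "urls": ["www.altnews.in/article-about-flag"],  # the validator adds https:// if missing
}
response = httpx.post("http://localhost:8000/aicheck-facts", json=payload, timeout=60.0)
print(response.json()["verification_result"])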
app/api/fact_check.py
@@ -1,20 +1,25 @@
from fastapi import APIRouter, HTTPException
import httpx
from typing import Union
from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
from app.api.scrap_websites import search_websites, SearchRequest
from app.services.openai_client import OpenAIClient
from app.models.fact_check_models import (
    FactCheckRequest,
    FactCheckResponse,
    UnverifiedFactCheckResponse,
    ErrorResponse,
    Source
    Source,
    VerdictEnum,
    ConfidenceEnum
)
from app.websites.fact_checker_website import get_all_sources

fact_check_router = APIRouter()
openai_client = OpenAIClient(OPENAI_API_KEY)

async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse:

async def generate_fact_report(query: str, fact_check_data: dict) -> Union[FactCheckResponse, UnverifiedFactCheckResponse]:
    """Generate a fact check report using OpenAI based on the fact check results."""
    try:
        base_system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.
@@ -23,7 +28,24 @@ Rules:
1. Include all source URLs and names in the sources list
2. Keep the explanation focused on verifiable facts
3. Include dates when available
4. Maintain objectivity in the report"""
4. Maintain objectivity in the report
5. If no reliable sources are found, provide a clear explanation why"""

        # If no sources were found, return an unverified response
        if not fact_check_data.get("claims") and (
            not fact_check_data.get("urls_found") or
            fact_check_data.get("status") == "no_results" or
            fact_check_data.get("verification_result", {}).get("no_sources_found")
        ):
            return UnverifiedFactCheckResponse(
                claim=query,
                verdict=VerdictEnum.UNVERIFIED,
                confidence=ConfidenceEnum.LOW,
                sources=[],
                evidence="No fact-checking sources have verified this claim yet.",
                explanation="Our search across reputable fact-checking websites did not find any formal verification of this claim. This doesn't mean the claim is false - just that it hasn't been formally fact-checked yet.",
                additional_context="The claim may be too recent for fact-checkers to have investigated, or it may not have been widely circulated enough to warrant formal fact-checking."
            )

        base_user_prompt = """Generate a comprehensive fact check report in this exact JSON format:
{
@@ -39,9 +61,7 @@ Rules:
    "evidence": "A concise summary of the key evidence (1-2 sentences)",
    "explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
    "additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
}

Ensure all URLs in sources are complete (including https:// if missing) and each source has both a URL and name."""
}"""

        if "claims" in fact_check_data:
            system_prompt = base_system_prompt
@@ -76,71 +96,68 @@ Ensure all URLs in sources are complete (including https:// if missing) and each
        )

        try:
            # First try to parse the response directly
            response_data = response["response"]

            # Clean up sources before validation
            if isinstance(response_data.get('sources'), list):
            if isinstance(response_data.get("sources"), list):
                cleaned_sources = []
                for source in response_data['sources']:
                for source in response_data["sources"]:
                    if isinstance(source, str):
                        # Convert string sources to Source objects
                        url = source if source.startswith('http') else f"https://{source}"
                        cleaned_sources.append({
                            "url": url,
                            "name": source
                        })
                        url = source if source.startswith("http") else f"https://{source}"
                        cleaned_sources.append({"url": url, "name": source})
                    elif isinstance(source, dict):
                        # Ensure URL has proper scheme
                        url = source.get('url', '')
                        if url and not url.startswith('http'):
                            source['url'] = f"https://{url}"
                        url = source.get("url", "")
                        if url and not url.startswith("http"):
                            source["url"] = f"https://{url}"
                        cleaned_sources.append(source)
                response_data['sources'] = cleaned_sources
                response_data["sources"] = cleaned_sources

            fact_check_response = FactCheckResponse(**response_data)
            return fact_check_response
            if response_data["verdict"] == "Unverified" or not response_data.get("sources"):
                return UnverifiedFactCheckResponse(**response_data)
            return FactCheckResponse(**response_data)

        except Exception as validation_error:
            print(f"Response validation error: {str(validation_error)}")
            raise HTTPException(
                status_code=422,
                detail=ErrorResponse(
                    detail=f"Invalid response format: {str(validation_error)}",
                    error_code="VALIDATION_ERROR",
                    path="/check-facts"
                ).dict()
            return UnverifiedFactCheckResponse(
                claim=query,
                verdict=VerdictEnum.UNVERIFIED,
                confidence=ConfidenceEnum.LOW,
                sources=[],
                evidence="An error occurred while processing the fact check results.",
                explanation="The system encountered an error while validating the fact check results.",
                additional_context="This is a technical error and does not reflect on the truthfulness of the claim."
            )

    except Exception as e:
        print(f"Error generating fact report: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="Error generating fact report",
                error_code="FACT_CHECK_ERROR",
                path="/check-facts"
            ).dict()
        return UnverifiedFactCheckResponse(
            claim=query,
            verdict=VerdictEnum.UNVERIFIED,
            confidence=ConfidenceEnum.LOW,
            sources=[],
            evidence="An error occurred while generating the fact check report.",
            explanation="The system encountered an unexpected error while processing the fact check request.",
            additional_context="This is a technical error and does not reflect on the truthfulness of the claim."
        )


@fact_check_router.post("/check-facts", response_model=FactCheckResponse)

@fact_check_router.post("/check-facts", response_model=Union[FactCheckResponse, UnverifiedFactCheckResponse])
async def check_facts(request: FactCheckRequest):
    """
    Fetch fact check results and generate a comprehensive report.
    """
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail="Google API key or base URL is not configured",
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
        return UnverifiedFactCheckResponse(
            claim=request.query,
            verdict=VerdictEnum.UNVERIFIED,
            confidence=ConfidenceEnum.LOW,
            sources=[],
            evidence="The fact-checking service is not properly configured.",
            explanation="The system is missing required API configuration for fact-checking services.",
            additional_context="This is a temporary system configuration issue."
        )

    headers = {"Content-Type": "application/json"}
    async with httpx.AsyncClient() as client:
        # Get fact checker sources from the centralized configuration
        fact_checker_sources = get_all_sources()

        for source in fact_checker_sources:
@@ -149,14 +166,12 @@ async def check_facts(request: FactCheckRequest):
                "query": request.query,
                "languageCode": "en-US",
                "reviewPublisherSiteFilter": source.domain,
                "pageSize": 10
                "pageSize": 10,
            }

            try:
                response = await client.get(
                    GOOGLE_FACT_CHECK_BASE_URL,
                    params=params,
                    headers=headers
                    GOOGLE_FACT_CHECK_BASE_URL, params=params, headers=headers
                )
                response.raise_for_status()
                json_response = response.json()
@@ -182,11 +197,10 @@ async def check_facts(request: FactCheckRequest):

    except Exception as e:
        print(f"Error in AI fact check: {str(e)}")
        raise HTTPException(
            status_code=404,
            detail=ErrorResponse(
                detail="No fact check results found",
                error_code="NOT_FOUND",
                path="/check-facts"
            ).dict()
        )
        return await generate_fact_report(request.query, {
            "status": "no_results",
            "verification_result": {
                "no_sources_found": True,
                "reason": str(e)
            }
        })
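A matching client call for /check-facts might look like this sketch (not part of this diff); the query field follows FactCheckRequest, and the base URL is an assumption.

# Hypothetical client call for /check-facts; the query field follows FactCheckRequest.
import httpx

payload = {"query": "Did NASA confirm finding alien structures on Mars in 2024?"}
response = httpx.post("http://localhost:8000/check-facts", json=payload, timeout=60.0)
response.raise_for_status()
print(response.json()["verdict"])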
app/api/scrap_websites.py
@@ -7,7 +7,7 @@ from pydantic import BaseModel
from app.models.ai_fact_check_models import (
    AIFactCheckRequest,
    FactCheckSource,
    SourceType
    SourceType,
)
from app.websites.fact_checker_website import SOURCES, get_all_sources
from app.api.ai_fact_check import ai_fact_check
@@ -18,10 +18,10 @@ class SearchRequest(BaseModel):
    search_text: str
    source_types: List[str] = ["fact_checkers"]


# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
@@ -38,39 +38,46 @@ def get_domain_from_url(url: str) -> str:
    try:
        parsed = urlparse(url)
        domain = parsed.netloc.lower()
        if domain.startswith('www.'):
        if domain.startswith("www."):
            domain = domain[4:]
        return domain
    except Exception as e:
        logger.error(f"Error extracting domain from URL {url}: {str(e)}")
        return ""


def is_valid_source_domain(domain: str, sources: List[FactCheckSource]) -> bool:
    """Check if domain matches any source with improved matching logic."""
    if not domain:
        return False

    domain = domain.lower()
    if domain.startswith('www.'):
    if domain.startswith("www."):
        domain = domain[4:]

    for source in sources:
        source_domain = source.domain.lower()
        if source_domain.startswith('www.'):
        if source_domain.startswith("www."):
            source_domain = source_domain[4:]

        if domain == source_domain or domain.endswith('.' + source_domain):
        if domain == source_domain or domain.endswith("." + source_domain):
            return True

    return False

async def build_enhanced_search_query(query: str, sources: List[FactCheckSource]) -> str:

async def build_enhanced_search_query(
    query: str, sources: List[FactCheckSource]
) -> str:
    """Build search query with site restrictions."""
    site_queries = [f"site:{source.domain}" for source in sources]
    site_restriction = " OR ".join(site_queries)
    return f"({query}) ({site_restriction})"

async def google_custom_search(query: str, sources: List[FactCheckSource], page: int = 1) -> Optional[Dict]:

async def google_custom_search(
    query: str, sources: List[FactCheckSource], page: int = 1
) -> Optional[Dict]:
    """Perform Google Custom Search with enhanced query."""
    enhanced_query = await build_enhanced_search_query(query, sources)
    start_index = ((page - 1) * RESULTS_PER_PAGE) + 1
@@ -80,7 +87,7 @@ async def google_custom_search(query: str, sources: List[FactCheckSource], page:
        "cx": GOOGLE_ENGINE_ID,
        "q": enhanced_query,
        "num": RESULTS_PER_PAGE,
        "start": start_index
        "start": start_index,
    }

    async with httpx.AsyncClient(timeout=30.0) as client:
@@ -92,6 +99,7 @@ async def google_custom_search(query: str, sources: List[FactCheckSource], page:
        logger.error(f"Search error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")


@scrap_websites_router.post("/search")
async def search_websites(request: SearchRequest):
    # Get the source types from the request
@@ -115,7 +123,9 @@ async def search_websites(request: SearchRequest):
        if len(all_urls) >= 50:
            break

        search_response = await google_custom_search(request.search_text, selected_sources, page)
        search_response = await google_custom_search(
            request.search_text, selected_sources, page
        )

        if not search_response or not search_response.get("items"):
            break
@@ -132,25 +142,23 @@ async def search_websites(request: SearchRequest):
                domain_results[domain] = []

            if len(domain_results[domain]) < MAX_URLS_PER_DOMAIN:
                domain_results[domain].append({
                    "url": url,
                    "title": item.get("title", ""),
                    "snippet": item.get("snippet", "")
                })
                domain_results[domain].append(
                    {
                        "url": url,
                        "title": item.get("title", ""),
                        "snippet": item.get("snippet", ""),
                    }
                )
                all_urls.append(url)

        if len(all_urls) >= 50:
            break

    if not all_urls:
        return {
            "status": "no_results",
            "urls_found": 0
        }
        return {"status": "no_results", "urls_found": 0}

    fact_check_request = AIFactCheckRequest(
        content=request.search_text,
        urls=all_urls[:5]
        content=request.search_text, urls=all_urls[:5]
    )

    return await ai_fact_check(fact_check_request)
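To illustrate the site-restricted query that build_enhanced_search_query produces, a standalone sketch (the domains are examples from the SOURCES list):

# Standalone illustration of the query shape produced by build_enhanced_search_query.
domains = ["snopes.com", "politifact.com"]
site_restriction = " OR ".join(f"site:{d}" for d in domains)
print(f"(Did X happen?) ({site_restriction})")
# -> (Did X happen?) (site:snopes.com OR site:politifact.com)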
app/config.py
@@ -4,7 +4,7 @@ from dotenv import load_dotenv

load_dotenv()

GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
GOOGLE_FACT_CHECK_BASE_URL = os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"]
GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"]
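Because app/config.py reads every key with os.environ[...], a missing variable raises KeyError at import time. The .env file consumed by load_dotenv() therefore needs at least the following entries; all values below are placeholders (OPENAI_API_KEY and FRONTEND_URL are also imported from app.config elsewhere in this diff):

GOOGLE_API_KEY=<your-google-api-key>
GOOGLE_FACT_CHECK_BASE_URL=<fact-check-api-base-url>
GOOGLE_ENGINE_ID=<your-custom-search-engine-id>
GOOGLE_SEARCH_URL=<google-custom-search-endpoint>
OPENAI_API_KEY=<your-openai-api-key>
FRONTEND_URL=<your-frontend-origin>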
Binary file not shown.
app/models/ai_fact_check_models.py
@@ -4,38 +4,46 @@ from enum import Enum
from datetime import datetime
from urllib.parse import urlparse


# Common Models
class TokenUsage(BaseModel):
    prompt_tokens: Optional[int] = 0
    completion_tokens: Optional[int] = 0
    total_tokens: Optional[int] = 0


class ErrorResponse(BaseModel):
    detail: str
    error_code: str = Field(..., description="Unique error code for this type of error")
    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
    path: Optional[str] = Field(None, description="The endpoint path where error occurred")
    path: Optional[str] = Field(
        None, description="The endpoint path where error occurred"
    )

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "detail": "Error description",
            "error_code": "ERROR_CODE",
            "timestamp": "2024-12-09T16:49:30.905765",
            "path": "/check-facts"
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "detail": "Error description",
                "error_code": "ERROR_CODE",
                "timestamp": "2024-12-09T16:49:30.905765",
                "path": "/check-facts",
            }
        }
    })
    )


# Fact Check Models
class Publisher(BaseModel):
    name: str
    site: Optional[str] = Field(None, description="Publisher's website")

    @validator('site')
    @validator("site")
    def validate_site(cls, v):
        if v and not (v.startswith('http://') or v.startswith('https://')):
        if v and not (v.startswith("http://") or v.startswith("https://")):
            return f"https://{v}"
        return v


class ClaimReview(BaseModel):
    publisher: Publisher
    url: Optional[HttpUrl] = None
@@ -44,21 +52,25 @@ class ClaimReview(BaseModel):
    textualRating: Optional[str] = None
    languageCode: str = Field(default="en-US")


class Claim(BaseModel):
    text: str
    claimant: Optional[str] = None
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]


class SourceType(str, Enum):
    FACT_CHECKER = "fact_checker"
    NEWS_SITE = "news_site"


class FactCheckSource(BaseModel):
    domain: str
    type: SourceType
    priority: int = Field(default=1, ge=1, le=10)


# Verification Models
class VerificationResult(BaseModel):
    verdict: str = Field(..., description="True/False/Insufficient Information")
@@ -67,44 +79,46 @@ class VerificationResult(BaseModel):
    reasoning: str
    missing_info: Optional[str] = None

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "verdict": "True",
            "confidence": "High",
            "evidence": ["Direct quote from source supporting the claim"],
            "reasoning": "Detailed analysis of why the claim is considered true",
            "missing_info": "Any caveats or limitations of the verification"
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "verdict": "True",
                "confidence": "High",
                "evidence": ["Direct quote from source supporting the claim"],
                "reasoning": "Detailed analysis of why the claim is considered true",
                "missing_info": "Any caveats or limitations of the verification",
            }
        }
    })
    )


# Request Models
class BaseFactCheckRequest(BaseModel):
    content: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The claim to be fact-checked"
        ..., min_length=10, max_length=1000, description="The claim to be fact-checked"
    )

    @validator('content')
    @validator("content")
    def validate_content(cls, v):
        if not v.strip():
            raise ValueError("Content cannot be empty or just whitespace")
        return v.strip()


class GoogleFactCheckRequest(BaseFactCheckRequest):
    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
    max_results_per_source: int = Field(default=10, ge=1, le=50)


class AIFactCheckRequest(BaseFactCheckRequest):
    urls: List[str] = Field(
        ...,
        min_items=1,
        max_items=5,
        description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing"
        description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing",
    )

    @validator('urls')
    @validator("urls")
    def validate_urls(cls, urls):
        validated_urls = []
        for url in urls:
@@ -112,8 +126,8 @@ class AIFactCheckRequest(BaseFactCheckRequest):
                raise ValueError("URL cannot be empty")

            # Add https:// if no protocol specified
            if not url.startswith(('http://', 'https://')):
                url = f'https://{url}'
            if not url.startswith(("http://", "https://")):
                url = f"https://{url}"

            try:
                result = urlparse(url)
@@ -125,15 +139,18 @@ class AIFactCheckRequest(BaseFactCheckRequest):

        return validated_urls

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "content": "Indian flag was drawn in BUET campus",
            "urls": [
                "www.altnews.in/article-about-flag",
                "www.another-source.com/related-news"
            ]
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "content": "Indian flag was drawn in BUET campus",
                "urls": [
                    "www.altnews.in/article-about-flag",
                    "www.another-source.com/related-news",
                ],
            }
        }
    })
    )


# Response Models
class BaseFactCheckResponse(BaseModel):
@@ -141,17 +158,20 @@ class BaseFactCheckResponse(BaseModel):
    token_usage: TokenUsage
    sources: List[str]

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example statement to verify",
            "token_usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150
            },
            "sources": ["source1.com", "source2.com"],
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Example statement to verify",
                "token_usage": {
                    "prompt_tokens": 100,
                    "completion_tokens": 50,
                    "total_tokens": 150,
                },
                "sources": ["source1.com", "source2.com"],
            }
        }
    })
    )


class GoogleFactCheckResponse(BaseFactCheckResponse):
    total_claims_found: int
@@ -159,70 +179,79 @@ class GoogleFactCheckResponse(BaseFactCheckResponse):
    verification_result: Dict[str, Any]
    summary: Dict[str, int]

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Example claim",
            "total_claims_found": 1,
            "results": [{
                "text": "Example claim text",
                "claimant": "Source name",
                "claimReview": [{
                    "publisher": {
                        "name": "Fact Checker",
                        "site": "factchecker.com"
                    },
                    "textualRating": "True"
                }]
            }],
            "verification_result": {
                "verdict": "True",
                "confidence": "High",
                "evidence": ["Supporting evidence"],
                "reasoning": "Detailed analysis"
            },
            "sources": ["factchecker.com"],
            "token_usage": {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150
            },
            "summary": {
                "total_sources": 1,
                "fact_checking_sites_queried": 10
            }
        }
    })

class AIFactCheckResponse(BaseFactCheckResponse):
    verification_result: Dict[str, VerificationResult]  # Changed to Dict to store results per URL

    model_config = ConfigDict(json_schema_extra={
        "example": {
            "query": "Indian flag was drawn in BUET campus",
            "verification_result": {
                "https://www.source1.com": {
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Example claim",
                "total_claims_found": 1,
                "results": [
                    {
                        "text": "Example claim text",
                        "claimant": "Source name",
                        "claimReview": [
                            {
                                "publisher": {
                                    "name": "Fact Checker",
                                    "site": "factchecker.com",
                                },
                                "textualRating": "True",
                            }
                        ],
                    }
                ],
                "verification_result": {
                    "verdict": "True",
                    "confidence": "High",
                    "evidence": ["Supporting evidence from source 1"],
                    "reasoning": "Detailed analysis from source 1",
                    "missing_info": None
                    "evidence": ["Supporting evidence"],
                    "reasoning": "Detailed analysis",
                },
                "https://www.source2.com": {
                    "verdict": "True",
                    "confidence": "Medium",
                    "evidence": ["Supporting evidence from source 2"],
                    "reasoning": "Analysis from source 2",
                    "missing_info": "Additional context needed"
                }
            },
            "sources": ["source1.com", "source2.com"],
            "token_usage": {
                "prompt_tokens": 200,
                "completion_tokens": 100,
                "total_tokens": 300
                "sources": ["factchecker.com"],
                "token_usage": {
                    "prompt_tokens": 100,
                    "completion_tokens": 50,
                    "total_tokens": 150,
                },
                "summary": {"total_sources": 1, "fact_checking_sites_queried": 10},
            }
        }
    })
    )


class AIFactCheckResponse(BaseFactCheckResponse):
    verification_result: Dict[
        str, VerificationResult
    ]  # Changed to Dict to store results per URL

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Indian flag was drawn in BUET campus",
                "verification_result": {
                    "https://www.source1.com": {
                        "verdict": "True",
                        "confidence": "High",
                        "evidence": ["Supporting evidence from source 1"],
                        "reasoning": "Detailed analysis from source 1",
                        "missing_info": None,
                    },
                    "https://www.source2.com": {
                        "verdict": "True",
                        "confidence": "Medium",
                        "evidence": ["Supporting evidence from source 2"],
                        "reasoning": "Analysis from source 2",
                        "missing_info": "Additional context needed",
                    },
                },
                "sources": ["source1.com", "source2.com"],
                "token_usage": {
                    "prompt_tokens": 200,
                    "completion_tokens": 100,
                    "total_tokens": 300,
                },
            }
        }
    )


# Backwards compatibility aliases
FactCheckRequest = GoogleFactCheckRequest
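The urls validator in AIFactCheckRequest above prefixes bare domains with https://; a quick sketch of that behavior (assumes the module is importable, values come from the model's own schema example):

from app.models.ai_fact_check_models import AIFactCheckRequest

request = AIFactCheckRequest(
    content="Indian flag was drawn in BUET campus",
    urls=["www.altnews.in/article-about-flag"],
)
print(request.urls)  # ['https://www.altnews.in/article-about-flag']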
app/models/fact_check_models.py
@@ -3,52 +3,83 @@ from typing import List, Literal, Union
from datetime import datetime
from enum import Enum


class VerdictEnum(str, Enum):
    TRUE = "True"
    FALSE = "False"
    PARTIALLY_TRUE = "Partially True"
    UNVERIFIED = "Unverified"


class ConfidenceEnum(str, Enum):
    HIGH = "High"
    MEDIUM = "Medium"
    LOW = "Low"


class FactCheckRequest(BaseModel):
    query: str = Field(
        ...,
        min_length=3,
        max_length=500,
        description="The claim or statement to be fact-checked",
        example="Did NASA confirm finding alien structures on Mars in 2024?"
        example="Did NASA confirm finding alien structures on Mars in 2024?",
    )


class Source(BaseModel):
    url: str
    name: str = ""

    @validator('url')
    @validator("url")
    def validate_url(cls, v):
        # Basic URL validation without requiring HTTP/HTTPS
        if not v or len(v) < 3:
            raise ValueError("URL must not be empty and must be at least 3 characters")
        return v


class UnverifiedFactCheckResponse(BaseModel):
    claim: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The exact claim being verified",
    )
    verdict: VerdictEnum = Field(..., description="The verification verdict")
    confidence: ConfidenceEnum = Field(..., description="Confidence level in the verdict")
    sources: List[Source] = Field(
        default=[],
        description="List of sources used in verification"
    )
    evidence: str = Field(
        ...,
        min_length=20,
        max_length=500,
        description="Concise summary of key evidence",
    )
    explanation: str = Field(
        ...,
        min_length=50,
        max_length=1000,
        description="Detailed explanation of verification findings",
    )
    additional_context: str = Field(
        ...,
        min_length=20,
        max_length=500,
        description="Important context about the verification",
    )


class FactCheckResponse(BaseModel):
    claim: str = Field(
        ...,
        min_length=10,
        max_length=1000,
        description="The exact claim being verified"
    )
    verdict: VerdictEnum = Field(
        ...,
        description="The verification verdict"
    )
    confidence: ConfidenceEnum = Field(
        ...,
        description="Confidence level in the verdict"
        description="The exact claim being verified",
    )
    verdict: VerdictEnum = Field(..., description="The verification verdict")
    confidence: ConfidenceEnum = Field(..., description="Confidence level in the verdict")
    sources: List[Source] = Field(
        ...,
        min_items=1,
@@ -58,19 +89,19 @@ class FactCheckResponse(BaseModel):
        ...,
        min_length=20,
        max_length=500,
        description="Concise summary of key evidence"
        description="Concise summary of key evidence",
    )
    explanation: str = Field(
        ...,
        min_length=50,
        max_length=1000,
        description="Detailed explanation of verification findings"
        description="Detailed explanation of verification findings",
    )
    additional_context: str = Field(
        ...,
        min_length=20,
        max_length=500,
        description="Important context about the verification"
        description="Important context about the verification",
    )

    class Config:
@@ -82,19 +113,16 @@ class FactCheckResponse(BaseModel):
                "sources": [
                    {
                        "url": "https://www.nasa.gov/mars-exploration",
                        "name": "NASA Mars Exploration"
                    },
                    {
                        "url": "https://factcheck.org/2024/mars-claims",
                        "name": "FactCheck.org"
                        "name": "NASA Mars Exploration",
                    }
                ],
                "evidence": "NASA has made no such announcement. Recent Mars rover images show natural rock formations.",
                "explanation": "Multiple fact-checking organizations investigated this claim. NASA's official communications and Mars mission reports from 2024 contain no mention of alien structures. The viral images being shared are misidentified natural geological formations.",
                "additional_context": "Similar false claims about alien structures on Mars have circulated periodically since the first Mars rovers began sending back images."
                "explanation": "Multiple fact-checking organizations investigated this claim. NASA's official communications and Mars mission reports from 2024 contain no mention of alien structures.",
                "additional_context": "Similar false claims about alien structures on Mars have circulated periodically.",
            }
        }


class ErrorResponse(BaseModel):
    detail: str
    error_code: str = Field(..., example="VALIDATION_ERROR")
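Constructing the unverified fallback defined above is straightforward; a sketch (field values must satisfy the min_length constraints):

from app.models.fact_check_models import (
    UnverifiedFactCheckResponse, VerdictEnum, ConfidenceEnum,
)

fallback = UnverifiedFactCheckResponse(
    claim="Did NASA confirm finding alien structures on Mars in 2024?",
    verdict=VerdictEnum.UNVERIFIED,
    confidence=ConfidenceEnum.LOW,
    sources=[],
    evidence="No fact-checking sources have verified this claim yet.",
    explanation="A search across reputable fact-checking websites did not find any formal verification of this claim.",
    additional_context="The claim may be too recent for fact-checkers to have investigated.",
)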
@@ -1,38 +1,46 @@
from pydantic import BaseModel
from typing import List, Dict


class SearchRequest(BaseModel):
    search_text: str
    source_types: List[str] = ["fact_checkers"]


class Publisher(BaseModel):
    name: str
    site: str


class ClaimReview(BaseModel):
    publisher: Publisher
    textualRating: str


class Claim(BaseModel):
    claimReview: List[ClaimReview]
    claimant: str
    text: str


class Summary(BaseModel):
    fact_checking_sites_queried: int
    total_sources: int


class TokenUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class VerificationResult(BaseModel):
    verdict: str
    confidence: str
    evidence: List[str]
    reasoning: str


class EnhancedFactCheckResponse(BaseModel):
    query: str
    results: List[Claim]
app/services/openai_client.py
@@ -1,4 +1,3 @@
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_transformers import BeautifulSoupTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
@@ -7,6 +6,9 @@ import numpy as np
import logging as logger
import openai
import json
import aiohttp
from bs4 import BeautifulSoup


class OpenAIClient:
    def __init__(self, api_key: str):
@@ -15,7 +17,9 @@ class OpenAIClient:
        """
        openai.api_key = api_key

    async def generate_text_response(self, system_prompt: str, user_prompt: str, max_tokens: int) -> dict:
    async def generate_text_response(
        self, system_prompt: str, user_prompt: str, max_tokens: int
    ) -> dict:
        """
        Generate a response using OpenAI's chat completion API.
        """
@@ -24,19 +28,19 @@ class OpenAIClient:
                model="gpt-4",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                    {"role": "user", "content": user_prompt},
                ],
                max_tokens=max_tokens
                max_tokens=max_tokens,
            )
            content = response['choices'][0]['message']['content']
            content = response["choices"][0]["message"]["content"]
            # Parse the JSON string into a dictionary
            parsed_content = json.loads(content)

            return {
                "response": parsed_content,  # Now returns a dictionary instead of string
                "prompt_tokens": response['usage']['prompt_tokens'],
                "completion_tokens": response['usage']['completion_tokens'],
                "total_tokens": response['usage']['total_tokens']
                "prompt_tokens": response["usage"]["prompt_tokens"],
                "completion_tokens": response["usage"]["completion_tokens"],
                "total_tokens": response["usage"]["total_tokens"],
            }
        except json.JSONDecodeError as e:
            raise Exception(f"Failed to parse OpenAI response as JSON: {str(e)}")
@@ -49,14 +53,14 @@ class OpenAIClient:
        """
        try:
            response = openai.Embedding.create(
                input=texts,
                model="text-embedding-ada-002"
                input=texts, model="text-embedding-ada-002"
            )
            embeddings = [data['embedding'] for data in response['data']]
            embeddings = [data["embedding"] for data in response["data"]]
            return embeddings
        except Exception as e:
            raise Exception(f"OpenAI embedding error: {str(e)}")


class AIFactChecker:
    def __init__(self, openai_client: OpenAIClient):
        """Initialize the fact checker with OpenAI client."""
@@ -65,21 +69,37 @@ class AIFactChecker:
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
        )

    async def scrape_webpage(self, url: str) -> List[Document]:
        """Scrape webpage content using LangChain's AsyncHtmlLoader."""
        """Scrape webpage content without saving HTML files."""
        try:
            loader = AsyncHtmlLoader([url])
            docs = await loader.aload()
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as response:
                    if response.status != 200:
                        raise Exception(
                            f"Failed to fetch URL: {url}, status: {response.status}"
                        )

            bs_transformer = BeautifulSoupTransformer()
            docs_transformed = bs_transformer.transform_documents(docs)
            docs_chunks = self.text_splitter.split_documents(docs_transformed)
                    html_content = await response.text()

            logger.info(f"Successfully scraped webpage | chunks={len(docs_chunks)}")
            return docs_chunks
            # Parse HTML with BeautifulSoup
            soup = BeautifulSoup(html_content, "html.parser")

            # Create a Document with the parsed content
            doc = Document(
                page_content=soup.get_text(separator="\n", strip=True),
                metadata={"source": url},
            )

            # Split into chunks
            docs_chunks = self.text_splitter.split_documents([doc])

            logger.info(
                f"Successfully scraped webpage | chunks={len(docs_chunks)}"
            )
            return docs_chunks

        except Exception as e:
            logger.error(f"Error scraping webpage | url={url} | error={str(e)}")
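The rewritten scraper can be exercised on its own; a sketch (module path taken from the imports in this diff, the API key is a placeholder):

import asyncio
from app.services.openai_client import OpenAIClient, AIFactChecker

async def main():
    checker = AIFactChecker(openai_client=OpenAIClient(api_key="sk-..."))  # placeholder key
    chunks = await checker.scrape_webpage("https://example.com")
    print(f"scraped {len(chunks)} chunks")

asyncio.run(main())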
@@ -89,7 +109,7 @@ class AIFactChecker:
        self,
        query_embedding: List[float],
        doc_embeddings: List[List[float]],
        docs: List[Document]
        docs: List[Document],
    ) -> List[Document]:
        """Find most relevant document chunks using cosine similarity."""
        try:
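The cosine similarity named in the docstring reduces to a normalized dot product; a minimal numpy sketch of the ranking idea (numpy is pinned in requirements.txt):

import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # dot product divided by the product of vector norms
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

query_vec = np.array([0.1, 0.7, 0.2])
doc_vecs = [np.array([0.1, 0.6, 0.3]), np.array([0.9, 0.0, 0.1])]
print([cosine_similarity(query_vec, d) for d in doc_vecs])  # the first chunk ranks higher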
@@ -107,7 +127,9 @@ class AIFactChecker:
            logger.error(f"Error finding relevant chunks | error={str(e)}")
            raise

    async def verify_fact(self, query: str, relevant_docs: List[Document]) -> Dict[str, Any]:
    async def verify_fact(
        self, query: str, relevant_docs: List[Document]
    ) -> Dict[str, Any]:
        """Verify fact using OpenAI's API with context from relevant documents."""
        try:
            context = "\n\n".join([doc.page_content for doc in relevant_docs])
@@ -132,12 +154,17 @@ class AIFactChecker:
Analyze the statement based on the provided context and return your response in the specified JSON format."""

            response = await self.openai_client.generate_text_response(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                max_tokens=800
                system_prompt=system_prompt, user_prompt=user_prompt, max_tokens=800
            )

            sources = list(set([doc.metadata.get('source', 'Unknown source') for doc in relevant_docs]))
            sources = list(
                set(
                    [
                        doc.metadata.get("source", "Unknown source")
                        for doc in relevant_docs
                    ]
                )
            )

            return {
                "verification_result": response["response"],  # This is now a dictionary
@@ -145,8 +172,8 @@ class AIFactChecker:
                "token_usage": {
                    "prompt_tokens": response["prompt_tokens"],
                    "completion_tokens": response["completion_tokens"],
                    "total_tokens": response["total_tokens"]
                }
                    "total_tokens": response["total_tokens"],
                },
            }

        except Exception as e:
@@ -162,7 +189,9 @@ class AIFactChecker:
        doc_embeddings = self.openai_client.get_embeddings(doc_texts)
        query_embedding = self.openai_client.get_embeddings([query])

        relevant_docs = self.find_relevant_chunks(query_embedding[0], doc_embeddings, docs)
        relevant_docs = self.find_relevant_chunks(
            query_embedding[0], doc_embeddings, docs
        )
        verification_result = await self.verify_fact(query, relevant_docs)

        return verification_result
Binary file not shown.
app/websites/fact_checker_website.py
@@ -1,120 +1,125 @@
from typing import Dict, List
import requests
from fastapi import HTTPException
from app.models.ai_fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType
from app.models.ai_fact_check_models import (
    FactCheckSource,
    ErrorResponse,
    FactCheckRequest,
    SourceType,
)

# Sources configuration with validation
SOURCES = {
    "fact_checkers": [
        FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1)
        for domain in [
            "snopes.com",
            "politifact.com",
            "factcheck.org",
            "reuters.com/fact-check",
            "apnews.com/hub/ap-fact-check",
            "bbc.com/news/reality_check",
            "fullfact.org",
            "afp.com/fact-check",
            "truthorfiction.com",
            "leadstories.com",
            "checkyourfact.com",
            "washingtonpost.com/news/fact-checker",
            "factcheck.kz",
            "poynter.org/ifcn",
            "factcheckeu.info",
            "africacheck.org",
            "thequint.com/webqoof",
            "altnews.in",
            "facta.news",
            "factcheckni.org",
            "mythdetector.ge",
            "verificado.mx",
            "euvsdisinfo.eu",
            "factcheck.afp.com",
            "newtral.es",
            "maldita.es",
            "faktograf.hr",
            "demagog.org.pl",
            "factnameh.com",
            "faktiskt.se",
            "teyit.org",
            "factly.in",
            "boom.live",
            "stopfake.org",
            "factcheck.ge",
            "factcheck.kg",
            "factcheck.uz",
            "factcheck.tj",
            "factcheck.az",
            "factcheck.am",
            "factcheck.md",
            "verafiles.org",
            "rappler.com/fact-check",
            "vera.com.gt",
            "chequeado.com",
            "aosfatos.org",
            "lasillavacia.com/detector-mentiras",
            "colombiacheck.com",
            "ecuadorchequea.com",
            "elsurti.com/checado",
            "verificat.cat",
            "mafindo.or.id",
            "tempo.co/cek-fakta",
            "factcheck.mk",
            "raskrinkavanje.ba",
            "faktograf.hr",
            "demagog.cz",
            "faktabaari.fi",
            "correctiv.org",
            "mimikama.at",
            "factcheck.vlaanderen",
            "factuel.afp.com",
            "nieuwscheckers.nl",
            "faktisk.no",
            "tjekdet.dk",
            "ellinikahoaxes.gr",
            "faktograf.id",
            "stopfake.kz",
            "pesacheck.org",
            "dubawa.org",
            "namibiafactcheck.org.na",
            "zimfact.org",
            "ghanafact.com",
            "factspace.africa",
            "factcrescendo.com",
            "vishvasnews.com",
            "factcheck.lk",
            "newschecker.in",
            "boomlive.in",
            "digiteye.in",
            "indiatoday.in/fact-check",
            "factcrescendo.com",
            "piyasa.com/fact-check",
            "taiwanese.facts.news",
            "taiwanfactcheck.com",
            "mygopen.com",
            "tfc-taiwan.org.tw",
            "cofacts.tw",
            "rumor.taipei",
            "fact.qq.com",
            "factcheck.afp.com/list",
            "acfta.org",
            "crosscheck.firstdraftnews.org",
            "healthfeedback.org",
            "climatefeedback.org",
            "sciencefeedback.co",
            "factcheck.aap.com.au",
            "emergent.info",
            "hoax-slayer.net",
            "truthorfiction.com",
            "factcheck.media",
            "mediawise.org",
            "thejournal.ie/factcheck",
            "journalistsresource.org",
            "metafact.io",
            "reporterslab.org/fact-checking"
        ]
            "snopes.com",
            "politifact.com",
            "factcheck.org",
            "reuters.com/fact-check",
            "apnews.com/hub/ap-fact-check",
            "bbc.com/news/reality_check",
            "fullfact.org",
            "afp.com/fact-check",
            "truthorfiction.com",
            "leadstories.com",
            "checkyourfact.com",
            "washingtonpost.com/news/fact-checker",
            "factcheck.kz",
            "poynter.org/ifcn",
            "factcheckeu.info",
            "africacheck.org",
            "thequint.com/webqoof",
            "altnews.in",
            "facta.news",
            "factcheckni.org",
            "mythdetector.ge",
            "verificado.mx",
            "euvsdisinfo.eu",
            "factcheck.afp.com",
            "newtral.es",
            "maldita.es",
            "faktograf.hr",
            "demagog.org.pl",
            "factnameh.com",
            "faktiskt.se",
            "teyit.org",
            "factly.in",
            "boom.live",
            "stopfake.org",
            "factcheck.ge",
            "factcheck.kg",
            "factcheck.uz",
            "factcheck.tj",
            "factcheck.az",
            "factcheck.am",
            "factcheck.md",
            "verafiles.org",
            "rappler.com/fact-check",
            "vera.com.gt",
            "chequeado.com",
            "aosfatos.org",
            "lasillavacia.com/detector-mentiras",
            "colombiacheck.com",
            "ecuadorchequea.com",
            "elsurti.com/checado",
            "verificat.cat",
            "mafindo.or.id",
            "tempo.co/cek-fakta",
            "factcheck.mk",
            "raskrinkavanje.ba",
            "faktograf.hr",
            "demagog.cz",
            "faktabaari.fi",
            "correctiv.org",
            "mimikama.at",
            "factcheck.vlaanderen",
            "factuel.afp.com",
            "nieuwscheckers.nl",
            "faktisk.no",
            "tjekdet.dk",
            "ellinikahoaxes.gr",
            "faktograf.id",
            "stopfake.kz",
            "pesacheck.org",
            "dubawa.org",
            "namibiafactcheck.org.na",
            "zimfact.org",
            "ghanafact.com",
            "factspace.africa",
            "factcrescendo.com",
            "vishvasnews.com",
            "factcheck.lk",
            "newschecker.in",
            "boomlive.in",
            "digiteye.in",
            "indiatoday.in/fact-check",
            "factcrescendo.com",
            "piyasa.com/fact-check",
            "taiwanese.facts.news",
            "taiwanfactcheck.com",
            "mygopen.com",
            "tfc-taiwan.org.tw",
            "cofacts.tw",
            "rumor.taipei",
            "fact.qq.com",
            "factcheck.afp.com/list",
            "acfta.org",
            "crosscheck.firstdraftnews.org",
            "healthfeedback.org",
            "climatefeedback.org",
            "sciencefeedback.co",
            "factcheck.aap.com.au",
            "emergent.info",
            "hoax-slayer.net",
            "truthorfiction.com",
            "factcheck.media",
            "mediawise.org",
            "thejournal.ie/factcheck",
            "journalistsresource.org",
            "metafact.io",
            "reporterslab.org/fact-checking",
        ]
    ],
    "news_sites": [
        FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2)
@@ -133,16 +138,14 @@ SOURCES = {
            "www.risingbd.com/english",
            "www.dailyindustry.news",
            "www.bangladeshpost.net",
            "www.daily-bangladesh.com/english"
            "www.daily-bangladesh.com/english",
        ]
    ]
    ],
}


async def fetch_fact_checks(
    api_key: str,
    base_url: str,
    query: str,
    site: FactCheckSource
    api_key: str, base_url: str, query: str, site: FactCheckSource
) -> Dict:
    """
    Fetch fact checks from a specific site using the Google Fact Check API
@@ -156,7 +159,7 @@ async def fetch_fact_checks(
        "query": query,
        "languageCode": "en-US",
        "reviewPublisherSiteFilter": site.domain,
        "pageSize": 10
        "pageSize": 10,
    }

    response = requests.get(base_url, params=params)
@@ -168,19 +171,18 @@ async def fetch_fact_checks(
            detail=ErrorResponse(
                detail=f"Error fetching from {site.domain}: {str(e)}",
                error_code="FACT_CHECK_SERVICE_ERROR",
                path="/check-facts"
            ).dict()
                path="/check-facts",
            ).dict(),
        )
    except ValueError as e:
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                detail=str(e),
                error_code="CONFIGURATION_ERROR",
                path="/check-facts"
            ).dict()
                detail=str(e), error_code="CONFIGURATION_ERROR", path="/check-facts"
            ).dict(),
        )


def get_all_sources() -> List[FactCheckSource]:
    """
    Get all sources sorted by priority
BIN images-test.jpg Normal file
Binary file not shown.
After Width: | Height: | Size: 11 KiB
21 main.py
@@ -7,25 +7,14 @@ from app.config import FRONTEND_URL

# Initialize FastAPI app
app = FastAPI(
    title="Your API Title",
    description="Your API Description",
    version="1.0.0"
    title="Your API Title", description="Your API Description", version="1.0.0"
)

# CORS configuration
origins = [
    FRONTEND_URL,
    "http://localhost",
    "http://localhost:5173",
    "http://0.0.0.0",
    "http://0.0.0.0:5173",
]


app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_origins=["*"],  # Only wildcard
    allow_credentials=False,  # Changed to False to work with wildcard
    allow_methods=["*"],
    allow_headers=["*"],
)
@@ -44,10 +33,6 @@ app.include_router(fact_check_router, prefix="")
app.include_router(aifact_check_router, prefix="")
app.include_router(scrap_websites_router, prefix="")

# Include routers (uncomment and modify as needed)
# from routes import some_router
# app.include_router(some_router, prefix="/your-prefix", tags=["your-tag"])

if __name__ == "__main__":
    import uvicorn
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
4 pytest.ini Normal file
@@ -0,0 +1,4 @@
[pytest]
pythonpath = .
testpaths = tests
python_files = test_*.py
requirements.txt
@@ -1,6 +1,87 @@
certifi==2024.8.30
aiofiles==24.1.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.10
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.7.0
attrs==24.3.0
beautifulsoup4==4.12.3
black==24.10.0
certifi==2024.12.14
charset-normalizer==3.4.0
click==8.1.7
dataclasses-json==0.6.7
dnspython==2.7.0
email_validator==2.2.0
fastapi==0.115.6
fastapi-cli==0.0.7
flake8==7.1.1
frozenlist==1.5.0
greenlet==3.1.1
gunicorn==23.0.0
h11==0.14.0
httpcore==1.0.7
httptools==0.6.4
httpx==0.28.1
httpx-sse==0.4.0
idna==3.10
iniconfig==2.0.0
itsdangerous==2.2.0
Jinja2==3.1.4
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.3.12
langchain-community==0.3.12
langchain-core==0.3.25
langchain-text-splitters==0.3.3
langsmith==0.2.3
markdown-it-py==3.0.0
MarkupSafe==3.0.2
marshmallow==3.23.1
mccabe==0.7.0
mdurl==0.1.2
multidict==6.1.0
mypy-extensions==1.0.0
numpy==1.26.4
openai==0.28.0
orjson==3.10.12
packaging==24.2
pathspec==0.12.1
platformdirs==4.3.6
pluggy==1.5.0
propcache==0.2.1
pycodestyle==2.12.1
pydantic==2.10.3
pydantic-extra-types==2.10.1
pydantic-settings==2.7.0
pydantic_core==2.27.1
pyflakes==3.2.0
Pygments==2.18.0
pytest==8.3.4
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-json-logger==3.2.1
python-multipart==0.0.20
PyYAML==6.0.2
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.9.4
rich-toolkit==0.12.0
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
soupsieve==2.6
SQLAlchemy==2.0.36
starlette==0.41.3
tenacity==9.0.0
tqdm==4.67.1
typer==0.15.1
typing-inspect==0.9.0
typing_extensions==4.12.2
ujson==5.10.0
urllib3==2.2.3
uvicorn==0.34.0
uvloop==0.21.0
watchfiles==1.0.3
websockets==14.1
yarl==1.18.3
File diff suppressed because one or more lines are too long
7 tests/conftest.py Normal file
@@ -0,0 +1,7 @@
import os
import sys
from pathlib import Path

# Add the project root directory to Python path
project_root = str(Path(__file__).parent.parent)
sys.path.append(project_root)
161 tests/test_check_fact.py Normal file
@@ -0,0 +1,161 @@
from fastapi.testclient import TestClient
from unittest.mock import patch, Mock
import sys
from pathlib import Path

# Import the FastAPI app
from main import app
from app.models.fact_check_models import (
    FactCheckResponse,
    UnverifiedFactCheckResponse,
    VerdictEnum,
    ConfidenceEnum,
    Source
)

# Create test client
client = TestClient(app)


def test_check_facts_missing_query():
    """Test the endpoint with a missing query."""
    response = client.post("/check-facts", json={})
    assert response.status_code == 422  # Validation error


def test_check_facts_short_query():
    """Test the endpoint with a query that's too short."""
    response = client.post("/check-facts", json={"query": "ab"})
    assert response.status_code == 422  # Query too short


def test_check_facts_valid_query_no_sources():
    """Test the endpoint with a valid query but no sources found."""
    test_query = "Does drinking water cure all diseases?"

    # Mock the search_websites function
    with patch('app.api.scrap_websites.search_websites') as mock_search:
        mock_search.return_value = {
            "status": "no_results",
            "urls_found": 0,
            "verification_result": {
                "no_sources_found": True,
                "reason": "No relevant fact-checking sources found"
            }
        }

        response = client.post("/check-facts", json={"query": test_query})

        assert response.status_code == 200
        data = response.json()
        assert data["claim"] == test_query
        assert data["verdict"] == VerdictEnum.UNVERIFIED
        assert data["confidence"] == ConfidenceEnum.LOW
        assert data["sources"] == []
        assert "No fact-checking sources" in data["evidence"]


def test_check_facts_valid_query_with_sources():
    """Test the endpoint with a valid query and sources found."""
    test_query = "Did NASA find aliens on Mars?"
    mock_fact_check_response = {
        "claims": [
            {
                "text": test_query,
                "claimReview": [
                    {
                        "publisher": {
                            "name": "Fact Check Organization",
                            "site": "https://factcheck.org"
                        },
                        "textualRating": "False",
                        "title": "NASA Mars Claim",
                        "url": "https://factcheck.org/mars-claim"
                    }
                ]
            }
        ]
    }

    # Mock both the Google Fact Check API call and search_websites
    with patch('httpx.AsyncClient.get') as mock_get:
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = mock_fact_check_response

        response = client.post("/check-facts", json={"query": test_query})

        assert response.status_code == 200
        data = response.json()
        assert data["claim"] == test_query
        assert data["verdict"] in [v.value for v in VerdictEnum]
        assert data["confidence"] in [c.value for c in ConfidenceEnum]
        assert len(data["sources"]) >= 1
        assert all(isinstance(source, dict) for source in data["sources"])
        assert all("url" in source and "name" in source for source in data["sources"])


def test_check_facts_api_error():
    """Test the endpoint's handling of API errors."""
    test_query = "Test query for API error"

    # Mock API error scenario
    with patch('httpx.AsyncClient.get') as mock_get:
        mock_get.side_effect = Exception("API Error")

        response = client.post("/check-facts", json={"query": test_query})

        assert response.status_code == 200  # We return 200 with UnverifiedFactCheckResponse
        data = response.json()
        assert data["verdict"] == VerdictEnum.UNVERIFIED
        assert data["confidence"] == ConfidenceEnum.LOW
        assert data["sources"] == []
        assert "error" in data["evidence"].lower()


def test_check_facts_missing_api_keys():
    """Test the endpoint's handling of missing API keys."""
    test_query = "Test query without API keys"

    # Mock missing API keys
    with patch('app.api.fact_check.GOOGLE_API_KEY', None):
        response = client.post("/check-facts", json={"query": test_query})

        assert response.status_code == 200  # We return 200 with UnverifiedFactCheckResponse
        data = response.json()
        assert data["verdict"] == VerdictEnum.UNVERIFIED
        assert "configuration" in data["evidence"].lower()
        assert data["sources"] == []


def test_check_facts_rate_limit():
    """Test the endpoint's handling of rate limiting."""
    test_query = "Test query for rate limit"

    # Mock rate limit response
    with patch('httpx.AsyncClient.get') as mock_get:
        mock_get.return_value.status_code = 429

        response = client.post("/check-facts", json={"query": test_query})

        assert response.status_code == 200  # We return 200 with UnverifiedFactCheckResponse
        data = response.json()
        assert data["verdict"] == VerdictEnum.UNVERIFIED
        assert data["confidence"] == ConfidenceEnum.LOW
        assert data["sources"] == []


def test_check_facts_empty_query():
    """Test the endpoint with an empty query."""
    response = client.post("/check-facts", json={"query": ""})
    assert response.status_code == 422  # Validation error


def test_check_facts_long_query():
    """Test the endpoint with a query that exceeds maximum length."""
    long_query = "a" * 501  # Create a string longer than max_length (500)
    response = client.post("/check-facts", json={"query": long_query})
    assert response.status_code == 422  # Validation error


def test_check_facts_malformed_json():
    """Test the endpoint with malformed JSON."""
    response = client.post(
        "/check-facts",
        data="this is not json",
        headers={"Content-Type": "application/json"}
    )
    assert response.status_code == 422  # JSON validation error


if __name__ == "__main__":
    import pytest
    pytest.main([__file__, "-v"])
18 tests/test_main.py Normal file
@@ -0,0 +1,18 @@
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)


def test_root_endpoint():
    response = client.get("/")
    assert response.status_code == 200
    assert response.json() == {"message": "Welcome to your FastAPI application"}


def test_health_endpoint():
    response = client.get("/health")
    assert response.status_code == 200
    assert response.json() == {"status": "healthy"}


def test_cors_headers():
    response = client.get("/", headers={"Origin": "http://localhost:5173"})
    assert response.headers["access-control-allow-origin"] == "*"