base code added

2024-12-12 17:31:44 +06:00 · 2024-12-12 17:31:44 +06:00 · 1a1a713e0f
commit 1a1a713e0f
parent 83a886960b
10 changed files with 656 additions and 48 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,4 @@
 env
 .env
 test.py
-/__pycache__/
+__pycache__
--- a/app/pycache/config.cpython-312.pyc
+++ b/app/pycache/config.cpython-312.pyc
--- a/app/api/pycache/fact_check.cpython-312.pyc
+++ b/app/api/pycache/fact_check.cpython-312.pyc
--- a/app/api/ai_fact_check.py
+++ b/app/api/ai_fact_check.py
@ -0,0 +1,112 @@
+from fastapi import APIRouter, HTTPException
+from app.services.openai_client import OpenAIClient, AIFactChecker
+from app.config import OPENAI_API_KEY
+from app.models.fact_check_models import (
+    AIFactCheckRequest,
+    AIFactCheckResponse,
+    VerificationResult,
+    TokenUsage,
+    ErrorResponse
+)
+from urllib.parse import urlparse
+import asyncio
+
+# Initialize router and OpenAI client
+aifact_check_router = APIRouter()
+openai_client = OpenAIClient(api_key=OPENAI_API_KEY)
+fact_checker = AIFactChecker(openai_client=openai_client)
+
+@aifact_check_router.post(
+    "/aicheck-facts",
+    response_model=AIFactCheckResponse,
+    responses={
+        400: {"model": ErrorResponse},
+        500: {"model": ErrorResponse}
+    }
+)
+async def ai_fact_check(request: AIFactCheckRequest):
+    """
+    Endpoint to fact-check a given statement based on multiple webpage URLs.
+
+    Every URL is checked concurrently. A URL whose check raises, or whose
+    result cannot be turned into a VerificationResult, gets an "Error" entry
+    in the response instead of failing the whole request.
+
+    Input:
+        - urls: List of webpage URLs to analyze (with or without http/https)
+        - content: The fact statement to verify
+    Response:
+        - JSON response with verification results per URL, sources, and token usage
+    Raises:
+        - HTTPException 400 for a ValueError raised outside the per-URL handling
+        - HTTPException 500 for any other unexpected error
+    """
+    def failed_check(evidence: str, reasoning: str, missing_info: str) -> VerificationResult:
+        # Placeholder entry for a URL that produced no usable verdict.
+        return VerificationResult(
+            verdict="Error",
+            confidence="Low",
+            evidence=evidence,
+            reasoning=reasoning,
+            missing_info=missing_info
+        )
+
+    try:
+        results = {}
+        all_sources = set()
+        all_contexts = []
+        total_prompt_tokens = 0
+        total_completion_tokens = 0
+        total_tokens = 0
+
+        # Process all URLs concurrently. Exceptions are returned as values so
+        # one failing URL does not abort the others.
+        tasks = [
+            fact_checker.check_fact(url=url, query=request.content)
+            for url in request.urls
+        ]
+        fact_check_results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        # Process results
+        for url, result in zip(request.urls, fact_check_results):
+            if isinstance(result, Exception):
+                # Handle failed URL checks
+                results[url] = failed_check(
+                    f"Error checking URL: {str(result)}",
+                    "URL processing failed",
+                    "Could not access or process the URL"
+                )
+                continue
+
+            # Read everything needed from this result before touching the
+            # aggregates, so a malformed result contributes nothing partial.
+            # pydantic's ValidationError is a ValueError; catching it here keeps
+            # bad model output from being reported as a 400 INVALID_URL.
+            try:
+                verdict_data = result["verification_result"]
+                verification_result = VerificationResult(
+                    verdict=verdict_data["verdict"],
+                    confidence=verdict_data["confidence"],
+                    evidence=verdict_data["evidence"],
+                    reasoning=verdict_data["reasoning"],
+                    missing_info=verdict_data.get("missing_info", None)
+                )
+                sources = list(result["sources"])
+                contexts = list(result["context_used"])
+                usage = result["token_usage"]
+                prompt_tokens = usage["prompt_tokens"]
+                completion_tokens = usage["completion_tokens"]
+                used_tokens = usage["total_tokens"]
+            except (KeyError, TypeError, AttributeError, ValueError) as exc:
+                results[url] = failed_check(
+                    f"Malformed verification result: {str(exc)}",
+                    "Verification output could not be parsed",
+                    "The verification response did not match the expected format"
+                )
+                continue
+
+            results[url] = verification_result
+            all_sources.update(sources)
+            all_contexts.extend(contexts)
+
+            # Accumulate token usage
+            total_prompt_tokens += prompt_tokens
+            total_completion_tokens += completion_tokens
+            total_tokens += used_tokens
+
+        token_usage = TokenUsage(
+            prompt_tokens=total_prompt_tokens,
+            completion_tokens=total_completion_tokens,
+            total_tokens=total_tokens
+        )
+
+        return AIFactCheckResponse(
+            query=request.content,
+            verification_result=results,
+            sources=list(all_sources),
+            context_used=all_contexts,
+            token_usage=token_usage
+        )
+
+    except ValueError as e:
+        raise HTTPException(
+            status_code=400,
+            detail=ErrorResponse(
+                detail=str(e),
+                error_code="INVALID_URL",
+                path="/aicheck-facts"
+            ).dict()
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=ErrorResponse(
+                detail=f"Error processing fact-check request: {str(e)}",
+                error_code="PROCESSING_ERROR",
+                path="/aicheck-facts"
+            ).dict()
+        )
--- a/app/api/fact_check.py
+++ b/app/api/fact_check.py
@ -1,11 +1,15 @@
 from fastapi import APIRouter, HTTPException
 import json
 from datetime import datetime
-from typing import Dict
+from typing import Dict, List

 from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
 from app.models.fact_check_models import (
-    FactCheckResponse, FactCheckRequest, Claim, ErrorResponse
+    GoogleFactCheckRequest as FactCheckRequest,
+    GoogleFactCheckResponse as FactCheckResponse,
+    Claim,
+    ErrorResponse,
+    TokenUsage
 )
 from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources

@ -32,6 +36,7 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
    Check facts using multiple fact-checking sources
    """
    all_results = []
+    verified_results = []
    
    # Validate configuration
    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
@ -46,6 +51,8 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
    
    # Get all sources in priority order
    all_sources = get_all_sources()
+    all_sources_list = []  # To store source URLs
+    contexts_used = []     # To store context snippets
    
    for source in all_sources:
        try:
@ -58,11 +65,17 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
            
            if "claims" in result:
                # Validate each claim through Pydantic
-                validated_claims = [
-                    Claim(**claim).dict()
-                    for claim in result["claims"]
-                ]
-                all_results.extend(validated_claims)
+                for claim in result["claims"]:
+                    validated_claim = Claim(**claim).dict()
+                    all_results.append(validated_claim)
+                    
+                    # Extract source and context information
+                    if "claimReview" in validated_claim:
+                        review = validated_claim["claimReview"][0]
+                        if "publisher" in review and "site" in review["publisher"]:
+                            all_sources_list.append(review["publisher"]["site"])
+                        if "textualRating" in review:
+                            contexts_used.append(review["textualRating"])
                
        except HTTPException:
            raise
@ -81,14 +94,33 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
            ).dict()
        )

-    # Create the response using Pydantic model
+    # Prepare the verification result
+    verification_result = {
+        "verdict": "Insufficient Information",  # Default verdict
+        "confidence": "Low",
+        "evidence": contexts_used,
+        "reasoning": "Based on available fact checks",
+        "missing_info": "Additional verification may be needed"
+    }
+
+    # Create token usage information
+    token_usage = TokenUsage(
+        prompt_tokens=0,
+        completion_tokens=0,
+        total_tokens=0
+    )
+
+    # Create the response using Pydantic model with all required fields
    response = FactCheckResponse(
        query=request.content,
        total_claims_found=len(all_results),
        results=all_results,
+        verification_result=verification_result,
+        sources=list(set(all_sources_list)),
+        context_used=contexts_used,
+        token_usage=token_usage,
        summary={
-            "total_sources": len(set(claim.get("claimReview", [{}])[0].get("publisher", {}).get("site", "") 
-                                 for claim in all_results if claim.get("claimReview"))),
+            "total_sources": len(set(all_sources_list)),
            "fact_checking_sites_queried": len(all_sources)
        }
    )
--- a/app/api/scrap_websites.py
+++ b/app/api/scrap_websites.py
@ -0,0 +1,160 @@
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from typing import List, Dict
+import requests
+from bs4 import BeautifulSoup
+import urllib.parse
+import numpy as np
+from app.services.openai_client import OpenAIClient
+from app.config import OPENAI_API_KEY
+
+scrap_websites_router = APIRouter()
+
+class SearchRequest(BaseModel):
+    # Request body of POST /search.
+    # search_text: free-text query; search_websites passes it to the scraper
+    # and embeds it for the similarity comparison.
+    search_text: str
+    # site_domains: domains to search; search_websites runs one scrape per entry.
+    site_domains: List[str]
+
+class UrlSimilarityInfo(BaseModel):
+    # One scored search hit as built by search_websites.
+    # url: the result URL returned by the scraper.
+    url: str
+    # similarity: value returned by calculate_similarity for this URL.
+    similarity: float
+    # extracted_text: words extract_url_text derived from the URL path.
+    extracted_text: str
+
+class SearchResponse(BaseModel):
+    # Response body of POST /search; every mapping is keyed by domain.
+    # results: URLs whose similarity reached the endpoint's threshold.
+    results: Dict[str, List[str]]
+    # error_messages: failure text for domains whose processing raised.
+    error_messages: Dict[str, str]
+    # url_similarities: all scored URLs for the domain, highest similarity first.
+    url_similarities: Dict[str, List[UrlSimilarityInfo]]
+
+def extract_url_text(url: str) -> str:
+    """
+    Turn the path of a URL into a space-separated string of lowercase keywords.
+
+    The path loses '.html', '/index' and '.php', is split on '/', and has
+    hyphens/underscores turned into spaces. Segments made only of digits are
+    dropped, as are one-character words and a fixed set of generic words.
+    Any exception yields an empty string.
+    """
+    noise_fragments = ('.html', '/index', '.php')
+    stop_words = {
+        'www', 'live', 'news', 'intl', 'index', 'world', 'us', 'uk',
+        'updates', 'update', 'latest', 'breaking', 'new', 'article'
+    }
+
+    def is_numeric_only(piece: str) -> bool:
+        # True for date-like or purely numeric segments such as "2024 12 01".
+        return piece.replace(' ', '').isdigit() or all(
+            token.isdigit() for token in piece.split()
+        )
+
+    try:
+        url_path = urllib.parse.urlparse(url).path
+        for fragment in noise_fragments:
+            url_path = url_path.replace(fragment, '')
+
+        kept_segments = []
+        for raw_segment in url_path.split('/'):
+            if not raw_segment:
+                continue
+            spaced = raw_segment.replace('-', ' ').replace('_', ' ')
+            if is_numeric_only(spaced):
+                continue
+            kept_segments.append(spaced)
+
+        tokens = ' '.join(kept_segments).split()
+        return ' '.join(
+            token.lower()
+            for token in tokens
+            if len(token) > 1 and token.lower() not in stop_words
+        )
+    except Exception:
+        return ''
+
+def google_search_scraper(search_text: str, site_domain: str, timeout: float = 10.0) -> List[str]:
+    """
+    Scrape a Google results page for links matching a query on one domain.
+
+    Args:
+        search_text: Free-text query.
+        site_domain: Domain placed in the quoted "site:" term of the query.
+        timeout: Seconds to wait for Google before giving up. Without it a
+            stalled connection would block the caller indefinitely.
+
+    Returns:
+        Up to five absolute (http/https) URLs taken from the first result blocks.
+
+    Raises:
+        HTTPException: 500 when the request fails, times out, or returns an
+            HTTP error status.
+    """
+    query = f"{search_text} \"site:{site_domain}\""
+    encoded_query = urllib.parse.quote(query)
+    base_url = "https://www.google.com/search"
+    search_url = f"{base_url}?q={encoded_query}"
+
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+    }
+
+    try:
+        response = requests.get(search_url, headers=headers, timeout=timeout)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.content, 'html.parser')
+        search_results = soup.find_all('div', class_='g')
+
+        urls = []
+        for result in search_results[:5]:
+            link = result.find('a')
+            if link and 'href' in link.attrs:
+                # Separate name so the request URL above is not overwritten.
+                href = link['href']
+                if href.startswith('http'):
+                    urls.append(href)
+
+        return urls
+
+    except requests.RequestException as e:
+        raise HTTPException(status_code=500, detail=f"Error scraping {site_domain}: {str(e)}") from e
+
+def calculate_similarity(query_embedding: List[float], url_embedding: List[float]) -> float:
+    """
+    Return the cosine similarity between two embedding vectors.
+
+    Args:
+        query_embedding: Embedding of the search query.
+        url_embedding: Embedding of the text extracted from a URL.
+
+    Returns:
+        The cosine similarity, or 0.0 when either vector has zero length
+        (the ratio is undefined there and would otherwise come out as NaN).
+    """
+    query_array = np.array(query_embedding, dtype=float)
+    url_array = np.array(url_embedding, dtype=float)
+
+    denominator = np.linalg.norm(url_array) * np.linalg.norm(query_array)
+    if denominator == 0:
+        return 0.0
+
+    return float(np.dot(url_array, query_array) / denominator)
+
+@scrap_websites_router.post("/search", response_model=SearchResponse)
+async def search_websites(request: SearchRequest):
+    # For each requested domain: scrape candidate URLs, embed the words taken
+    # from each URL path, score them against the query embedding, and report
+    # both the URLs above the threshold and every score. A failure in one
+    # domain is recorded in error_messages and does not stop the others.
+
+    # Higher similarity threshold for better filtering
+    min_similarity = 0.75
+
+    embedder = OpenAIClient(OPENAI_API_KEY)
+    query_vector = embedder.get_embeddings([request.search_text])[0]
+
+    accepted_by_domain = {}
+    failures = {}
+    scored_by_domain = {}
+
+    for domain in request.site_domains:
+        try:
+            scored = []
+            accepted = []
+
+            for candidate in google_search_scraper(request.search_text, domain):
+                slug_text = extract_url_text(candidate)
+                if slug_text:
+                    # URLs with no meaningful path text are never scored.
+                    candidate_vector = embedder.get_embeddings([slug_text])[0]
+                    score = calculate_similarity(query_vector, candidate_vector)
+
+                    scored.append(
+                        UrlSimilarityInfo(
+                            url=candidate,
+                            similarity=score,
+                            extracted_text=slug_text
+                        )
+                    )
+                    if score >= min_similarity:
+                        accepted.append(candidate)
+
+            accepted_by_domain[domain] = accepted
+            scored_by_domain[domain] = sorted(
+                scored,
+                key=lambda info: info.similarity,
+                reverse=True
+            )
+
+        except HTTPException as e:
+            failures[domain] = str(e.detail)
+        except Exception as e:
+            failures[domain] = f"Unexpected error for {domain}: {str(e)}"
+
+    return SearchResponse(
+        results=accepted_by_domain,
+        error_messages=failures,
+        url_similarities=scored_by_domain
+    )
--- a/app/models/pycache/fact_check_models.cpython-312.pyc
+++ b/app/models/pycache/fact_check_models.cpython-312.pyc
--- a/app/models/fact_check_models.py
+++ b/app/models/fact_check_models.py
@ -1,7 +1,14 @@
 from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Any, Union
 from enum import Enum
 from datetime import datetime
+from urllib.parse import urlparse
+
+# Common Models
+class TokenUsage(BaseModel):
+    # Token counts attached to a fact-check response. Each field accepts None
+    # and defaults to 0 when omitted.
+    prompt_tokens: Optional[int] = 0
+    completion_tokens: Optional[int] = 0
+    total_tokens: Optional[int] = 0

 class ErrorResponse(BaseModel):
    detail: str
@ -18,11 +25,7 @@ class ErrorResponse(BaseModel):
        }
    })

-class RequestValidationError(BaseModel):
-    loc: List[str]
-    msg: str
-    type: str
-
+# Fact Check Models
 class Publisher(BaseModel):
    name: str
    site: Optional[str] = Field(None, description="Publisher's website")
@ -47,11 +50,116 @@ class Claim(BaseModel):
    claimDate: Optional[str] = None
    claimReview: List[ClaimReview]

-class FactCheckResponse(BaseModel):
-    query: str = Field(..., description="Original query that was fact-checked")
-    total_claims_found: int = Field(..., ge=0)
-    results: List[Claim] = Field(default_factory=list)
-    summary: Dict[str, int] = Field(...)
+class SourceType(str, Enum):
+    # Kind of source site. Subclasses str, so members compare equal to their
+    # plain string values.
+    FACT_CHECKER = "fact_checker"
+    NEWS_SITE = "news_site"
+
+class FactCheckSource(BaseModel):
+    # A source site: its domain, its kind, and a priority.
+    domain: str
+    type: SourceType
+    # Constrained to 1..10, default 1. NOTE(review): whether a lower or higher
+    # number means "queried first" is not visible here; confirm against the
+    # code that orders sources.
+    priority: int = Field(default=1, ge=1, le=10)
+
+# Verification Models
+class VerificationResult(BaseModel):
+    # Outcome of checking one claim. verdict and confidence are free-form
+    # strings; the values named in their descriptions are not enforced here.
+    verdict: str = Field(..., description="True/False/Insufficient Information")
+    confidence: str = Field(..., description="High/Medium/Low")
+    # Either a single string or a list of strings is accepted.
+    evidence: Union[str, List[str]]
+    reasoning: str
+    # Optional; defaults to None when omitted.
+    missing_info: Optional[str] = None
+
+    # Example payload shown in the generated JSON schema.
+    model_config = ConfigDict(json_schema_extra={
+        "example": {
+            "verdict": "True",
+            "confidence": "High",
+            "evidence": ["Direct quote from source supporting the claim"],
+            "reasoning": "Detailed analysis of why the claim is considered true",
+            "missing_info": "Any caveats or limitations of the verification"
+        }
+    })
+
+# Request Models
+class BaseFactCheckRequest(BaseModel):
+    # Shared request base: carries the claim text, limited to 10-1000 characters.
+    content: str = Field(
+        ...,
+        min_length=10,
+        max_length=1000,
+        description="The claim to be fact-checked"
+    )
+
+    @validator('content')
+    def validate_content(cls, v):
+        """Return the claim with surrounding whitespace removed; reject blank text."""
+        trimmed = v.strip()
+        if trimmed:
+            return trimmed
+        raise ValueError("Content cannot be empty or just whitespace")
+
+class GoogleFactCheckRequest(BaseFactCheckRequest):
+    # Adds two options to the base request.
+    # language: must match two lowercase letters, a hyphen, two uppercase
+    # letters (e.g. "en-US").
+    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
+    # max_results_per_source: integer in 1..50, default 10.
+    max_results_per_source: int = Field(default=10, ge=1, le=50)
+
+class AIFactCheckRequest(BaseFactCheckRequest):
+    # Request for the AI fact-check endpoint: the claim plus 1-5 URLs to check
+    # it against.
+    urls: List[str] = Field(
+        ...,
+        min_items=1,
+        max_items=5,
+        description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing"
+    )
+
+    @validator('urls')
+    def validate_urls(cls, urls):
+        """
+        Normalize and validate every URL in the request.
+
+        Surrounding whitespace is removed, "https://" is prepended when no
+        http/https scheme is present (matched case-insensitively), and each
+        URL must parse to a non-empty network location.
+
+        Returns:
+            The normalized URLs, in their original order.
+
+        Raises:
+            ValueError: for a blank URL, an unparsable URL, or a URL with no host.
+        """
+        validated_urls = []
+        for url in urls:
+            # Keep the stripped value; otherwise " example.com" would become
+            # "https:// example.com" and still pass the host check.
+            url = url.strip()
+            if not url:
+                raise ValueError("URL cannot be empty")
+
+            # Add https:// if no protocol specified
+            if not url.lower().startswith(('http://', 'https://')):
+                url = f'https://{url}'
+
+            try:
+                result = urlparse(url)
+            except ValueError as e:
+                raise ValueError(f"Invalid URL {url}: {str(e)}") from e
+
+            if not result.netloc:
+                raise ValueError(f"Invalid URL structure for {url}")
+            validated_urls.append(url)
+
+        return validated_urls
+
+    # Example payload shown in the generated JSON schema.
+    model_config = ConfigDict(json_schema_extra={
+        "example": {
+            "content": "Indian flag was drawn in BUET campus",
+            "urls": [
+                "www.altnews.in/article-about-flag",
+                "www.another-source.com/related-news"
+            ]
+        }
+    })
+
+# Response Models
+class BaseFactCheckResponse(BaseModel):
+    # Fields shared by the fact-check response models; all four are required.
+    # query: the statement that was checked.
+    query: str
+    token_usage: TokenUsage
+    # sources / context_used: plain string lists filled in by the endpoint.
+    sources: List[str]
+    context_used: List[str]
+
+    # Example payload shown in the generated JSON schema.
+    model_config = ConfigDict(json_schema_extra={
+        "example": {
+            "query": "Example statement to verify",
+            "token_usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150
+            },
+            "sources": ["source1.com", "source2.com"],
+            "context_used": ["Relevant context from sources"]
+        }
+    })
+
+class GoogleFactCheckResponse(BaseFactCheckResponse):
+    total_claims_found: int
+    results: List[Dict[str, Any]]
+    verification_result: Dict[str, Any]
+    summary: Dict[str, int]

    model_config = ConfigDict(json_schema_extra={
        "example": {
@ -68,6 +176,19 @@ class FactCheckResponse(BaseModel):
                    "textualRating": "True"
                }]
            }],
+            "verification_result": {
+                "verdict": "True",
+                "confidence": "High",
+                "evidence": ["Supporting evidence"],
+                "reasoning": "Detailed analysis"
+            },
+            "sources": ["factchecker.com"],
+            "context_used": ["Relevant context"],
+            "token_usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150
+            },
            "summary": {
                "total_sources": 1,
                "fact_checking_sites_queried": 10
@ -75,35 +196,41 @@ class FactCheckResponse(BaseModel):
        }
    })

-class SourceType(str, Enum):
-    FACT_CHECKER = "fact_checker"
-    NEWS_SITE = "news_site"
-
-class FactCheckSource(BaseModel):
-    domain: str
-    type: SourceType
-    priority: int = Field(default=1, ge=1, le=10)
+class AIFactCheckResponse(BaseFactCheckResponse):
+    verification_result: Dict[str, VerificationResult]  # Changed to Dict to store results per URL

    model_config = ConfigDict(json_schema_extra={
        "example": {
-            "domain": "factcheck.org",
-            "type": "fact_checker",
-            "priority": 1
+            "query": "Indian flag was drawn in BUET campus",
+            "verification_result": {
+                "https://www.source1.com": {
+                    "verdict": "True",
+                    "confidence": "High",
+                    "evidence": ["Supporting evidence from source 1"],
+                    "reasoning": "Detailed analysis from source 1",
+                    "missing_info": None
+                },
+                "https://www.source2.com": {
+                    "verdict": "True",
+                    "confidence": "Medium",
+                    "evidence": ["Supporting evidence from source 2"],
+                    "reasoning": "Analysis from source 2",
+                    "missing_info": "Additional context needed"
+                }
+            },
+            "sources": ["source1.com", "source2.com"],
+            "context_used": [
+                "Context from source 1",
+                "Context from source 2"
+            ],
+            "token_usage": {
+                "prompt_tokens": 200,
+                "completion_tokens": 100,
+                "total_tokens": 300
+            }
        }
    })

-class FactCheckRequest(BaseModel):
-    content: str = Field(
-        ...,
-        min_length=10,
-        max_length=1000,
-        description="The claim to be fact-checked"
-    )
-    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
-    max_results_per_source: int = Field(default=10, ge=1, le=50)
-
-    @validator('content')
-    def validate_content(cls, v):
-        if not v.strip():
-            raise ValueError("Content cannot be empty or just whitespace")
-        return v.strip()
+# Backwards compatibility aliases
+FactCheckRequest = GoogleFactCheckRequest
+FactCheckResponse = GoogleFactCheckResponse
--- a/app/services/openai_client.py
+++ b/app/services/openai_client.py
@ -0,0 +1,173 @@
+from langchain_community.document_loaders import AsyncHtmlLoader
+from langchain_community.document_transformers import BeautifulSoupTransformer
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
+from typing import List, Dict, Any
+import numpy as np
+import logging as logger
+import openai
+import json
+
+class OpenAIClient:
+    """Thin wrapper around the module-level ``openai`` chat and embedding calls."""
+
+    def __init__(self, api_key: str):
+        """
+        Initialize OpenAI client with the provided API key.
+
+        The key is assigned to ``openai.api_key``, i.e. it is set on the
+        ``openai`` module rather than stored on this instance.
+        """
+        openai.api_key = api_key
+
+    async def generate_text_response(self, system_prompt: str, user_prompt: str, max_tokens: int) -> dict:
+        """
+        Generate a response using OpenAI's chat completion API.
+
+        The blocking SDK call runs in a worker thread so the event loop stays
+        free while waiting on the network; callers that gather several of
+        these coroutines then actually overlap.
+
+        Returns:
+            A dict with "response" (the model's reply parsed from JSON) and
+            the "prompt_tokens", "completion_tokens" and "total_tokens" counts.
+
+        Raises:
+            Exception: when the API call fails or the reply is not valid JSON.
+                The original error is chained as the cause.
+        """
+        # Local import: this module does not import asyncio at file level.
+        import asyncio
+
+        try:
+            response = await asyncio.to_thread(
+                openai.ChatCompletion.create,
+                model="gpt-4",
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt}
+                ],
+                max_tokens=max_tokens
+            )
+            content = response['choices'][0]['message']['content']
+            # Parse the JSON string into a dictionary
+            parsed_content = json.loads(content)
+
+            return {
+                "response": parsed_content,  # Now returns a dictionary instead of string
+                "prompt_tokens": response['usage']['prompt_tokens'],
+                "completion_tokens": response['usage']['completion_tokens'],
+                "total_tokens": response['usage']['total_tokens']
+            }
+        except json.JSONDecodeError as e:
+            raise Exception(f"Failed to parse OpenAI response as JSON: {str(e)}") from e
+        except Exception as e:
+            raise Exception(f"OpenAI text generation error: {str(e)}") from e
+
+    def get_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """
+        Retrieve embeddings for a list of texts using OpenAI's embedding API.
+
+        Returns:
+            One embedding per entry of the API response's "data" list.
+
+        Raises:
+            Exception: when the API call fails; the original error is chained
+                as the cause.
+        """
+        try:
+            response = openai.Embedding.create(
+                input=texts,
+                model="text-embedding-ada-002"
+            )
+            return [data['embedding'] for data in response['data']]
+        except Exception as e:
+            raise Exception(f"OpenAI embedding error: {str(e)}") from e
+
+class AIFactChecker:
+    """Checks a statement against a webpage: scrape, embed, rank chunks, ask the model."""
+
+    def __init__(self, openai_client: OpenAIClient):
+        """Initialize the fact checker with OpenAI client."""
+        self.openai_client = openai_client
+        # 1000-character chunks with 200 characters of overlap.
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200,
+            length_function=len,
+            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
+        )
+
+    async def scrape_webpage(self, url: str) -> List[Document]:
+        """
+        Scrape webpage content using LangChain's AsyncHtmlLoader.
+
+        Returns:
+            The page, transformed by BeautifulSoupTransformer and split into chunks.
+
+        Raises:
+            Whatever the loader, transformer or splitter raises, after logging it.
+        """
+        try:
+            loader = AsyncHtmlLoader([url])
+            docs = await loader.aload()
+
+            bs_transformer = BeautifulSoupTransformer()
+            docs_transformed = bs_transformer.transform_documents(docs)
+            docs_chunks = self.text_splitter.split_documents(docs_transformed)
+
+            logger.info(f"Successfully scraped webpage | chunks={len(docs_chunks)}")
+            return docs_chunks
+
+        except Exception as e:
+            logger.error(f"Error scraping webpage | url={url} | error={str(e)}")
+            raise
+
+    def find_relevant_chunks(
+        self,
+        query_embedding: List[float],
+        doc_embeddings: List[List[float]],
+        docs: List[Document],
+        top_k: int = 5
+    ) -> List[Document]:
+        """
+        Find most relevant document chunks using cosine similarity.
+
+        Args:
+            query_embedding: Embedding of the statement being checked.
+            doc_embeddings: One embedding per entry of ``docs``, same order.
+            docs: The chunks to rank.
+            top_k: Maximum number of chunks to return (default 5).
+
+        Returns:
+            Up to ``top_k`` chunks, most similar first. Empty when there is
+            nothing to rank or ``top_k`` is not positive.
+        """
+        try:
+            if not docs or len(doc_embeddings) == 0 or top_k <= 0:
+                return []
+
+            query_array = np.array(query_embedding, dtype=float)
+            chunks_array = np.array(doc_embeddings, dtype=float)
+
+            dot_products = np.dot(chunks_array, query_array)
+            norms = np.linalg.norm(chunks_array, axis=1) * np.linalg.norm(query_array)
+            # A zero-norm vector scores 0 instead of NaN; argsort places NaN
+            # last, which would rank it as the most relevant chunk.
+            similarities = np.divide(
+                dot_products,
+                norms,
+                out=np.zeros_like(dot_products),
+                where=norms != 0
+            )
+
+            top_indices = np.argsort(similarities)[-top_k:][::-1]
+            return [docs[i] for i in top_indices]
+
+        except Exception as e:
+            logger.error(f"Error finding relevant chunks | error={str(e)}")
+            raise
+
+    async def verify_fact(self, query: str, relevant_docs: List[Document]) -> Dict[str, Any]:
+        """
+        Verify fact using OpenAI's API with context from relevant documents.
+
+        Returns:
+            A dict with "verification_result" (the model's parsed JSON reply),
+            "sources" (distinct ``source`` metadata values of the chunks),
+            "context_used" (the chunk texts) and "token_usage".
+        """
+        try:
+            context = "\n\n".join([doc.page_content for doc in relevant_docs])
+
+            system_prompt = """You are a professional fact-checking assistant. Analyze the provided context 
+            and determine if the given statement is true, false, or if there isn't enough information.
+            
+            Provide your response in the following JSON format:
+            {
+                "verdict": "True/False/Insufficient Information",
+                "confidence": "High/Medium/Low",
+                "evidence": "Direct quotes or evidence from the context",
+                "reasoning": "Your detailed analysis and reasoning",
+                "missing_info": "Any important missing information (if applicable)"
+            }"""
+
+            user_prompt = f"""Context:
+            {context}
+            
+            Statement to verify: "{query}"
+            
+            Analyze the statement based on the provided context and return your response in the specified JSON format."""
+
+            response = await self.openai_client.generate_text_response(
+                system_prompt=system_prompt,
+                user_prompt=user_prompt,
+                max_tokens=800
+            )
+
+            sources = list(set([doc.metadata.get('source', 'Unknown source') for doc in relevant_docs]))
+
+            return {
+                "verification_result": response["response"],  # This is now a dictionary
+                "sources": sources,
+                "context_used": [doc.page_content for doc in relevant_docs],
+                "token_usage": {
+                    "prompt_tokens": response["prompt_tokens"],
+                    "completion_tokens": response["completion_tokens"],
+                    "total_tokens": response["total_tokens"]
+                }
+            }
+
+        except Exception as e:
+            logger.error(f"Error verifying fact | error={str(e)}")
+            raise
+
+    async def check_fact(self, url: str, query: str) -> Dict[str, Any]:
+        """
+        Main method to check a fact against a webpage.
+
+        Scrapes ``url``, embeds its chunks and the query, keeps the most
+        similar chunks and asks the model for a verdict.
+
+        Raises:
+            ValueError: when the page yields no text to check against.
+            Exception: anything raised while scraping, embedding or verifying.
+        """
+        # Local import: this module does not import asyncio at file level.
+        import asyncio
+
+        try:
+            docs = await self.scrape_webpage(url)
+
+            # Blank chunks carry no evidence, and an empty page cannot be ranked.
+            docs = [doc for doc in docs if doc.page_content.strip()]
+            if not docs:
+                raise ValueError(f"No text content could be extracted from {url}")
+
+            doc_texts = [doc.page_content for doc in docs]
+            # get_embeddings is a blocking call; run it off the event loop so
+            # concurrent checks of other URLs keep progressing.
+            doc_embeddings = await asyncio.to_thread(self.openai_client.get_embeddings, doc_texts)
+            query_embedding = await asyncio.to_thread(self.openai_client.get_embeddings, [query])
+
+            relevant_docs = self.find_relevant_chunks(query_embedding[0], doc_embeddings, docs)
+            verification_result = await self.verify_fact(query, relevant_docs)
+
+            return verification_result
+
+        except Exception as e:
+            logger.error(f"Error checking fact | error={str(e)}")
+            raise
--- a/main.py
+++ b/main.py
@ -1,6 +1,8 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from app.api.fact_check import fact_check_router
+from app.api.ai_fact_check import aifact_check_router
+from app.api.scrap_websites import scrap_websites_router
 from app.config import FRONTEND_URL

 # Initialize FastAPI app
@ -39,6 +41,8 @@ async def health_check():
    return {"status": "healthy"}

 app.include_router(fact_check_router, prefix="")
+app.include_router(aifact_check_router, prefix="")
+app.include_router(scrap_websites_router, prefix="")

 # Include routers (uncomment and modify as needed)
 # from routes import some_router