From 49c9c9c92d8f9a4cd7bf7dc977828d4e9e255017 Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Tue, 17 Dec 2024 18:23:13 +0600
Subject: [PATCH 01/10] added cicd

---
 .flake8            |  4 +++
 .gitignore         | 42 ++++++++++++++++++++++++++--
 .gitlab-ci.yml     | 70 ++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt   | 26 +++++++++++++++++
 tests/test_main.py | 18 ++++++++++++
 5 files changed, 158 insertions(+), 2 deletions(-)
 create mode 100644 .flake8
 create mode 100644 .gitlab-ci.yml
 create mode 100644 tests/test_main.py

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..3bb0c7e
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 100
+exclude = .git,__pycache__,dist,*.egg-info,venv
+extend-ignore = E203
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index cd4609c..e3da943 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,42 @@
-env
+# Environment
+env/
 .env
+venv/
+ENV/
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+.Python
+*.so
+.pytest_cache/
+.coverage
+.coverage.*
+coverage.xml
+*.cover
+htmlcov/
+
+# IDEs and editors
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# Project specific
 test.py
-__pycache__
\ No newline at end of file
+*.log
+.pip-cache/
+
+# Temporary files
+*.tmp
+.DS_Store
+
+# Distribution / packaging
+dist/
+build/
+*.egg-info/
+
+# Docker
+.docker/
\ No newline at end of file
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..b546ea9
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,70 @@
+image: python:3.10-slim
+
+variables:
+  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
+  PYTHONPATH: "$CI_PROJECT_DIR"
+
+cache:
+  paths:
+    - .pip-cache
+    - venv/
+
+stages:
+  - setup
+  - lint
+  - test
+
+before_script:
+  - python --version
+  - pip install virtualenv
+  - virtualenv venv
+  - source venv/bin/activate
+
+setup:
+  stage: setup
+  script:
+    - pip install --no-cache-dir -r requirements.txt
+  artifacts:
+    paths:
+      - venv/
+    expire_in: 1 hour
+
+lint:
+  stage: lint
+  needs:
+    - setup
+  script:
+    - black --check app/ main.py tests/
+    - flake8 app/ main.py tests/ --max-line-length=100
+
+test:
+  stage: test
+  needs:
+    - setup
+  script:
+    # Run all tests
+    - pytest tests/ -v
+    # Start FastAPI server
+    - uvicorn main:app --host 0.0.0.0 --port 8000 &
+    # Wait for server to start
+    - sleep 10
+    # Test health endpoint
+    - |
+      RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/health)
+      if [ "$RESPONSE" = "200" ]; then
+        echo "✅ Health check passed"
+      else
+        echo "❌ Health check failed with status $RESPONSE"
+        exit 1
+      fi
+
+build:
+  stage: build
+  needs:
+    - test
+    - lint
+  script:
+    - docker build -t fact-check-api .
+  only:
+    - master
+    - dev
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index f7bc893..459c6e9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@ annotated-types==0.7.0
 anyio==4.7.0
 attrs==24.3.0
 beautifulsoup4==4.12.3
+black==24.10.0
 certifi==2024.12.14
 charset-normalizer==3.4.0
 click==8.1.7
@@ -13,14 +14,20 @@ dataclasses-json==0.6.7
 dnspython==2.7.0
 email_validator==2.2.0
 fastapi==0.115.6
+fastapi-cli==0.0.7
+flake8==7.1.1
 frozenlist==1.5.0
 greenlet==3.1.1
 gunicorn==23.0.0
 h11==0.14.0
 httpcore==1.0.7
+httptools==0.6.4
 httpx==0.28.1
 httpx-sse==0.4.0
 idna==3.10
+iniconfig==2.0.0
+itsdangerous==2.2.0
+Jinja2==3.1.4
 jsonpatch==1.33
 jsonpointer==3.0.0
 langchain==0.3.12
@@ -28,17 +35,29 @@ langchain-community==0.3.12
 langchain-core==0.3.25
 langchain-text-splitters==0.3.3
 langsmith==0.2.3
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
 marshmallow==3.23.1
+mccabe==0.7.0
+mdurl==0.1.2
 multidict==6.1.0
 mypy-extensions==1.0.0
 numpy==2.2.0
 openai==0.28.0
 orjson==3.10.12
 packaging==24.2
+pathspec==0.12.1
+platformdirs==4.3.6
+pluggy==1.5.0
 propcache==0.2.1
+pycodestyle==2.12.1
 pydantic==2.10.3
+pydantic-extra-types==2.10.1
 pydantic-settings==2.7.0
 pydantic_core==2.27.1
+pyflakes==3.2.0
+Pygments==2.18.0
+pytest==8.3.4
 python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
 python-json-logger==3.2.1
@@ -46,6 +65,9 @@ python-multipart==0.0.20
 PyYAML==6.0.2
 requests==2.32.3
 requests-toolbelt==1.0.0
+rich==13.9.4
+rich-toolkit==0.12.0
+shellingham==1.5.4
 six==1.17.0
 sniffio==1.3.1
 soupsieve==2.6
@@ -53,9 +75,13 @@ SQLAlchemy==2.0.36
 starlette==0.41.3
 tenacity==9.0.0
 tqdm==4.67.1
+typer==0.15.1
 typing-inspect==0.9.0
 typing_extensions==4.12.2
 ujson==5.10.0
 urllib3==2.2.3
 uvicorn==0.34.0
+uvloop==0.21.0
+watchfiles==1.0.3
+websockets==14.1
 yarl==1.18.3
diff --git a/tests/test_main.py b/tests/test_main.py
new file mode 100644
index 0000000..2298bfc
--- /dev/null
+++ b/tests/test_main.py
@@ -0,0 +1,18 @@
+from fastapi.testclient import TestClient
+from main import app
+
+client = TestClient(app)
+
+def test_root_endpoint():
+    response = client.get("/")
+    assert response.status_code == 200
+    assert response.json() == {"message": "Welcome to your FastAPI application"}
+
+def test_health_endpoint():
+    response = client.get("/health")
+    assert response.status_code == 200
+    assert response.json() == {"status": "healthy"}
+
+def test_cors_headers():
+    response = client.get("/", headers={"Origin": "http://localhost:5173"})
+    assert response.headers["access-control-allow-origin"] == "http://localhost:5173"
\ No newline at end of file
-- 
2.45.3

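Two things stand out in this first pipeline. The build job references a "build" stage that is never declared under stages: (only setup, lint, and test are), so as written GitLab should reject the pipeline; the job is deleted again in the next patch. Separately, the test job races a fixed "sleep 10" against server startup, which tends to be flaky. A minimal readiness-poll sketch that could replace the sleep, assuming the /health endpoint from main.py and httpx from requirements.txt; the function and module names here are illustrative, not part of the patches:

import time
import httpx

def wait_for_server(url: str = "http://localhost:8000/health", timeout: float = 30.0) -> None:
    # Poll the health endpoint until it answers 200 or the deadline passes.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            if httpx.get(url, timeout=2.0).status_code == 200:
                return
        except httpx.TransportError:
            pass  # server not accepting connections yet
        time.sleep(0.5)
    raise RuntimeError(f"server at {url} not healthy within {timeout}s")

Invoked from the job script (for example via python -c, or a small helper script checked into the repo), this fails fast with a clear error instead of depending on a fixed delay.
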
From 954c01432b8459d4bf96b9eaf046d17009ce3dbd Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Tue, 17 Dec 2024 18:24:39 +0600
Subject: [PATCH 02/10] added cicd

---
 .gitlab-ci.yml | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b546ea9..676a881 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -57,14 +57,3 @@ test:
         echo "❌ Health check failed with status $RESPONSE"
         exit 1
       fi
-
-build:
-  stage: build
-  needs:
-    - test
-    - lint
-  script:
-    - docker build -t fact-check-api .
-  only:
-    - master
-    - dev
\ No newline at end of file
-- 
2.45.3

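A note on the CORS test introduced in PATCH 01: CORSMiddleware echoes the origin back only when it is in allow_origins, so test_cors_headers passes only if FRONTEND_URL resolves to http://localhost:5173. A hedged companion test for the preflight request, under that same assumption and reusing the module's client fixture:

def test_cors_preflight():
    # CORSMiddleware answers allowed preflights itself, without hitting the route.
    response = client.options(
        "/",
        headers={
            "Origin": "http://localhost:5173",
            "Access-Control-Request-Method": "GET",
        },
    )
    assert response.status_code == 200
    assert response.headers["access-control-allow-origin"] == "http://localhost:5173"
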
From 9c15f7a59c6f611641ff0450e00bb4db8983ff5e Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Tue, 17 Dec 2024 18:27:37 +0600
Subject: [PATCH 03/10] added cicd

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 459c6e9..1fbe5de 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,7 +42,7 @@ mccabe==0.7.0
 mdurl==0.1.2
 multidict==6.1.0
 mypy-extensions==1.0.0
-numpy==2.2.0
+numpy==1.26.4
 openai==0.28.0
 orjson==3.10.12
 packaging==24.2
-- 
2.45.3

From 019e07e1b911f3f2f81cc1901448a96f83475204 Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Tue, 17 Dec 2024 18:34:04 +0600
Subject: [PATCH 04/10] added cicd modified

---
 .gitlab-ci.yml                                            |  11 +-
 app/__pycache__/config.cpython-312.pyc                    | Bin 646 -> 646 bytes
 .../__pycache__/fact_check.cpython-312.pyc                | Bin 8116 -> 8105 bytes
 app/api/ai_fact_check.py                                  |  38 ++-
 app/api/fact_check.py                                     |  82 +++---
 app/api/scrap_websites.py                                 |  94 ++++---
 app/config.py                                             |   4 +-
 .../fact_check_models.cpython-312.pyc                     | Bin 4380 -> 4372 bytes
 app/models/ai_fact_check_models.py                        | 251 ++++++++++--------
 app/models/fact_check_models.py                           |  40 +--
 app/models/scrap_websites_models.py                       |  10 +-
 app/services/openai_client.py                             | 116 ++++----
 .../fact_checker_website.cpython-312.pyc                  | Bin 4885 -> 4880 bytes
 app/websites/fact_checker_website.py                      | 250 ++++++++---------
 main.py                                                   |  10 +-
 tests/test_main.py                                        |   5 +-
 16 files changed, 481 insertions(+), 430 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 676a881..2f4fa69 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -11,7 +11,6 @@ cache:
 
 stages:
   - setup
-  - lint
   - test
 
 before_script:
@@ -29,14 +28,6 @@ setup:
       - venv/
     expire_in: 1 hour
 
-lint:
-  stage: lint
-  needs:
-    - setup
-  script:
-    - black --check app/ main.py tests/
-    - flake8 app/ main.py tests/ --max-line-length=100
-
 test:
   stage: test
   needs:
@@ -47,7 +38,7 @@ test:
     # Start FastAPI server
     - uvicorn main:app --host 0.0.0.0 --port 8000 &
    # Wait for server to start
-    - sleep 10
+    - sleep 15
     # Test health endpoint
     - |
       RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/health)
diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc
index b086fe1a5fd129666635b64227baa2bfcfa3fb8f..74c6db23679571ee99553ec76248c59aa42279ef 100644
GIT binary patch
[binary deltas omitted]

diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc
index b5709d28204fc20b85cd04dc7acb2b256391067d..f784c29a418da8adfc21cffadd4400a7b7cf543c 100644
GIT binary patch
[binary deltas omitted]

diff --git a/app/api/ai_fact_check.py b/app/api/ai_fact_check.py
index 6d1f2d7..c848b1b 100644
--- a/app/api/ai_fact_check.py
+++ b/app/api/ai_fact_check.py
@@ -6,7 +6,7 @@ from app.models.ai_fact_check_models import (
     AIFactCheckResponse,
     VerificationResult,
     TokenUsage,
-    ErrorResponse
+    ErrorResponse,
 )
 from urllib.parse import urlparse
 import asyncio
@@ -16,13 +16,11 @@ aifact_check_router = APIRouter()
 openai_client = OpenAIClient(api_key=OPENAI_API_KEY)
 fact_checker = AIFactChecker(openai_client=openai_client)
 
+
 @aifact_check_router.post(
     "/aicheck-facts",
     response_model=AIFactCheckResponse,
-    responses={
-        400: {"model": ErrorResponse},
-        500: {"model": ErrorResponse}
-    }
+    responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
 )
 async def ai_fact_check(request: AIFactCheckRequest):
     """
@@ -40,14 +38,14 @@ async def ai_fact_check(request: AIFactCheckRequest):
         total_prompt_tokens = 0
         total_completion_tokens = 0
         total_tokens = 0
-
+
         # Process all URLs concurrently
         tasks = [
             fact_checker.check_fact(url=url, query=request.content) for url in request.urls
         ]
         fact_check_results = await asyncio.gather(*tasks, return_exceptions=True)
-
+
         # Process results
         for url, result in zip(request.urls, fact_check_results):
             if isinstance(result, Exception):
@@ -57,21 +55,21 @@ async def ai_fact_check(request: AIFactCheckRequest):
                     confidence="Low",
                     evidence=f"Error checking URL: {str(result)}",
                     reasoning="URL processing failed",
-                    missing_info="Could not access or process the URL"
+                    missing_info="Could not access or process the URL",
                 )
                 continue
-
+
             verification_result = VerificationResult(
                 verdict=result["verification_result"]["verdict"],
                 confidence=result["verification_result"]["confidence"],
                 evidence=result["verification_result"]["evidence"],
                 reasoning=result["verification_result"]["reasoning"],
-                missing_info=result["verification_result"].get("missing_info", None)
+                missing_info=result["verification_result"].get("missing_info", None),
             )
-
+
             results[url] = verification_result
             all_sources.update(result["sources"])
-
+
             # Accumulate token usage
             total_prompt_tokens += result["token_usage"]["prompt_tokens"]
             total_completion_tokens += result["token_usage"]["completion_tokens"]
@@ -80,24 +78,22 @@ async def ai_fact_check(request: AIFactCheckRequest):
         token_usage = TokenUsage(
             prompt_tokens=total_prompt_tokens,
             completion_tokens=total_completion_tokens,
-            total_tokens=total_tokens
+            total_tokens=total_tokens,
         )
 
         return AIFactCheckResponse(
             query=request.content,
             verification_result=results,
             sources=list(all_sources),
-            token_usage=token_usage
+            token_usage=token_usage,
         )
 
     except ValueError as e:
         raise HTTPException(
             status_code=400,
             detail=ErrorResponse(
-                detail=str(e),
-                error_code="INVALID_URL",
-                path="/aicheck-facts"
-            ).dict()
+                detail=str(e), error_code="INVALID_URL", path="/aicheck-facts"
+            ).dict(),
         )
     except Exception as e:
         raise HTTPException(
@@ -105,6 +101,6 @@ async def ai_fact_check(request: AIFactCheckRequest):
             detail=ErrorResponse(
                 detail=f"Error processing fact-check request: {str(e)}",
                 error_code="PROCESSING_ERROR",
-                path="/aicheck-facts"
-            ).dict()
-    )
\ No newline at end of file
+                path="/aicheck-facts",
+            ).dict(),
+        )
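The endpoint above fans out one check_fact call per URL and relies on asyncio.gather(..., return_exceptions=True), which returns results and exceptions in input order instead of cancelling everything on the first failure; that is what lets the loop turn a single bad URL into an "Error" verdict while the others proceed. A self-contained sketch of the same pattern, with a stand-in coroutine in place of the real fact checker:

import asyncio

async def check(url: str) -> str:
    # Stand-in for fact_checker.check_fact; fails for one URL on purpose.
    if "bad" in url:
        raise ValueError(f"cannot reach {url}")
    return f"{url}: ok"

async def main() -> None:
    urls = ["https://a.example", "https://bad.example"]
    results = await asyncio.gather(*(check(u) for u in urls), return_exceptions=True)
    for url, result in zip(urls, results):
        if isinstance(result, Exception):
            print(f"{url} failed: {result}")  # corresponds to the "Error" verdict branch
        else:
            print(result)

asyncio.run(main())
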
diff --git a/app/api/fact_check.py b/app/api/fact_check.py
index b52ef24..ab4cd9f 100644
--- a/app/api/fact_check.py
+++ b/app/api/fact_check.py
@@ -4,16 +4,17 @@ from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KE
 from app.api.scrap_websites import search_websites, SearchRequest
 from app.services.openai_client import OpenAIClient
 from app.models.fact_check_models import (
-    FactCheckRequest,
-    FactCheckResponse,
+    FactCheckRequest,
+    FactCheckResponse,
     ErrorResponse,
-    Source
+    Source,
 )
 from app.websites.fact_checker_website import get_all_sources
 
 fact_check_router = APIRouter()
 openai_client = OpenAIClient(OPENAI_API_KEY)
 
+
 async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse:
     """Generate a fact check report using OpenAI based on the fact check results."""
     try:
@@ -55,7 +56,7 @@ Ensure all URLs in sources are complete (including https:// if missing) and each
 2. Specify verification dates when available
 3. Name the fact-checking organizations involved
 4. Describe the verification process"""
-
+
         else:
             system_prompt = base_system_prompt
             user_prompt = f"""Query: {query}
@@ -70,37 +71,34 @@ Ensure all URLs in sources are complete (including https:// if missing) and each
 4. Note any conflicting information between sources"""
 
         response = await openai_client.generate_text_response(
-            system_prompt=system_prompt,
-            user_prompt=user_prompt,
-            max_tokens=1000
+            system_prompt=system_prompt, user_prompt=user_prompt, max_tokens=1000
         )
-
+
         try:
             # First try to parse the response directly
             response_data = response["response"]
-
+
             # Clean up sources before validation
-            if isinstance(response_data.get('sources'), list):
+            if isinstance(response_data.get("sources"), list):
                 cleaned_sources = []
-                for source in response_data['sources']:
+                for source in response_data["sources"]:
                     if isinstance(source, str):
                         # Convert string sources to Source objects
-                        url = source if source.startswith('http') else f"https://{source}"
-                        cleaned_sources.append({
-                            "url": url,
-                            "name": source
-                        })
+                        url = (
+                            source if source.startswith("http") else f"https://{source}"
+                        )
+                        cleaned_sources.append({"url": url, "name": source})
                     elif isinstance(source, dict):
                         # Ensure URL has proper scheme
-                        url = source.get('url', '')
-                        if url and not url.startswith('http'):
-                            source['url'] = f"https://{url}"
+                        url = source.get("url", "")
+                        if url and not url.startswith("http"):
+                            source["url"] = f"https://{url}"
                         cleaned_sources.append(source)
-                response_data['sources'] = cleaned_sources
-
+                response_data["sources"] = cleaned_sources
+
             fact_check_response = FactCheckResponse(**response_data)
             return fact_check_response
-
+
         except Exception as validation_error:
             print(f"Response validation error: {str(validation_error)}")
             raise HTTPException(
@@ -108,10 +106,10 @@ Ensure all URLs in sources are complete (including https:// if missing) and each
                 detail=ErrorResponse(
                     detail=f"Invalid response format: {str(validation_error)}",
                     error_code="VALIDATION_ERROR",
-                    path="/check-facts"
-                ).dict()
+                    path="/check-facts",
+                ).dict(),
             )
-
+
     except Exception as e:
         print(f"Error generating fact report: {str(e)}")
         raise HTTPException(
@@ -119,10 +117,11 @@ Ensure all URLs in sources are complete (including https:// if missing) and each
             detail=ErrorResponse(
                 detail="Error generating fact report",
                 error_code="FACT_CHECK_ERROR",
-                path="/check-facts"
-            ).dict()
+                path="/check-facts",
+            ).dict(),
         )
 
+
 @fact_check_router.post("/check-facts", response_model=FactCheckResponse)
 async def check_facts(request: FactCheckRequest):
     """
@@ -134,52 +133,49 @@ async def check_facts(request: FactCheckRequest):
             detail=ErrorResponse(
                 detail="Google API key or base URL is not configured",
                 error_code="CONFIGURATION_ERROR",
-                path="/check-facts"
-            ).dict()
+                path="/check-facts",
+            ).dict(),
         )
 
     headers = {"Content-Type": "application/json"}
 
     async with httpx.AsyncClient() as client:
         # Get fact checker sources from the centralized configuration
         fact_checker_sources = get_all_sources()
-
+
         for source in fact_checker_sources:
             params = {
                 "key": GOOGLE_API_KEY,
                 "query": request.query,
                 "languageCode": "en-US",
                 "reviewPublisherSiteFilter": source.domain,
-                "pageSize": 10
+                "pageSize": 10,
             }
 
             try:
                 response = await client.get(
-                    GOOGLE_FACT_CHECK_BASE_URL,
-                    params=params,
-                    headers=headers
+                    GOOGLE_FACT_CHECK_BASE_URL, params=params, headers=headers
                 )
                 response.raise_for_status()
                 json_response = response.json()
 
                 if json_response.get("claims"):
                     return await generate_fact_report(request.query, json_response)
-
+
             except httpx.RequestError as e:
                 print(f"Error fetching results for site {source.domain}: {str(e)}")
                 continue
             except Exception as e:
                 print(f"Unexpected error for site {source.domain}: {str(e)}")
                 continue
-
+
     try:
         search_request = SearchRequest(
-            search_text=request.query,
-            source_types=["fact_checkers"]
+            search_text=request.query, source_types=["fact_checkers"]
         )
-
+
         ai_response = await search_websites(search_request)
         return await generate_fact_report(request.query, ai_response)
-
+
    except Exception as e:
        print(f"Error in AI fact check: {str(e)}")
        raise HTTPException(
@@ -187,6 +183,6 @@ async def check_facts(request: FactCheckRequest):
             detail=ErrorResponse(
                 detail="No fact check results found",
                 error_code="NOT_FOUND",
-                path="/check-facts"
-            ).dict()
-    )
\ No newline at end of file
+                path="/check-facts",
+            ).dict(),
+        )
diff --git a/app/api/scrap_websites.py b/app/api/scrap_websites.py
index 946ec01..f685158 100644
--- a/app/api/scrap_websites.py
+++ b/app/api/scrap_websites.py
@@ -7,7 +7,7 @@ from pydantic import BaseModel
 from app.models.ai_fact_check_models import (
     AIFactCheckRequest,
     FactCheckSource,
-    SourceType
+    SourceType,
 )
 from app.websites.fact_checker_website import SOURCES, get_all_sources
 from app.api.ai_fact_check import ai_fact_check
@@ -18,10 +18,10 @@ class SearchRequest(BaseModel):
     search_text: str
     source_types: List[str] = ["fact_checkers"]
 
+
 # Configure logging
 logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
 logger = logging.getLogger(__name__)
 
@@ -38,51 +38,58 @@ def get_domain_from_url(url: str) -> str:
     try:
         parsed = urlparse(url)
         domain = parsed.netloc.lower()
-        if domain.startswith('www.'):
+        if domain.startswith("www."):
             domain = domain[4:]
         return domain
     except Exception as e:
         logger.error(f"Error extracting domain from URL {url}: {str(e)}")
         return ""
 
+
 def is_valid_source_domain(domain: str, sources: List[FactCheckSource]) -> bool:
     """Check if domain matches any source with improved matching logic."""
     if not domain:
         return False
-
+
     domain = domain.lower()
-    if domain.startswith('www.'):
+    if domain.startswith("www."):
         domain = domain[4:]
-
+
     for source in sources:
         source_domain = source.domain.lower()
-        if source_domain.startswith('www.'):
+        if source_domain.startswith("www."):
             source_domain = source_domain[4:]
-
-        if domain == source_domain or domain.endswith('.' + source_domain):
+
+        if domain == source_domain or domain.endswith("." + source_domain):
             return True
-
+
     return False
 
-async def build_enhanced_search_query(query: str, sources: List[FactCheckSource]) -> str:
+
+async def build_enhanced_search_query(
+    query: str, sources: List[FactCheckSource]
+) -> str:
     """Build search query with site restrictions."""
     site_queries = [f"site:{source.domain}" for source in sources]
     site_restriction = " OR ".join(site_queries)
     return f"({query}) ({site_restriction})"
 
-async def google_custom_search(query: str, sources: List[FactCheckSource], page: int = 1) -> Optional[Dict]:
+
+async def google_custom_search(
+    query: str, sources: List[FactCheckSource], page: int = 1
+) -> Optional[Dict]:
     """Perform Google Custom Search with enhanced query."""
     enhanced_query = await build_enhanced_search_query(query, sources)
     start_index = ((page - 1) * RESULTS_PER_PAGE) + 1
-
+
     params = {
         "key": GOOGLE_API_KEY,
         "cx": GOOGLE_ENGINE_ID,
         "q": enhanced_query,
         "num": RESULTS_PER_PAGE,
-        "start": start_index
+        "start": start_index,
     }
-
+
     async with httpx.AsyncClient(timeout=30.0) as client:
         try:
             response = await client.get(GOOGLE_SEARCH_URL, params=params)
@@ -92,69 +99,70 @@ async def google_custom_search(query: str, sources: List[FactCheckSource], page:
             logger.error(f"Search error: {str(e)}")
             raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
 
+
 @scrap_websites_router.post("/search")
 async def search_websites(request: SearchRequest):
     # Get the source types from the request
     source_types = request.source_types if request.source_types else ["fact_checkers"]
-
+
     # Get sources based on requested types
     selected_sources = []
     for source_type in source_types:
         if source_type in SOURCES:
             selected_sources.extend(SOURCES[source_type])
-
+
     # If no valid sources found, use fact checkers as default
     if not selected_sources:
         selected_sources = SOURCES["fact_checkers"]
-
+
     all_urls = []
     domain_results = {}
-
+
     try:
         for page in range(1, MAX_PAGES + 1):
             if len(all_urls) >= 50:
                 break
-
-            search_response = await google_custom_search(request.search_text, selected_sources, page)
-
+
+            search_response = await google_custom_search(
+                request.search_text, selected_sources, page
+            )
+
             if not search_response or not search_response.get("items"):
                 break
-
+
             for item in search_response.get("items", []):
                 url = item.get("link")
                 if not url:
                     continue
-
+
                 domain = get_domain_from_url(url)
-
+
                 if is_valid_source_domain(domain, selected_sources):
                     if domain not in domain_results:
                         domain_results[domain] = []
-
+
                     if len(domain_results[domain]) < MAX_URLS_PER_DOMAIN:
-                        domain_results[domain].append({
-                            "url": url,
-                            "title": item.get("title", ""),
-                            "snippet": item.get("snippet", "")
-                        })
+                        domain_results[domain].append(
+                            {
+                                "url": url,
+                                "title": item.get("title", ""),
+                                "snippet": item.get("snippet", ""),
+                            }
+                        )
                         all_urls.append(url)
-
+
             if len(all_urls) >= 50:
                 break
-
+
         if not all_urls:
-            return {
-                "status": "no_results",
-                "urls_found": 0
-            }
-
+            return {"status": "no_results", "urls_found": 0}
+
         fact_check_request = AIFactCheckRequest(
-            content=request.search_text,
-            urls=all_urls[:5]
+            content=request.search_text, urls=all_urls[:5]
         )
-
+
         return await ai_fact_check(fact_check_request)
 
     except Exception as e:
         logger.error(f"Error during search/fact-check process: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
\ No newline at end of file
+        raise HTTPException(status_code=500, detail=str(e))
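build_enhanced_search_query above restricts Google Custom Search to the configured fact-checker domains by OR-ing site: operators onto the claim. A sketch of what it produces, using illustrative domains (the real list lives in SOURCES["fact_checkers"], which this patch does not show):

domains = ["snopes.com", "politifact.com", "factcheck.org"]  # illustrative only
site_restriction = " OR ".join(f"site:{d}" for d in domains)
query = f"(Indian flag was drawn in BUET campus) ({site_restriction})"
print(query)
# (Indian flag was drawn in BUET campus) (site:snopes.com OR site:politifact.com OR site:factcheck.org)

is_valid_source_domain then filters the returned links, accepting exact domain matches and subdomains (the endswith("." + source_domain) branch), so results from, say, www.snopes.com still count.
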
diff --git a/app/config.py b/app/config.py
index b890247..6e7437c 100644
--- a/app/config.py
+++ b/app/config.py
@@ -4,9 +4,9 @@ from dotenv import load_dotenv
 load_dotenv()
 
 GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
-GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
+GOOGLE_FACT_CHECK_BASE_URL = os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
 GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"]
 GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"]
 OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
 
-FRONTEND_URL = os.environ["FRONTEND_URL"]
\ No newline at end of file
+FRONTEND_URL = os.environ["FRONTEND_URL"]
diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc
index 7cb8e9acf2b369f7da11e0dc68daf6904a7d2842..1e810e2e116619d2f9073244b85d2e74028286a7 100644
GIT binary patch
[binary deltas omitted]

diff --git a/app/models/ai_fact_check_models.py b/app/models/ai_fact_check_models.py
index 0949e51..525b1cb 100644
--- a/app/models/ai_fact_check_models.py
+++ b/app/models/ai_fact_check_models.py
@@ -4,38 +4,46 @@ from enum import Enum
 from datetime import datetime
 from urllib.parse import urlparse
 
+
 # Common Models
 class TokenUsage(BaseModel):
     prompt_tokens: Optional[int] = 0
     completion_tokens: Optional[int] = 0
     total_tokens: Optional[int] = 0
 
+
 class ErrorResponse(BaseModel):
     detail: str
     error_code: str = Field(..., description="Unique error code for this type of error")
     timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
-    path: Optional[str] = Field(None, description="The endpoint path where error occurred")
+    path: Optional[str] = Field(
+        None, description="The endpoint path where error occurred"
+    )
 
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "detail": "Error description",
-            "error_code": "ERROR_CODE",
-            "timestamp": "2024-12-09T16:49:30.905765",
-            "path": "/check-facts"
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "detail": "Error description",
+                "error_code": "ERROR_CODE",
+                "timestamp": "2024-12-09T16:49:30.905765",
+                "path": "/check-facts",
+            }
         }
-    })
+    )
+
 
 # Fact Check Models
 class Publisher(BaseModel):
     name: str
     site: Optional[str] = Field(None, description="Publisher's website")
-
-    @validator('site')
+
+    @validator("site")
     def validate_site(cls, v):
-        if v and not (v.startswith('http://') or v.startswith('https://')):
+        if v and not (v.startswith("http://") or v.startswith("https://")):
             return f"https://{v}"
         return v
 
+
 class ClaimReview(BaseModel):
     publisher: Publisher
     url: Optional[HttpUrl] = None
@@ -44,21 +52,25 @@ class ClaimReview(BaseModel):
     textualRating: Optional[str] = None
     languageCode: str = Field(default="en-US")
 
+
 class Claim(BaseModel):
     text: str
     claimant: Optional[str] = None
     claimDate: Optional[str] = None
     claimReview: List[ClaimReview]
 
+
 class SourceType(str, Enum):
     FACT_CHECKER = "fact_checker"
     NEWS_SITE = "news_site"
 
+
 class FactCheckSource(BaseModel):
     domain: str
     type: SourceType
     priority: int = Field(default=1, ge=1, le=10)
 
+
 # Verification Models
 class VerificationResult(BaseModel):
     verdict: str = Field(..., description="True/False/Insufficient Information")
@@ -67,54 +79,56 @@ class VerificationResult(BaseModel):
     reasoning: str
     missing_info: Optional[str] = None
 
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "verdict": "True",
-            "confidence": "High",
-            "evidence": ["Direct quote from source supporting the claim"],
-            "reasoning": "Detailed analysis of why the claim is considered true",
-            "missing_info": "Any caveats or limitations of the verification"
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "verdict": "True",
+                "confidence": "High",
+                "evidence": ["Direct quote from source supporting the claim"],
+                "reasoning": "Detailed analysis of why the claim is considered true",
+                "missing_info": "Any caveats or limitations of the verification",
+            }
         }
-    })
+    )
+
 
 # Request Models
 class BaseFactCheckRequest(BaseModel):
     content: str = Field(
-        ...,
-        min_length=10,
-        max_length=1000,
-        description="The claim to be fact-checked"
+        ..., min_length=10, max_length=1000, description="The claim to be fact-checked"
     )
-
-    @validator('content')
+
+    @validator("content")
     def validate_content(cls, v):
         if not v.strip():
             raise ValueError("Content cannot be empty or just whitespace")
         return v.strip()
 
+
 class GoogleFactCheckRequest(BaseFactCheckRequest):
     language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
     max_results_per_source: int = Field(default=10, ge=1, le=50)
 
+
 class AIFactCheckRequest(BaseFactCheckRequest):
     urls: List[str] = Field(
         ...,
         min_items=1,
         max_items=5,
-        description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing"
+        description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing",
     )
-
-    @validator('urls')
+
+    @validator("urls")
     def validate_urls(cls, urls):
         validated_urls = []
         for url in urls:
             if not url.strip():
                 raise ValueError("URL cannot be empty")
-
+
             # Add https:// if no protocol specified
-            if not url.startswith(('http://', 'https://')):
-                url = f'https://{url}'
-
+            if not url.startswith(("http://", "https://")):
+                url = f"https://{url}"
+
             try:
                 result = urlparse(url)
                 if not result.netloc:
@@ -122,18 +136,21 @@ class AIFactCheckRequest(BaseFactCheckRequest):
                 validated_urls.append(url)
             except Exception as e:
                 raise ValueError(f"Invalid URL {url}: {str(e)}")
-
+
         return validated_urls
 
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "content": "Indian flag was drawn in BUET campus",
-            "urls": [
-                "www.altnews.in/article-about-flag",
-                "www.another-source.com/related-news"
-            ]
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "content": "Indian flag was drawn in BUET campus",
+                "urls": [
+                    "www.altnews.in/article-about-flag",
+                    "www.another-source.com/related-news",
+                ],
+            }
         }
-    })
+    )
+
 
 # Response Models
 class BaseFactCheckResponse(BaseModel):
@@ -141,17 +158,20 @@ class BaseFactCheckResponse(BaseModel):
     token_usage: TokenUsage
     sources: List[str]
 
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "query": "Example statement to verify",
-            "token_usage": {
-                "prompt_tokens": 100,
-                "completion_tokens": 50,
-                "total_tokens": 150
-            },
-            "sources": ["source1.com", "source2.com"],
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "query": "Example statement to verify",
+                "token_usage": {
+                    "prompt_tokens": 100,
+                    "completion_tokens": 50,
+                    "total_tokens": 150,
+                },
+                "sources": ["source1.com", "source2.com"],
+            }
         }
-    })
+    )
+
 
 class GoogleFactCheckResponse(BaseFactCheckResponse):
     total_claims_found: int
@@ -159,71 +179,80 @@ class GoogleFactCheckResponse(BaseFactCheckResponse):
     verification_result: Dict[str, Any]
     summary: Dict[str, int]
 
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "query": "Example claim",
-            "total_claims_found": 1,
-            "results": [{
-                "text": "Example claim text",
-                "claimant": "Source name",
-                "claimReview": [{
-                    "publisher": {
-                        "name": "Fact Checker",
-                        "site": "factchecker.com"
-                    },
-                    "textualRating": "True"
-                }]
-            }],
-            "verification_result": {
-                "verdict": "True",
-                "confidence": "High",
-                "evidence": ["Supporting evidence"],
-                "reasoning": "Detailed analysis"
-            },
-            "sources": ["factchecker.com"],
-            "token_usage": {
-                "prompt_tokens": 100,
-                "completion_tokens": 50,
-                "total_tokens": 150
-            },
-            "summary": {
-                "total_sources": 1,
-                "fact_checking_sites_queried": 10
-            }
-        }
-    })
-
-class AIFactCheckResponse(BaseFactCheckResponse):
-    verification_result: Dict[str, VerificationResult]  # Changed to Dict to store results per URL
-
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "query": "Indian flag was drawn in BUET campus",
-            "verification_result": {
-                "https://www.source1.com": {
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "query": "Example claim",
+                "total_claims_found": 1,
+                "results": [
+                    {
+                        "text": "Example claim text",
+                        "claimant": "Source name",
+                        "claimReview": [
+                            {
+                                "publisher": {
+                                    "name": "Fact Checker",
+                                    "site": "factchecker.com",
+                                },
+                                "textualRating": "True",
+                            }
+                        ],
+                    }
+                ],
+                "verification_result": {
                     "verdict": "True",
                     "confidence": "High",
-                    "evidence": ["Supporting evidence from source 1"],
-                    "reasoning": "Detailed analysis from source 1",
-                    "missing_info": None
+                    "evidence": ["Supporting evidence"],
+                    "reasoning": "Detailed analysis",
                 },
-                "https://www.source2.com": {
-                    "verdict": "True",
-                    "confidence": "Medium",
-                    "evidence": ["Supporting evidence from source 2"],
-                    "reasoning": "Analysis from source 2",
-                    "missing_info": "Additional context needed"
-                }
-            },
-            "sources": ["source1.com", "source2.com"],
-            "token_usage": {
-                "prompt_tokens": 200,
-                "completion_tokens": 100,
-                "total_tokens": 300
+                "sources": ["factchecker.com"],
+                "token_usage": {
+                    "prompt_tokens": 100,
+                    "completion_tokens": 50,
+                    "total_tokens": 150,
+                },
+                "summary": {"total_sources": 1, "fact_checking_sites_queried": 10},
             }
         }
-    })
+    )
+
+
+class AIFactCheckResponse(BaseFactCheckResponse):
+    verification_result: Dict[
+        str, VerificationResult
+    ]  # Changed to Dict to store results per URL
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "query": "Indian flag was drawn in BUET campus",
+                "verification_result": {
+                    "https://www.source1.com": {
+                        "verdict": "True",
+                        "confidence": "High",
+                        "evidence": ["Supporting evidence from source 1"],
+                        "reasoning": "Detailed analysis from source 1",
+                        "missing_info": None,
+                    },
+                    "https://www.source2.com": {
+                        "verdict": "True",
+                        "confidence": "Medium",
+                        "evidence": ["Supporting evidence from source 2"],
+                        "reasoning": "Analysis from source 2",
+                        "missing_info": "Additional context needed",
+                    },
+                },
+                "sources": ["source1.com", "source2.com"],
+                "token_usage": {
+                    "prompt_tokens": 200,
+                    "completion_tokens": 100,
+                    "total_tokens": 300,
+                },
+            }
+        }
+    )
 
 
 # Backwards compatibility aliases
 FactCheckRequest = GoogleFactCheckRequest
-FactCheckResponse = GoogleFactCheckResponse
\ No newline at end of file
+FactCheckResponse = GoogleFactCheckResponse
diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py
index 1b30511..3ab4a8c 100644
--- a/app/models/fact_check_models.py
+++ b/app/models/fact_check_models.py
@@ -3,74 +3,73 @@ from typing import List, Literal, Union
 from datetime import datetime
 from enum import Enum
 
+
 class VerdictEnum(str, Enum):
     TRUE = "True"
     FALSE = "False"
     PARTIALLY_TRUE = "Partially True"
     UNVERIFIED = "Unverified"
 
+
 class ConfidenceEnum(str, Enum):
     HIGH = "High"
     MEDIUM = "Medium"
     LOW = "Low"
 
+
 class FactCheckRequest(BaseModel):
     query: str = Field(
         ...,
         min_length=3,
         max_length=500,
         description="The claim or statement to be fact-checked",
-        example="Did NASA confirm finding alien structures on Mars in 2024?"
+        example="Did NASA confirm finding alien structures on Mars in 2024?",
     )
 
+
 class Source(BaseModel):
     url: str
     name: str = ""
 
-    @validator('url')
+    @validator("url")
     def validate_url(cls, v):
         # Basic URL validation without requiring HTTP/HTTPS
         if not v or len(v) < 3:
             raise ValueError("URL must not be empty and must be at least 3 characters")
         return v
 
+
 class FactCheckResponse(BaseModel):
     claim: str = Field(
         ...,
         min_length=10,
         max_length=1000,
-        description="The exact claim being verified"
-    )
-    verdict: VerdictEnum = Field(
-        ...,
-        description="The verification verdict"
+        description="The exact claim being verified",
     )
+    verdict: VerdictEnum = Field(..., description="The verification verdict")
     confidence: ConfidenceEnum = Field(
-        ...,
-        description="Confidence level in the verdict"
+        ..., description="Confidence level in the verdict"
     )
     sources: List[Source] = Field(
-        ...,
-        min_items=1,
-        description="List of sources used in verification"
+        ..., min_items=1, description="List of sources used in verification"
     )
     evidence: str = Field(
         ...,
         min_length=20,
         max_length=500,
-        description="Concise summary of key evidence"
+        description="Concise summary of key evidence",
     )
     explanation: str = Field(
         ...,
         min_length=50,
         max_length=1000,
-        description="Detailed explanation of verification findings"
+        description="Detailed explanation of verification findings",
     )
     additional_context: str = Field(
         ...,
         min_length=20,
         max_length=500,
-        description="Important context about the verification"
+        description="Important context about the verification",
     )
 
     class Config:
@@ -82,20 +81,21 @@ class FactCheckResponse(BaseModel):
                 "sources": [
                     {
                         "url": "https://www.nasa.gov/mars-exploration",
-                        "name": "NASA Mars Exploration"
+                        "name": "NASA Mars Exploration",
                     },
                     {
                         "url": "https://factcheck.org/2024/mars-claims",
-                        "name": "FactCheck.org"
-                    }
+                        "name": "FactCheck.org",
+                    },
                 ],
                 "evidence": "NASA has made no such announcement. Recent Mars rover images show natural rock formations.",
                 "explanation": "Multiple fact-checking organizations investigated this claim. NASA's official communications and Mars mission reports from 2024 contain no mention of alien structures. The viral images being shared are misidentified natural geological formations.",
-                "additional_context": "Similar false claims about alien structures on Mars have circulated periodically since the first Mars rovers began sending back images."
+                "additional_context": "Similar false claims about alien structures on Mars have circulated periodically since the first Mars rovers began sending back images.",
             }
         }
 
+
 class ErrorResponse(BaseModel):
     detail: str
     error_code: str = Field(..., example="VALIDATION_ERROR")
-    path: str = Field(..., example="/check-facts")
\ No newline at end of file
+    path: str = Field(..., example="/check-facts")
diff --git a/app/models/scrap_websites_models.py b/app/models/scrap_websites_models.py
index 1c629c5..39dd949 100644
--- a/app/models/scrap_websites_models.py
+++ b/app/models/scrap_websites_models.py
@@ -1,38 +1,46 @@
 from pydantic import BaseModel
 from typing import List, Dict
 
+
 class SearchRequest(BaseModel):
     search_text: str
     source_types: List[str] = ["fact_checkers"]
 
+
 class Publisher(BaseModel):
     name: str
     site: str
 
+
 class ClaimReview(BaseModel):
     publisher: Publisher
     textualRating: str
 
+
 class Claim(BaseModel):
     claimReview: List[ClaimReview]
     claimant: str
     text: str
 
+
 class Summary(BaseModel):
     fact_checking_sites_queried: int
     total_sources: int
 
+
 class TokenUsage(BaseModel):
     prompt_tokens: int
     completion_tokens: int
     total_tokens: int
 
+
 class VerificationResult(BaseModel):
     verdict: str
     confidence: str
     evidence: List[str]
     reasoning: str
 
+
 class EnhancedFactCheckResponse(BaseModel):
     query: str
     results: List[Claim]
@@ -40,4 +48,4 @@ class EnhancedFactCheckResponse(BaseModel):
     summary: Summary
     token_usage: Dict[str, int]
     total_claims_found: int
-    verification_result: VerificationResult
\ No newline at end of file
+    verification_result: VerificationResult
""" @@ -25,19 +28,19 @@ class OpenAIClient: model="gpt-4", messages=[ {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt} + {"role": "user", "content": user_prompt}, ], - max_tokens=max_tokens + max_tokens=max_tokens, ) - content = response['choices'][0]['message']['content'] + content = response["choices"][0]["message"]["content"] # Parse the JSON string into a dictionary parsed_content = json.loads(content) - + return { "response": parsed_content, # Now returns a dictionary instead of string - "prompt_tokens": response['usage']['prompt_tokens'], - "completion_tokens": response['usage']['completion_tokens'], - "total_tokens": response['usage']['total_tokens'] + "prompt_tokens": response["usage"]["prompt_tokens"], + "completion_tokens": response["usage"]["completion_tokens"], + "total_tokens": response["usage"]["total_tokens"], } except json.JSONDecodeError as e: raise Exception(f"Failed to parse OpenAI response as JSON: {str(e)}") @@ -50,14 +53,14 @@ class OpenAIClient: """ try: response = openai.Embedding.create( - input=texts, - model="text-embedding-ada-002" + input=texts, model="text-embedding-ada-002" ) - embeddings = [data['embedding'] for data in response['data']] + embeddings = [data["embedding"] for data in response["data"]] return embeddings except Exception as e: raise Exception(f"OpenAI embedding error: {str(e)}") + class AIFactChecker: def __init__(self, openai_client: OpenAIClient): """Initialize the fact checker with OpenAI client.""" @@ -66,65 +69,71 @@ class AIFactChecker: chunk_size=1000, chunk_overlap=200, length_function=len, - separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""] + separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""], ) - + async def scrape_webpage(self, url: str) -> List[Document]: """Scrape webpage content without saving HTML files.""" try: async with aiohttp.ClientSession() as session: async with session.get(url) as response: if response.status != 200: - raise Exception(f"Failed to fetch URL: {url}, status: {response.status}") - + raise Exception( + f"Failed to fetch URL: {url}, status: {response.status}" + ) + html_content = await response.text() - + # Parse HTML with BeautifulSoup - soup = BeautifulSoup(html_content, 'html.parser') - + soup = BeautifulSoup(html_content, "html.parser") + # Create a Document with the parsed content doc = Document( - page_content=soup.get_text(separator='\n', strip=True), - metadata={"source": url} + page_content=soup.get_text(separator="\n", strip=True), + metadata={"source": url}, ) - + # Split into chunks docs_chunks = self.text_splitter.split_documents([doc]) - - logger.info(f"Successfully scraped webpage | chunks={len(docs_chunks)}") + + logger.info( + f"Successfully scraped webpage | chunks={len(docs_chunks)}" + ) return docs_chunks - + except Exception as e: logger.error(f"Error scraping webpage | url={url} | error={str(e)}") raise def find_relevant_chunks( - self, - query_embedding: List[float], - doc_embeddings: List[List[float]], - docs: List[Document] + self, + query_embedding: List[float], + doc_embeddings: List[List[float]], + docs: List[Document], ) -> List[Document]: """Find most relevant document chunks using cosine similarity.""" try: query_array = np.array(query_embedding) chunks_array = np.array(doc_embeddings) - + similarities = np.dot(chunks_array, query_array) / ( np.linalg.norm(chunks_array, axis=1) * np.linalg.norm(query_array) ) - + top_indices = np.argsort(similarities)[-5:][::-1] return [docs[i] for i in top_indices] - + except Exception as e: 
logger.error(f"Error finding relevant chunks | error={str(e)}") raise - async def verify_fact(self, query: str, relevant_docs: List[Document]) -> Dict[str, Any]: + async def verify_fact( + self, query: str, relevant_docs: List[Document] + ) -> Dict[str, Any]: """Verify fact using OpenAI's API with context from relevant documents.""" try: context = "\n\n".join([doc.page_content for doc in relevant_docs]) - + system_prompt = """You are a professional fact-checking assistant. Analyze the provided context and determine if the given statement is true, false, or if there isn't enough information. @@ -136,32 +145,37 @@ class AIFactChecker: "reasoning": "Your detailed analysis and reasoning", "missing_info": "Any important missing information (if applicable)" }""" - + user_prompt = f"""Context: {context} Statement to verify: "{query}" Analyze the statement based on the provided context and return your response in the specified JSON format.""" - + response = await self.openai_client.generate_text_response( - system_prompt=system_prompt, - user_prompt=user_prompt, - max_tokens=800 + system_prompt=system_prompt, user_prompt=user_prompt, max_tokens=800 ) - - sources = list(set([doc.metadata.get('source', 'Unknown source') for doc in relevant_docs])) - + + sources = list( + set( + [ + doc.metadata.get("source", "Unknown source") + for doc in relevant_docs + ] + ) + ) + return { "verification_result": response["response"], # This is now a dictionary "sources": sources, "token_usage": { "prompt_tokens": response["prompt_tokens"], "completion_tokens": response["completion_tokens"], - "total_tokens": response["total_tokens"] - } + "total_tokens": response["total_tokens"], + }, } - + except Exception as e: logger.error(f"Error verifying fact | error={str(e)}") raise @@ -170,16 +184,18 @@ class AIFactChecker: """Main method to check a fact against a webpage.""" try: docs = await self.scrape_webpage(url) - + doc_texts = [doc.page_content for doc in docs] doc_embeddings = self.openai_client.get_embeddings(doc_texts) query_embedding = self.openai_client.get_embeddings([query]) - - relevant_docs = self.find_relevant_chunks(query_embedding[0], doc_embeddings, docs) + + relevant_docs = self.find_relevant_chunks( + query_embedding[0], doc_embeddings, docs + ) verification_result = await self.verify_fact(query, relevant_docs) - + return verification_result - + except Exception as e: logger.error(f"Error checking fact | error={str(e)}") - raise \ No newline at end of file + raise diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc index b0b0fa42365d2d038144e1a28f8284abff9098f8..e4ce169e3a1593c1a2b7f1024be19bf4dd22ec0f 100644 GIT binary patch delta 223 zcmbQLHbIT=G%qg~0}v$TC#I{2Z{$14Cpa0%Eq((eUNHPaI*4qePCeaAp7U1=*d5Y FWB`0dLx2DP delta 252 zcmbQBHdT%9G%qg~0}zBIB&I(S-pF^7Pp}urEq)IqUNHPz%BKG6U5?TVB`AC4Pu2cvN2lD5SfvG qQBv;<1CagmQzB~~qZr%IPjSq`!feIdKzkR Dict: """ Fetch fact checks from a specific site using the Google Fact Check API @@ -156,9 +159,9 @@ async def fetch_fact_checks( "query": query, "languageCode": "en-US", "reviewPublisherSiteFilter": site.domain, - "pageSize": 10 + "pageSize": 10, } - + response = requests.get(base_url, params=params) response.raise_for_status() return response.json() @@ -168,23 +171,22 @@ async def fetch_fact_checks( detail=ErrorResponse( detail=f"Error fetching from {site.domain}: {str(e)}", error_code="FACT_CHECK_SERVICE_ERROR", - path="/check-facts" - ).dict() + path="/check-facts", + ).dict(), ) 
diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc
index b0b0fa42365d2d038144e1a28f8284abff9098f8..e4ce169e3a1593c1a2b7f1024be19bf4dd22ec0f 100644
GIT binary patch
[binary deltas omitted]

diff --git a/app/websites/fact_checker_website.py b/app/websites/fact_checker_website.py
[index line and start of the first hunk lost in the original]
 ) -> Dict:
     """
     Fetch fact checks from a specific site using the Google Fact Check API
@@ -156,9 +159,9 @@ async def fetch_fact_checks(
         "query": query,
         "languageCode": "en-US",
         "reviewPublisherSiteFilter": site.domain,
-        "pageSize": 10
+        "pageSize": 10,
     }
-
+
     response = requests.get(base_url, params=params)
     response.raise_for_status()
     return response.json()
     except requests.RequestException as e:
         raise HTTPException(
             status_code=503,
             detail=ErrorResponse(
                 detail=f"Error fetching from {site.domain}: {str(e)}",
                 error_code="FACT_CHECK_SERVICE_ERROR",
-                path="/check-facts"
-            ).dict()
+                path="/check-facts",
+            ).dict(),
         )
     except ValueError as e:
         raise HTTPException(
             status_code=500,
             detail=ErrorResponse(
-                detail=str(e),
-                error_code="CONFIGURATION_ERROR",
-                path="/check-facts"
-            ).dict()
+                detail=str(e), error_code="CONFIGURATION_ERROR", path="/check-facts"
+            ).dict(),
         )
 
+
 def get_all_sources() -> List[FactCheckSource]:
     """
     Get all sources sorted by priority
     """
     # all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"]
-    all_sources = SOURCES["fact_checkers"]
-    return sorted(all_sources, key=lambda x: x.priority)
\ No newline at end of file
+    all_sources = SOURCES["fact_checkers"]
+    return sorted(all_sources, key=lambda x: x.priority)
diff --git a/main.py b/main.py
index 25d68c4..7048f3b 100644
--- a/main.py
+++ b/main.py
@@ -7,9 +7,7 @@ from app.config import FRONTEND_URL
 
 # Initialize FastAPI app
 app = FastAPI(
-    title="Your API Title",
-    description="Your API Description",
-    version="1.0.0"
+    title="Your API Title", description="Your API Description", version="1.0.0"
 )
 
 # CORS configuration
@@ -30,16 +28,19 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+
 # Basic root endpoint
 @app.get("/")
 async def root():
     return {"message": "Welcome to your FastAPI application"}
 
+
 # Health check endpoint
 @app.get("/health")
 async def health_check():
     return {"status": "healthy"}
 
+
 app.include_router(fact_check_router, prefix="")
 app.include_router(aifact_check_router, prefix="")
 app.include_router(scrap_websites_router, prefix="")
@@ -50,4 +51,5 @@ app.include_router(scrap_websites_router, prefix="")
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
\ No newline at end of file
+
+    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
diff --git a/tests/test_main.py b/tests/test_main.py
index 2298bfc..e71e19a 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -3,16 +3,19 @@ from main import app
 
 client = TestClient(app)
 
+
 def test_root_endpoint():
     response = client.get("/")
     assert response.status_code == 200
     assert response.json() == {"message": "Welcome to your FastAPI application"}
 
+
 def test_health_endpoint():
     response = client.get("/health")
     assert response.status_code == 200
     assert response.json() == {"status": "healthy"}
 
+
 def test_cors_headers():
     response = client.get("/", headers={"Origin": "http://localhost:5173"})
-    assert response.headers["access-control-allow-origin"] == "http://localhost:5173"
\ No newline at end of file
+    assert response.headers["access-control-allow-origin"] == "http://localhost:5173"
-- 
2.45.3

From b79c746e1587b46f378bcfd00af1ba87e824c4f8 Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Tue, 17 Dec 2024 18:43:42 +0600
Subject: [PATCH 05/10] added .env

---
 app/__pycache__/config.cpython-312.pyc | Bin 646 -> 583 bytes
 app/config.py                          |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc
index 74c6db23679571ee99553ec76248c59aa42279ef..6007d78b61e0918fb5842bbbdab427fb5d083f57 100644
GIT binary patch
[binary deltas omitted]

diff --git a/app/config.py b/app/config.py
index 6e7437c..91b2bfe 100644
--- a/app/config.py
+++ b/app/config.py
@@ -9,4 +9,4 @@ GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"]
 GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"]
 OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
 
-FRONTEND_URL = os.environ["FRONTEND_URL"]
+# FRONTEND_URL = os.environ["FRONTEND_URL"]
-- 
2.45.3

From f32745326b09eb862f8f88896a085d71358bd30d Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Tue, 17 Dec 2024 18:46:38 +0600
Subject: [PATCH 06/10] added .env

---
 app/__pycache__/config.cpython-312.pyc | Bin 583 -> 646 bytes
 app/config.py                          |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc
index 6007d78b61e0918fb5842bbbdab427fb5d083f57..f94cc0b8b460774ed4b789e858d80b86129ba038 100644
GIT binary patch
[binary deltas omitted]

diff --git a/app/config.py b/app/config.py
index 91b2bfe..6e7437c 100644
--- a/app/config.py
+++ b/app/config.py
@@ -9,4 +9,4 @@ GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"]
 GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"]
 OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
 
-# FRONTEND_URL = os.environ["FRONTEND_URL"]
+FRONTEND_URL = os.environ["FRONTEND_URL"]
-- 
2.45.3

From 9be0343695d8370a4bee6d38b45a4430b017f6d0 Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Tue, 17 Dec 2024 18:51:05 +0600
Subject: [PATCH 07/10] added curl command

---
 .gitlab-ci.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 2f4fa69..61bc1ba 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,6 +14,8 @@ stages:
   - test
 
 before_script:
+  - apt-get update
+  - apt-get install -y curl
   - python --version
   - pip install virtualenv
   - virtualenv venv
   - source venv/bin/activate
@@ -47,4 +49,4 @@ test:
       else
         echo "❌ Health check failed with status $RESPONSE"
         exit 1
-      fi
+      fi
\ No newline at end of file
-- 
2.45.3

From 15a0061a0d9c556f57ead6812f8405bbd6c52dc5 Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Wed, 18 Dec 2024 13:10:03 +0600
Subject: [PATCH 08/10] fixed response

---
 .../__pycache__/fact_check.cpython-312.pyc | Bin 8105 -> 9837 bytes
 app/api/fact_check.py                      | 112 ++++++++++--------
 .../fact_check_models.cpython-312.pyc      | Bin 4372 -> 4938 bytes
 app/models/fact_check_models.py            |  54 +++++++--
 images-test.jpg                            | Bin 0 -> 11062 bytes
 main.py                                    |   1 +
 6 files changed, 107 insertions(+), 60 deletions(-)
 create mode 100644 images-test.jpg

diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc
index f784c29a418da8adfc21cffadd4400a7b7cf543c..a189688628c487b2c9a7384063623286ff4007c8 100644
GIT binary patch
[binary deltas omitted]

diff --git a/app/api/fact_check.py b/app/api/fact_check.py
index ab4cd9f..4d870a8 100644
--- a/app/api/fact_check.py
+++ b/app/api/fact_check.py
@@ -1,13 +1,17 @@
 from fastapi import APIRouter, HTTPException
 import httpx
+from typing import Union
 from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY
 from app.api.scrap_websites import search_websites, SearchRequest
 from app.services.openai_client import OpenAIClient
 from app.models.fact_check_models import (
     FactCheckRequest,
     FactCheckResponse,
+    UnverifiedFactCheckResponse,
     ErrorResponse,
     Source,
+    VerdictEnum,
+    ConfidenceEnum
 )
 from app.websites.fact_checker_website import get_all_sources

@@ -15,7 +19,7 @@ fact_check_router = APIRouter()
 openai_client = OpenAIClient(OPENAI_API_KEY)


-async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse:
+async def generate_fact_report(query: str, fact_check_data: dict) -> Union[FactCheckResponse, UnverifiedFactCheckResponse]:
     """Generate a fact check report using OpenAI based on the fact check results."""
     try:
         base_system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources.

@@ -24,7 +28,24 @@ Rules:
 1. Include all source URLs and names in the sources list
 2. Keep the explanation focused on verifiable facts
 3. Include dates when available
-4. Maintain objectivity in the report"""
+4. Maintain objectivity in the report
+5. If no reliable sources are found, provide a clear explanation why"""
+
+        # If no sources were found, return an unverified response
+        if not fact_check_data.get("claims") and (
+            not fact_check_data.get("urls_found") or
+            fact_check_data.get("status") == "no_results" or
+            fact_check_data.get("verification_result", {}).get("no_sources_found")
+        ):
+            return UnverifiedFactCheckResponse(
+                claim=query,
+                verdict=VerdictEnum.UNVERIFIED,
+                confidence=ConfidenceEnum.LOW,
+                sources=[],
+                evidence="No fact-checking sources have verified this claim yet.",
+                explanation="Our search across reputable fact-checking websites did not find any formal verification of this claim. This doesn't mean the claim is false - just that it hasn't been formally fact-checked yet.",
+                additional_context="The claim may be too recent for fact-checkers to have investigated, or it may not have been widely circulated enough to warrant formal fact-checking."
+            )

         base_user_prompt = """Generate a comprehensive fact check report in this exact JSON format:
 {
@@ -40,9 +61,7 @@ Rules:
     "evidence": "A concise summary of the key evidence (1-2 sentences)",
     "explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)",
     "additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)"
-}
-
-Ensure all URLs in sources are complete (including https:// if missing) and each source has both a URL and name."""
+}"""

         if "claims" in fact_check_data:
             system_prompt = base_system_prompt
@@ -71,75 +90,74 @@ Ensure all URLs in sources are complete (including https:// if missing) and each
 4. Note any conflicting information between sources"""

         response = await openai_client.generate_text_response(
-            system_prompt=system_prompt, user_prompt=user_prompt, max_tokens=1000
+            system_prompt=system_prompt,
+            user_prompt=user_prompt,
+            max_tokens=1000
         )

         try:
-            # First try to parse the response directly
             response_data = response["response"]

-            # Clean up sources before validation
             if isinstance(response_data.get("sources"), list):
                 cleaned_sources = []
                 for source in response_data["sources"]:
                     if isinstance(source, str):
-                        # Convert string sources to Source objects
-                        url = (
-                            source if source.startswith("http") else f"https://{source}"
-                        )
+                        url = source if source.startswith("http") else f"https://{source}"
                         cleaned_sources.append({"url": url, "name": source})
                     elif isinstance(source, dict):
-                        # Ensure URL has proper scheme
                         url = source.get("url", "")
                         if url and not url.startswith("http"):
                             source["url"] = f"https://{url}"
                         cleaned_sources.append(source)
                 response_data["sources"] = cleaned_sources

-            fact_check_response = FactCheckResponse(**response_data)
-            return fact_check_response
+            if response_data["verdict"] == "Unverified" or not response_data.get("sources"):
+                return UnverifiedFactCheckResponse(**response_data)
+            return FactCheckResponse(**response_data)

         except Exception as validation_error:
             print(f"Response validation error: {str(validation_error)}")
-            raise HTTPException(
-                status_code=422,
-                detail=ErrorResponse(
-                    detail=f"Invalid response format: {str(validation_error)}",
-                    error_code="VALIDATION_ERROR",
-                    path="/check-facts",
-                ).dict(),
+            return UnverifiedFactCheckResponse(
+                claim=query,
+                verdict=VerdictEnum.UNVERIFIED,
+                confidence=ConfidenceEnum.LOW,
+                sources=[],
+                evidence="An error occurred while processing the fact check results.",
+                explanation="The system encountered an error while validating the fact check results.",
+                additional_context="This is a technical error and does not reflect on the truthfulness of the claim."
             )

     except Exception as e:
         print(f"Error generating fact report: {str(e)}")
-        raise HTTPException(
-            status_code=500,
-            detail=ErrorResponse(
-                detail="Error generating fact report",
-                error_code="FACT_CHECK_ERROR",
-                path="/check-facts",
-            ).dict(),
+        return UnverifiedFactCheckResponse(
+            claim=query,
+            verdict=VerdictEnum.UNVERIFIED,
+            confidence=ConfidenceEnum.LOW,
+            sources=[],
+            evidence="An error occurred while generating the fact check report.",
+            explanation="The system encountered an unexpected error while processing the fact check request.",
+            additional_context="This is a technical error and does not reflect on the truthfulness of the claim."
        )


-@fact_check_router.post("/check-facts", response_model=FactCheckResponse)
+@fact_check_router.post("/check-facts", response_model=Union[FactCheckResponse, UnverifiedFactCheckResponse])
 async def check_facts(request: FactCheckRequest):
     """
     Fetch fact check results and generate a comprehensive report.
""" if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: - raise HTTPException( - status_code=500, - detail=ErrorResponse( - detail="Google API key or base URL is not configured", - error_code="CONFIGURATION_ERROR", - path="/check-facts", - ).dict(), + return UnverifiedFactCheckResponse( + claim=request.query, + verdict=VerdictEnum.UNVERIFIED, + confidence=ConfidenceEnum.LOW, + sources=[], + evidence="The fact-checking service is not properly configured.", + explanation="The system is missing required API configuration for fact-checking services.", + additional_context="This is a temporary system configuration issue." ) headers = {"Content-Type": "application/json"} async with httpx.AsyncClient() as client: - # Get fact checker sources from the centralized configuration fact_checker_sources = get_all_sources() for source in fact_checker_sources: @@ -170,7 +188,8 @@ async def check_facts(request: FactCheckRequest): try: search_request = SearchRequest( - search_text=request.query, source_types=["fact_checkers"] + search_text=request.query, + source_types=["fact_checkers"] ) ai_response = await search_websites(search_request) @@ -178,11 +197,10 @@ async def check_facts(request: FactCheckRequest): except Exception as e: print(f"Error in AI fact check: {str(e)}") - raise HTTPException( - status_code=404, - detail=ErrorResponse( - detail="No fact check results found", - error_code="NOT_FOUND", - path="/check-facts", - ).dict(), - ) + return await generate_fact_report(request.query, { + "status": "no_results", + "verification_result": { + "no_sources_found": True, + "reason": str(e) + } + }) \ No newline at end of file diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc index 1e810e2e116619d2f9073244b85d2e74028286a7..91cf86c610d9265d0dcc28d32913c7e643b06482 100644 GIT binary patch delta 1142 zcmbu7&r1|x7{}jt$C;g(-I@KVyPK}97F$D_RD{3M%nW~2+J+{I73MqYQYy`AupV*+ zA-gGWhYlUQS!jpqTz^0kBzc&Gpe~Y!V9iZvDd>G>CHBy*f%!b&clgdT&yRUj{x+(v zDT<8PwP)NKX>%>ALAym<~E;fO!a;j$csc@D@?l;?0WM?fF}uq1<8XtTIy{GM*ahvPb- zSPTf;Us+#SS}GpI&uF64|MW^BYlyEd4E3F>j?*&lr~)xK7POQ%YgOS+G<9Dgn&^ z1<(R00ki__fGR)-;4+|#&I&$P9Y_FFmv}>eDzz0$iT5>q8)f~ejs^8YLI1~+GSlrf z{dF0h3k#{%`R;}2!+~AI@xqQsN7Pz^b+h z)H#4v?Rij8?KTE=fkCrvN@MZyoAF6~EMd(g*F+l_U>-(0!0O~8s7nB=r%q5^2nEp) zQ-3dMU+q@V*HYL&f{92ZA`{0GdmQ(3NvEdJJ?S~!ln3xKt#&Niren0qdF9Y?&7P~; zs?{2*HP8Ghn$D(J5BtDvb|;|OIrQ0CXojwu#pPz{;k1Avt=NoI@9B)8Srl#y>wbBZvLTwk)$P-*CBk`8#6Bl^CNN>XtA7xv#^YB=>;?+^3&hHFi*)nxB@qiC|UG zM`zt(d5}GZO(*Wsr|y>1BKFMpWe^LQqEaj@X&J;qrWv2W@?$B3SjdcG4xd_R|AAPr GzWoQ&hXy|Y delta 871 zcmYk4%}*0S7{+(H?RIzj#cmO3LUpW3VGXnf!6=Hspoj>F2?sHP+3s$)BeaV<3kki* z4UCtP@t_F@Zzd+5h&TTNz0ivxUbxDEq?M{Sb*5J1B>S7^{mA=1ne2!3^HlU>BqDS4 zaGd~0-+QwZg6#hYg5Ryuo?0Plw+O~I_f?@IS^92zvMIs z9kLnd3(s+7k`_-TG~Vr@8Ml`~qxc*!mUUDtlSMJ1Xy8vSxMsN~GQ2X`68nThK>Nz% zo!EOuDx%Uu0;e@AMm<3aL1oa{D5 zQ!;})KHF!?X2=Yik)0p{_xbgx79&UDw-?d{&s)L6*=$}nJfmP4OBu(tvjgV_2D3%o z#r;k9xEvX$^|8No@-ngjzLOu3*LUGu!GdeZ)t4bE>b8Yp&O#*{;)3p4CZvExgpp~L zJd{T?Q_{Uv)@@tPvSTBI%Dm$ibq_fuoGEXTSJBydh*8O~phsutk&8VzO}#+Zq3^~P z*m?RgcZA_8!!?F6h8qmyBpw@;C#bwlKRiPgV(^K~!3nSr zv-JRfqN5g)y46}cms|%m9|!OGDSK9Y_0vjMw8qDia#c*Y2{vnzVn$P4g#{`@oWak~ zbkWCDKSR?`gsG;fZl;>ff>d?5Sr-3il7*85w8cqGmV~o3#8q-qzBIHihV~M}4c?a$ zHYO|4W6l>4=ntAq(L&|%UV4g_lETK#t+`72>4R38JZoDN+XM1fj)EM>okUbV33u3% zS++!;Vv-?8fO;{=)QlDh)d;C|mOlj$VzfAt1ky8XUbE QD}V>*E4RLJ^lHA)Z(_IJr2qf` diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py index 3ab4a8c..59ffbfe 100644 --- 
diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py
index 3ab4a8c..59ffbfe 100644
--- a/app/models/fact_check_models.py
+++ b/app/models/fact_check_models.py
@@ -33,12 +33,44 @@ class Source(BaseModel):

     @validator("url")
     def validate_url(cls, v):
-        # Basic URL validation without requiring HTTP/HTTPS
         if not v or len(v) < 3:
             raise ValueError("URL must not be empty and must be at least 3 characters")
         return v


+class UnverifiedFactCheckResponse(BaseModel):
+    claim: str = Field(
+        ...,
+        min_length=10,
+        max_length=1000,
+        description="The exact claim being verified",
+    )
+    verdict: VerdictEnum = Field(..., description="The verification verdict")
+    confidence: ConfidenceEnum = Field(..., description="Confidence level in the verdict")
+    sources: List[Source] = Field(
+        default=[],
+        description="List of sources used in verification"
+    )
+    evidence: str = Field(
+        ...,
+        min_length=20,
+        max_length=500,
+        description="Concise summary of key evidence",
+    )
+    explanation: str = Field(
+        ...,
+        min_length=50,
+        max_length=1000,
+        description="Detailed explanation of verification findings",
+    )
+    additional_context: str = Field(
+        ...,
+        min_length=20,
+        max_length=500,
+        description="Important context about the verification",
+    )
+
+
 class FactCheckResponse(BaseModel):
     claim: str = Field(
         ...,
@@ -47,11 +79,11 @@ class FactCheckResponse(BaseModel):
         description="The exact claim being verified",
     )
     verdict: VerdictEnum = Field(..., description="The verification verdict")
-    confidence: ConfidenceEnum = Field(
-        ..., description="Confidence level in the verdict"
-    )
+    confidence: ConfidenceEnum = Field(..., description="Confidence level in the verdict")
     sources: List[Source] = Field(
-        ..., min_items=1, description="List of sources used in verification"
+        ...,
+        min_items=1,
+        description="List of sources used in verification"
     )
     evidence: str = Field(
         ...,
@@ -82,15 +114,11 @@ class FactCheckResponse(BaseModel):
                     {
                         "url": "https://www.nasa.gov/mars-exploration",
                         "name": "NASA Mars Exploration",
-                    },
-                    {
-                        "url": "https://factcheck.org/2024/mars-claims",
-                        "name": "FactCheck.org",
-                    },
+                    }
                 ],
                 "evidence": "NASA has made no such announcement. Recent Mars rover images show natural rock formations.",
-                "explanation": "Multiple fact-checking organizations investigated this claim. NASA's official communications and Mars mission reports from 2024 contain no mention of alien structures. The viral images being shared are misidentified natural geological formations.",
-                "additional_context": "Similar false claims about alien structures on Mars have circulated periodically since the first Mars rovers began sending back images.",
+                "explanation": "Multiple fact-checking organizations investigated this claim. NASA's official communications and Mars mission reports from 2024 contain no mention of alien structures.",
+                "additional_context": "Similar false claims about alien structures on Mars have circulated periodically.",
             }
         }

@@ -98,4 +126,4 @@ class FactCheckResponse(BaseModel):
 class ErrorResponse(BaseModel):
     detail: str
     error_code: str = Field(..., example="VALIDATION_ERROR")
-    path: str = Field(..., example="/check-facts")
+    path: str = Field(..., example="/check-facts")
\ No newline at end of file
diff --git a/images-test.jpg b/images-test.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..868ab179e44741a0d143309984c572b9222fc785
GIT binary patch
literal 11062
[base85 image data omitted]
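The new model mirrors FactCheckResponse except that sources may be empty, while evidence, explanation, and additional_context keep minimum lengths (20, 50, and 20 characters), so every fallback branch in fact_check.py has to supply reasonably verbose strings. A quick validation sketch with illustrative values, assuming the pydantic 2.x pinned in requirements.txt:

```python
# Sketch only: shows the constraints UnverifiedFactCheckResponse enforces
# on the fallback responses built in fact_check.py.
from app.models.fact_check_models import (
    ConfidenceEnum,
    UnverifiedFactCheckResponse,
    VerdictEnum,
)

fallback = UnverifiedFactCheckResponse(
    claim="NASA confirmed alien structures on Mars.",  # >= 10 chars
    verdict=VerdictEnum.UNVERIFIED,
    confidence=ConfidenceEnum.LOW,
    sources=[],  # allowed here; FactCheckResponse requires min_items=1
    evidence="No fact-checking sources have verified this claim yet.",
    explanation=(
        "Our search across reputable fact-checking websites did not find "
        "any formal verification of this claim."
    ),
    additional_context="The claim may be too recent to have been reviewed.",
)
print(fallback.model_dump_json(indent=2))
```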
diff --git a/main.py b/main.py
index 7048f3b..6db5c7d 100644
--- a/main.py
+++ b/main.py
@@ -17,6 +17,7 @@ origins = [
     "http://localhost:5173",
     "http://0.0.0.0",
     "http://0.0.0.0:5173",
+    "*"
 ]
-- 
2.45.3


From 56335cbfa78dce301990d5cc332ac64b91cc4ea2 Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Wed, 18 Dec 2024 13:16:48 +0600
Subject: [PATCH 09/10] fixed pipeline error

---
 main.py            | 24 +++---------------------
 tests/test_main.py |  5 +----
 2 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/main.py b/main.py
index 6db5c7d..6d8bb26 100644
--- a/main.py
+++ b/main.py
@@ -11,46 +11,28 @@ app = FastAPI(
 )

 # CORS configuration
-origins = [
-    FRONTEND_URL,
-    "http://localhost",
-    "http://localhost:5173",
-    "http://0.0.0.0",
-    "http://0.0.0.0:5173",
-    "*"
-]
-
-
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
+    allow_origins=["*"],  # Wildcard origin only
+    allow_credentials=False,  # Must be False when the origin is a wildcard
     allow_methods=["*"],
     allow_headers=["*"],
 )

-
 # Basic root endpoint
 @app.get("/")
 async def root():
     return {"message": "Welcome to your FastAPI application"}

-
 # Health check endpoint
 @app.get("/health")
 async def health_check():
     return {"status": "healthy"}

-
 app.include_router(fact_check_router, prefix="")
 app.include_router(aifact_check_router, prefix="")
 app.include_router(scrap_websites_router, prefix="")

-# Include routers (uncomment and modify as needed)
-# from routes import some_router
-# app.include_router(some_router, prefix="/your-prefix", tags=["your-tag"])
-
 if __name__ == "__main__":
     import uvicorn
-
-    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
+    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
\ No newline at end of file
diff --git a/tests/test_main.py b/tests/test_main.py
index e71e19a..47a3536 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -3,19 +3,16 @@ from main import app

 client = TestClient(app)

-
 def test_root_endpoint():
     response = client.get("/")
     assert response.status_code == 200
     assert response.json() == {"message": "Welcome to your FastAPI application"}

-
 def test_health_endpoint():
     response = client.get("/health")
     assert response.status_code == 200
     assert response.json() == {"status": "healthy"}

-
 def test_cors_headers():
     response = client.get("/", headers={"Origin": "http://localhost:5173"})
-    assert response.headers["access-control-allow-origin"] == "http://localhost:5173"
+    assert response.headers["access-control-allow-origin"] == "*"
\ No newline at end of file
-- 
2.45.3
From a1a699f9b3390874a87d80a93952d2b478cfd2ab Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Wed, 18 Dec 2024 17:39:00 +0600
Subject: [PATCH 10/10] dockerfile added

---
 Dockerfile          | 8 ++++++++
 docker-compose.yaml | 5 +++++
 2 files changed, 13 insertions(+)
 create mode 100644 docker-compose.yaml

diff --git a/Dockerfile b/Dockerfile
index e69de29..adbccf8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,8 @@
+
+FROM python:3.12
+COPY requirements.txt requirements.txt
+RUN pip install --upgrade pip
+RUN pip install -r requirements.txt
+COPY . .
+EXPOSE 8000
+ENTRYPOINT ["gunicorn", "main:app", "--workers", "4", "--timeout", "90", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000"]
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..d7cffe8
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,5 @@
+services:
+  backend:
+    build: .
+    container_name: backend-service
+    restart: always
-- 
2.45.3
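Two closing notes. First, the credentials flip in patch 09 follows from the CORS spec: browsers reject Access-Control-Allow-Origin: * on credentialed requests, so Starlette's CORSMiddleware echoes the caller's origin instead of the literal wildcard while allow_credentials=True, which is why the old test saw http://localhost:5173 echoed back. With credentials disabled the literal * is returned, matching the updated assertion. A preflight sketch against the same app:

```python
# Sketch only: a CORS preflight against the wildcard configuration in
# patch 09. With allow_credentials=False, the middleware answers with a
# literal "*" rather than echoing the request origin.
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)

response = client.options(
    "/check-facts",
    headers={
        "Origin": "https://example.com",
        "Access-Control-Request-Method": "POST",
    },
)
assert response.status_code == 200
assert response.headers["access-control-allow-origin"] == "*"
```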
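Second, the compose service in patch 10 restarts automatically but publishes no ports, so the gunicorn socket exposed by the Dockerfile is reachable only from other containers on the same Docker network. A possible extension, not part of the submitted patch (the env_file entry assumes the keys read by app.config live in a local .env):

```yaml
# Hypothetical docker-compose.yaml extension; the ports and env_file
# entries below are assumptions, not part of the submitted patch.
services:
  backend:
    build: .
    container_name: backend-service
    restart: always
    ports:
      - "8000:8000"   # matches EXPOSE 8000 and the gunicorn bind
    env_file:
      - .env          # assumed to provide GOOGLE_API_KEY, OPENAI_API_KEY
```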