From 83a886960b4e605927c015aee9409bdff0c912ca Mon Sep 17 00:00:00 2001
From: Utsho Dey
Date: Wed, 11 Dec 2024 17:39:10 +0600
Subject: [PATCH 1/5] base code added

---
 app/__pycache__/config.cpython-312.pyc        | Bin 519 -> 507 bytes
 .../__pycache__/fact_check.cpython-312.pyc    | Bin 11344 -> 3800 bytes
 app/api/fact_check.py                         | 219 +-----------------
 app/config.py                                 |   2 +-
 .../fact_check_models.cpython-312.pyc         | Bin 0 -> 5577 bytes
 app/models/fact_check_models.py               | 109 +++++++++
 .../fact_checker_website.cpython-312.pyc      | Bin 0 -> 3341 bytes
 app/websites/fact_checker_website.py          |  99 ++++++++
 8 files changed, 221 insertions(+), 208 deletions(-)
 create mode 100644 app/models/__pycache__/fact_check_models.cpython-312.pyc
 create mode 100644 app/models/fact_check_models.py
 create mode 100644 app/websites/__pycache__/fact_checker_website.cpython-312.pyc
 create mode 100644 app/websites/fact_checker_website.py

diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc
index 53d89a61cbd15bc0b5b4e4eca7e37df2c7c4ddfe..267bba1df1c923dade63cc9cc26619d42bbdc2a5 100644
GIT binary patch
(binary patch data omitted: compiled .pyc artifact)

diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc
index daa45a81346a8c905d7b637ae6176f1e957e0533..56cc8981234a04f5d9b6da8c11310f6f86677fde 100644
GIT binary patch
(binary patch data omitted: compiled .pyc artifact)
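The binary hunks above only update committed __pycache__ bytecode caches; patch 2 below widens the .gitignore rule from /__pycache__/ to __pycache__ so these artifacts stop appearing in diffs. A minimal cleanup sketch, not part of the patch series, assuming it is run from the repository root:

# Hypothetical helper (not in this repository): delete every __pycache__
# directory so only source files remain to be committed.
import pathlib
import shutil

def remove_pycache(root: str = ".") -> None:
    # Materialize the matches first so deletion does not disturb the walk.
    for cache_dir in list(pathlib.Path(root).rglob("__pycache__")):
        shutil.rmtree(cache_dir, ignore_errors=True)

if __name__ == "__main__":
    remove_pycache()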
diff --git a/app/api/fact_check.py b/app/api/fact_check.py
index 3e7a12d..dcd30de 100644
--- a/app/api/fact_check.py
+++ b/app/api/fact_check.py
@@ -1,11 +1,13 @@
 from fastapi import APIRouter, HTTPException
-from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
-from typing import Dict, List, Optional, Union
-import requests
-from enum import Enum
-from datetime import datetime
 import json
-from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
+from datetime import datetime
+from typing import Dict
+
+from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL
+from app.models.fact_check_models import (
+    FactCheckResponse, FactCheckRequest, Claim, ErrorResponse
+)
+from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources
 
 fact_check_router = APIRouter()
 
@@ -15,199 +17,6 @@ class CustomJSONEncoder(json.JSONEncoder):
             return obj.isoformat()
         return super().default(obj)
 
-class ErrorResponse(BaseModel):
-    detail: str
-    error_code: str = Field(..., description="Unique error code for this type of error")
-    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
-    path: Optional[str] = Field(None, description="The endpoint path where error occurred")
-
-    model_config = ConfigDict(json_schema_extra={
-        "example": {
-            "detail": "Error description",
-            "error_code": "ERROR_CODE",
-            "timestamp": "2024-12-09T16:49:30.905765",
-            "path": "/check-facts"
-        }
-    })
-
-class RequestValidationError(BaseModel):
-    loc: List[str]
-    msg: str
-    type: str
-
-class Publisher(BaseModel):
-    name: str
-    site: Optional[str] = Field(None, description="Publisher's website")
-
-    @validator('site')
-    def validate_site(cls, v):
-        if v and not
(v.startswith('http://') or v.startswith('https://')): - return f"https://{v}" - return v - -class ClaimReview(BaseModel): - publisher: Publisher - url: Optional[HttpUrl] = None - title: Optional[str] = None - reviewDate: Optional[str] = None - textualRating: Optional[str] = None - languageCode: str = Field(default="en-US") - -class Claim(BaseModel): - text: str - claimant: Optional[str] = None - claimDate: Optional[str] = None - claimReview: List[ClaimReview] - -class FactCheckResponse(BaseModel): - query: str = Field(..., description="Original query that was fact-checked") - total_claims_found: int = Field(..., ge=0) - results: List[Claim] = Field(default_factory=list) - summary: Dict[str, int] = Field(...) - - model_config = ConfigDict(json_schema_extra={ - "example": { - "query": "Example claim", - "total_claims_found": 1, - "results": [{ - "text": "Example claim text", - "claimant": "Source name", - "claimReview": [{ - "publisher": { - "name": "Fact Checker", - "site": "factchecker.com" - }, - "textualRating": "True" - }] - }], - "summary": { - "total_sources": 1, - "fact_checking_sites_queried": 10 - } - } - }) - -class SourceType(str, Enum): - FACT_CHECKER = "fact_checker" - NEWS_SITE = "news_site" - -class FactCheckSource(BaseModel): - domain: str - type: SourceType - priority: int = Field(default=1, ge=1, le=10) - - model_config = ConfigDict(json_schema_extra={ - "example": { - "domain": "factcheck.org", - "type": "fact_checker", - "priority": 1 - } - }) - -# Sources configuration with validation -SOURCES = { - "fact_checkers": [ - FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1) - for domain in [ - "factcheck.org", - "snopes.com", - "politifact.com", - "reuters.com", - "bbc.com", - "apnews.com", - "usatoday.com", - "nytimes.com", - "washingtonpost.com", - "afp.com", - "fullfact.org", - "truthorfiction.com", - "leadstories.com", - "altnews.in", - "boomlive.in", - "en.prothomalo.com" - ] - ], - "news_sites": [ - FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2) - for domain in [ - "www.thedailystar.net", - "www.thefinancialexpress.com.bd", - "www.theindependentbd.com", - "www.dhakatribune.com", - "www.newagebd.net", - "www.observerbd.com", - "www.daily-sun.com", - "www.tbsnews.net", - "www.businesspostbd.com", - "www.banglanews24.com/english", - "www.bdnews24.com/english", - "www.risingbd.com/english", - "www.dailyindustry.news", - "www.bangladeshpost.net", - "www.daily-bangladesh.com/english" - ] - ] -} - -class FactCheckRequest(BaseModel): - content: str = Field( - ..., - min_length=10, - max_length=1000, - description="The claim to be fact-checked" - ) - language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") - max_results_per_source: int = Field(default=10, ge=1, le=50) - - @validator('content') - def validate_content(cls, v): - if not v.strip(): - raise ValueError("Content cannot be empty or just whitespace") - return v.strip() - -async def fetch_fact_checks( - api_key: str, - base_url: str, - query: str, - site: FactCheckSource -) -> Dict: - """ - Fetch fact checks from a specific site using the Google Fact Check API - """ - try: - if not api_key or not base_url: - raise ValueError("API key or base URL not configured") - - params = { - "key": api_key, - "query": query, - "languageCode": "en-US", - "reviewPublisherSiteFilter": site.domain, - "pageSize": 10 - } - - response = requests.get(base_url, params=params) - response.raise_for_status() - return response.json() - except requests.RequestException as e: - raise 
HTTPException(
-            status_code=503,
-            detail=ErrorResponse(
-                detail=f"Error fetching from {site.domain}: {str(e)}",
-                error_code="FACT_CHECK_SERVICE_ERROR",
-                path="/check-facts"
-            ).dict()
-        )
-    except ValueError as e:
-        raise HTTPException(
-            status_code=500,
-            detail=ErrorResponse(
-                detail=str(e),
-                error_code="CONFIGURATION_ERROR",
-                path="/check-facts"
-            ).dict()
-        )
-
 @fact_check_router.post(
     "/check-facts",
     response_model=FactCheckResponse,
@@ -225,7 +34,7 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
     all_results = []
 
     # Validate configuration
-    if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
+    if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
         raise HTTPException(
             status_code=500,
             detail=ErrorResponse(
@@ -235,17 +44,13 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
             ).dict()
         )
 
-    # Check all sources in priority order
-    all_sources = (
-        SOURCES["fact_checkers"] +
-        SOURCES["news_sites"]
-    )
-    all_sources.sort(key=lambda x: x.priority)
+    # Get all sources in priority order
+    all_sources = get_all_sources()
 
     for source in all_sources:
         try:
             result = await fetch_fact_checks(
-                GOOGLE_FACT_CHECK_API_KEY,
+                GOOGLE_API_KEY,
                 GOOGLE_FACT_CHECK_BASE_URL,
                 request.content,
                 source
diff --git a/app/config.py b/app/config.py
index d9de9e9..a13fd4d 100644
--- a/app/config.py
+++ b/app/config.py
@@ -3,7 +3,7 @@ from dotenv import load_dotenv
 
 load_dotenv()
 
-GOOGLE_FACT_CHECK_API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"]
+GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
 GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"]
 OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
 
diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e2c8b7cf78c3bf747b3006eb0a7078967671dd0e
GIT binary patch
(binary patch data omitted: compiled .pyc artifact)
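The models module added below centralizes the request and response schemas that this patch strips out of app/api/fact_check.py. A small usage sketch of its two custom validators, illustrative only and assuming the pydantic v1-style @validator decorators used in the file:

# Illustrative only: exercising the validators defined in
# app/models/fact_check_models.py (added in the hunk below).
from app.models.fact_check_models import FactCheckRequest, Publisher

req = FactCheckRequest(content="  Indian flag was drawn in BUET campus  ")
print(req.content)  # validate_content strips the surrounding whitespace

pub = Publisher(name="Example Fact Checker", site="factcheck.org")
print(pub.site)     # validate_site prefixes the missing scheme: https://factcheck.org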
diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py
new file mode 100644
index 0000000..6c85771
--- /dev/null
+++ b/app/models/fact_check_models.py
@@ -0,0 +1,109 @@
+from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict
+from typing import Dict, List, Optional
+from enum import Enum
+from datetime import datetime
+
+class ErrorResponse(BaseModel):
+    detail: str
+    error_code: str = Field(..., description="Unique error code for this type of error")
+    timestamp: str = Field(default_factory=lambda: datetime.now().isoformat())
+    path: Optional[str] = Field(None, description="The endpoint path where error occurred")
+
+    model_config = ConfigDict(json_schema_extra={
+        "example": {
+            "detail": "Error description",
+            "error_code": "ERROR_CODE",
+            "timestamp": "2024-12-09T16:49:30.905765",
+            "path": "/check-facts"
+        }
+    })
+
+class RequestValidationError(BaseModel):
+    loc: List[str]
+    msg: str
+    type: str
+
+class Publisher(BaseModel):
+    name: str
+    site: Optional[str] = Field(None, description="Publisher's website")
+
+    @validator('site')
+    def validate_site(cls, v):
+        if v and not (v.startswith('http://') or v.startswith('https://')):
+            return f"https://{v}"
+        return v
+
+class ClaimReview(BaseModel):
+    publisher: Publisher
+    url: Optional[HttpUrl] = None
+    title: Optional[str] = None
+    reviewDate: Optional[str] = None
+    textualRating: Optional[str] = None
+    languageCode: str = Field(default="en-US")
+
+class Claim(BaseModel):
+    text: str
+    claimant: Optional[str] = None
+    claimDate: Optional[str] = None
+    claimReview: List[ClaimReview]
+
+class FactCheckResponse(BaseModel):
+    query: str = Field(..., description="Original query that was fact-checked")
+    total_claims_found: int = Field(..., ge=0)
+    results: List[Claim] = Field(default_factory=list)
+    summary: Dict[str, int] = Field(...)
+
+    model_config = ConfigDict(json_schema_extra={
+        "example": {
+            "query": "Example claim",
+            "total_claims_found": 1,
+            "results": [{
+                "text": "Example claim text",
+                "claimant": "Source name",
+                "claimReview": [{
+                    "publisher": {
+                        "name": "Fact Checker",
+                        "site": "factchecker.com"
+                    },
+                    "textualRating": "True"
+                }]
+            }],
+            "summary": {
+                "total_sources": 1,
+                "fact_checking_sites_queried": 10
+            }
+        }
+    })
+
+class SourceType(str, Enum):
+    FACT_CHECKER = "fact_checker"
+    NEWS_SITE = "news_site"
+
+class FactCheckSource(BaseModel):
+    domain: str
+    type: SourceType
+    priority: int = Field(default=1, ge=1, le=10)
+
+    model_config = ConfigDict(json_schema_extra={
+        "example": {
+            "domain": "factcheck.org",
+            "type": "fact_checker",
+            "priority": 1
+        }
+    })
+
+class FactCheckRequest(BaseModel):
+    content: str = Field(
+        ...,
+        min_length=10,
+        max_length=1000,
+        description="The claim to be fact-checked"
+    )
+    language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$")
+    max_results_per_source: int = Field(default=10, ge=1, le=50)
+
+    @validator('content')
+    def validate_content(cls, v):
+        if not v.strip():
+            raise ValueError("Content cannot be empty or just whitespace")
+        return v.strip()
\ No newline at end of file
diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5fe153f4fe7eb6088bf019108f8b358e5f779ac9
GIT binary patch
(binary patch data omitted: compiled .pyc artifact)

diff --git a/app/websites/fact_checker_website.py b/app/websites/fact_checker_website.py
new file mode 100644
index 0000000..d6fae44
--- /dev/null
+++ b/app/websites/fact_checker_website.py
@@ -0,0 +1,99 @@
+from typing import Dict, List +import requests +from fastapi import HTTPException +from app.models.fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType + +# Sources configuration with validation +SOURCES = { + "fact_checkers": [ + FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1) + for domain in [ + "factcheck.org", + "snopes.com", + "politifact.com", + "reuters.com", + "bbc.com", + "apnews.com", + "usatoday.com", + "nytimes.com", + "washingtonpost.com", + "afp.com", + "fullfact.org", + "truthorfiction.com", + "leadstories.com", + "altnews.in", + "boomlive.in", + "en.prothomalo.com" + ] + ], + "news_sites": [ + FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2) + for domain in [ + "www.thedailystar.net", + "www.thefinancialexpress.com.bd", + "www.theindependentbd.com", + "www.dhakatribune.com", + "www.newagebd.net", + "www.observerbd.com", + "www.daily-sun.com", + "www.tbsnews.net", + "www.businesspostbd.com", + "www.banglanews24.com/english", + "www.bdnews24.com/english", + "www.risingbd.com/english", + "www.dailyindustry.news", + "www.bangladeshpost.net", + "www.daily-bangladesh.com/english" + ] + ] +} + +async def fetch_fact_checks( + api_key: str, + base_url: str, + query: str, + site: FactCheckSource +) -> Dict: + """ + Fetch fact checks from a specific site using the Google Fact Check API + """ + try: + if not api_key or not base_url: + raise ValueError("API key or base URL not configured") + + params = { + "key": api_key, + "query": query, + "languageCode": "en-US", + "reviewPublisherSiteFilter": site.domain, + "pageSize": 10 + } + + response = requests.get(base_url, params=params) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + raise HTTPException( + status_code=503, + detail=ErrorResponse( + detail=f"Error fetching from {site.domain}: {str(e)}", + error_code="FACT_CHECK_SERVICE_ERROR", + path="/check-facts" + ).dict() + ) + except ValueError as e: + raise HTTPException( + status_code=500, + detail=ErrorResponse( + detail=str(e), + error_code="CONFIGURATION_ERROR", + path="/check-facts" + ).dict() + ) + +def get_all_sources() -> List[FactCheckSource]: + """ + Get all sources sorted by priority + """ + all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"] + return sorted(all_sources, key=lambda x: x.priority) \ No newline at end of file From 1a1a713e0fbe79edef9e90764568a3a2f7c9b8fc Mon Sep 17 00:00:00 2001 From: Utsho Dey Date: Thu, 12 Dec 2024 17:31:44 +0600 Subject: [PATCH 2/5] base code added --- .gitignore | 2 +- app/__pycache__/config.cpython-312.pyc | Bin 507 -> 507 bytes .../__pycache__/fact_check.cpython-312.pyc | Bin 3800 -> 4224 bytes app/api/ai_fact_check.py | 112 ++++++++++ app/api/fact_check.py | 52 ++++- app/api/scrap_websites.py | 160 ++++++++++++++ .../fact_check_models.cpython-312.pyc | Bin 5577 -> 9611 bytes app/models/fact_check_models.py | 201 ++++++++++++++---- app/services/openai_client.py | 173 +++++++++++++++ main.py | 4 + 10 files changed, 656 insertions(+), 48 deletions(-) create mode 100644 app/api/ai_fact_check.py create mode 100644 app/api/scrap_websites.py create mode 100644 app/services/openai_client.py diff --git a/.gitignore b/.gitignore index 21d6e87..cd4609c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ env .env test.py -/__pycache__/ \ No newline at end of file +__pycache__ \ No newline at end of file diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc index 
267bba1df1c923dade63cc9cc26619d42bbdc2a5..91b068866373f8d9210744c00633448a6572698b 100644
GIT binary patch
(binary patch data omitted: compiled .pyc artifact)

diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc
index 56cc8981234a04f5d9b6da8c11310f6f86677fde..d2530f5d85725dc2f22291a59760ae01d00a57cd 100644
GIT binary patch
(binary patch data omitted: compiled .pyc artifact)

diff --git a/app/api/fact_check.py b/app/api/fact_check.py
--- a/app/api/fact_check.py
+++ b/app/api/fact_check.py
@@ ... @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
     """
     Check facts using multiple fact-checking sources
     """
     all_results = []
+    verified_results = []
 
     # Validate configuration
     if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL:
@@ -46,6 +51,8 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
 
     # Get all sources in priority order
     all_sources = get_all_sources()
+    all_sources_list = []  # To store source URLs
+    contexts_used = []  # To store context snippets
 
     for source in all_sources:
         try:
@@ -58,11 +65,17 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
 
             if "claims" in result:
                 # Validate each claim through Pydantic
-                validated_claims = [
-                    Claim(**claim).dict()
-                    for claim in result["claims"]
-                ]
-                all_results.extend(validated_claims)
+                for
claim in result["claims"]: + validated_claim = Claim(**claim).dict() + all_results.append(validated_claim) + + # Extract source and context information + if "claimReview" in validated_claim: + review = validated_claim["claimReview"][0] + if "publisher" in review and "site" in review["publisher"]: + all_sources_list.append(review["publisher"]["site"]) + if "textualRating" in review: + contexts_used.append(review["textualRating"]) except HTTPException: raise @@ -81,14 +94,33 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: ).dict() ) - # Create the response using Pydantic model + # Prepare the verification result + verification_result = { + "verdict": "Insufficient Information", # Default verdict + "confidence": "Low", + "evidence": contexts_used, + "reasoning": "Based on available fact checks", + "missing_info": "Additional verification may be needed" + } + + # Create token usage information + token_usage = TokenUsage( + prompt_tokens=0, + completion_tokens=0, + total_tokens=0 + ) + + # Create the response using Pydantic model with all required fields response = FactCheckResponse( query=request.content, total_claims_found=len(all_results), results=all_results, + verification_result=verification_result, + sources=list(set(all_sources_list)), + context_used=contexts_used, + token_usage=token_usage, summary={ - "total_sources": len(set(claim.get("claimReview", [{}])[0].get("publisher", {}).get("site", "") - for claim in all_results if claim.get("claimReview"))), + "total_sources": len(set(all_sources_list)), "fact_checking_sites_queried": len(all_sources) } ) diff --git a/app/api/scrap_websites.py b/app/api/scrap_websites.py new file mode 100644 index 0000000..8a1f48f --- /dev/null +++ b/app/api/scrap_websites.py @@ -0,0 +1,160 @@ +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel +from typing import List, Dict +import requests +from bs4 import BeautifulSoup +import urllib.parse +import numpy as np +from app.services.openai_client import OpenAIClient +from app.config import OPENAI_API_KEY + +scrap_websites_router = APIRouter() + +class SearchRequest(BaseModel): + search_text: str + site_domains: List[str] + +class UrlSimilarityInfo(BaseModel): + url: str + similarity: float + extracted_text: str + +class SearchResponse(BaseModel): + results: Dict[str, List[str]] + error_messages: Dict[str, str] + url_similarities: Dict[str, List[UrlSimilarityInfo]] + +def extract_url_text(url: str) -> str: + """Extract and process meaningful text from URL path with improved cleaning""" + try: + # Parse the URL and get the path + parsed = urllib.parse.urlparse(url) + path = parsed.path + + # Remove common URL parts and file extensions + path = path.replace('.html', '').replace('/index', '').replace('.php', '') + + # Split path into segments + segments = [seg for seg in path.split('/') if seg] + + # Remove dates and numbers + cleaned_segments = [] + for segment in segments: + # Replace hyphens and underscores with spaces + segment = segment.replace('-', ' ').replace('_', ' ') + + # Filter out segments that are just dates or numbers + if not (segment.replace(' ', '').isdigit() or + all(part.isdigit() for part in segment.split() if part)): + cleaned_segments.append(segment) + + # Remove very common words that don't add meaning + common_words = { + 'www', 'live', 'news', 'intl', 'index', 'world', 'us', 'uk', + 'updates', 'update', 'latest', 'breaking', 'new', 'article' + } + + # Join segments and split into words + text = ' '.join(cleaned_segments) + words = 
[word.lower() for word in text.split()
+                if word.lower() not in common_words and len(word) > 1]
+
+        return ' '.join(words)
+    except Exception:
+        return ''
+
+def google_search_scraper(search_text: str, site_domain: str) -> List[str]:
+    query = f"{search_text} \"site:{site_domain}\""
+    encoded_query = urllib.parse.quote(query)
+    base_url = "https://www.google.com/search"
+    url = f"{base_url}?q={encoded_query}"
+
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+    }
+
+    try:
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.content, 'html.parser')
+        search_results = soup.find_all('div', class_='g')
+
+        urls = []
+        for result in search_results[:5]:
+            link = result.find('a')
+            if link and 'href' in link.attrs:
+                url = link['href']
+                if url.startswith('http'):
+                    urls.append(url)
+
+        return urls[:5]
+
+    except requests.RequestException as e:
+        raise HTTPException(status_code=500, detail=f"Error scraping {site_domain}: {str(e)}")
+
+def calculate_similarity(query_embedding: List[float], url_embedding: List[float]) -> float:
+    query_array = np.array(query_embedding)
+    url_array = np.array(url_embedding)
+
+    similarity = np.dot(url_array, query_array) / (
+        np.linalg.norm(url_array) * np.linalg.norm(query_array)
+    )
+    return float(similarity)
+
+@scrap_websites_router.post("/search", response_model=SearchResponse)
+async def search_websites(request: SearchRequest):
+    results = {}
+    error_messages = {}
+    url_similarities = {}
+
+    # Initialize OpenAI client
+    openai_client = OpenAIClient(OPENAI_API_KEY)
+
+    # Enhance search text with key terms
+    search_context = request.search_text
+    query_embedding = openai_client.get_embeddings([search_context])[0]
+
+    # Higher similarity threshold for better filtering
+    SIMILARITY_THRESHOLD = 0.75
+
+    for domain in request.site_domains:
+        try:
+            urls = google_search_scraper(request.search_text, domain)
+            url_sims = []
+            valid_urls = []
+
+            for url in urls:
+                url_text = extract_url_text(url)
+
+                # Skip URLs with no meaningful text extracted
+                if not url_text:
+                    continue
+
+                url_embedding = openai_client.get_embeddings([url_text])[0]
+                similarity = calculate_similarity(query_embedding, url_embedding)
+
+                url_sims.append(UrlSimilarityInfo(
+                    url=url,
+                    similarity=similarity,
+                    extracted_text=url_text
+                ))
+
+                if similarity >= SIMILARITY_THRESHOLD:
+                    valid_urls.append(url)
+
+            results[domain] = valid_urls
+            url_similarities[domain] = sorted(url_sims,
+                                           key=lambda x: x.similarity,
+                                           reverse=True)
+
+        except HTTPException as e:
+            error_messages[domain] = str(e.detail)
+        except Exception as e:
+            error_messages[domain] = f"Unexpected error for {domain}: {str(e)}"
+
+    return SearchResponse(
+        results=results,
+        error_messages=error_messages,
+        url_similarities=url_similarities
+    )
\ No newline at end of file
diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc
index e2c8b7cf78c3bf747b3006eb0a7078967671dd0e..239346e1dd0ca70c102289a9908f973365f9b466 100644
GIT binary patch
(binary patch data omitted: compiled .pyc artifact)
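The /search endpoint above keeps only URLs whose OpenAI-embedding similarity to the query reaches SIMILARITY_THRESHOLD = 0.75. A self-contained check of the same cosine formula that calculate_similarity() applies, using toy two-dimensional vectors as stand-ins for real embedding vectors:

# Standalone sanity check of the cosine-similarity scoring used by
# calculate_similarity() in app/api/scrap_websites.py above.
import numpy as np

def cosine(query: list, url: list) -> float:
    q, u = np.array(query, dtype=float), np.array(url, dtype=float)
    return float(np.dot(u, q) / (np.linalg.norm(u) * np.linalg.norm(q)))

print(cosine([1.0, 0.0], [1.0, 0.0]))  # 1.0   -> passes the 0.75 threshold
print(cosine([1.0, 0.0], [1.0, 1.0]))  # ~0.71 -> filtered out
print(cosine([1.0, 0.0], [0.0, 1.0]))  # 0.0   -> filtered out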
zNzhX-k)Ou53X*46Qc$X^hS%*sN3SeW+j-F}TM#w4pJJ@WT%}wqx=O_^xvEoNpn1c` zvgmIVVGO~Kupi-Zgs#OI(jg!X(DcF2Mz|q0{Na`2&E(tphClT7bR!VHKGTZF;B(q< z23)x$P*tPG15yKj1K7^wP zU5m>hhmhju{m-;^E{8kEH~jx7bj2x*MTp07n3&m%(g_6N9CwpFgCJIL5~VJe$B;u9 z=FX&fTlO`#LVJ4;gP69{de_sc}EwZK^hXJ0R@XI7zH|N;2tNQ%Z6gl>KA?=)pDBlJ6#Wr;OH9#t^?U z7GKUSjm5q^Y_0mvJ%?(!8SBkcE5 zk0V{hs=^^vhC`|~i6{U}Y->6lB@JO5p`$1$eowby5(N7LfE&{NSpTYh??8SlHg|1m z<;bQwcRxM)?x~M{u$9i;Q~Pf#n`-Xjst5R?%!zDvv@vmb+p7#lwnussfp3(=VP8{9B(%5AwmN&7-w3@N%;I8NM+g6V zXe8gUSk%1OhCB#19=F$}M*OjBXB*L>w|0&QwE0#=W9axidKFH)Bwq+Wk%i**6OHJ^ zPZSbZQ=qDXyHypahm0`)bvU`q;AoN<`eHu@xTCP=Q2rr+>nT%LUtm^^Sx&nuu+&7Q zQtNLDwA1kU z%g{>vud&IV7z}Z>Vy)`Xs>7b;mBi2}Zn~>2n2tkP)z!;pwX|rKs7TC1tXg)FZzP8J zzqK?!-TTY2BS6{M-I`c7$82{a1itmeSvc<>ahhD?8FmHsqvm(+U zx@0duC3I*Zh|Gw)qk^AZwfY)z2nG1r7x{n-jV68#LTtnmExv4b3Z%2 z;n}$I*}1Lk$$M(*_Nh(vxTi|*86kRTY5cMU>My&ALtNxGvbfrcX%k)`x1%~0-Zr=d-Mv-Ot zfi(4JY3hMA{wHbtE2-}*X=tRWhGlJarYXUznO5Y;?F&r_Ud;f1FgEsNO4e@sn-aX5 i$2{`zYJ8P6B@mi=5QbSz30}=qL{6^O|0TgoMEE~GjXT-^ diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py index 6c85771..bec9977 100644 --- a/app/models/fact_check_models.py +++ b/app/models/fact_check_models.py @@ -1,7 +1,14 @@ from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Any, Union from enum import Enum from datetime import datetime +from urllib.parse import urlparse + +# Common Models +class TokenUsage(BaseModel): + prompt_tokens: Optional[int] = 0 + completion_tokens: Optional[int] = 0 + total_tokens: Optional[int] = 0 class ErrorResponse(BaseModel): detail: str @@ -18,11 +25,7 @@ class ErrorResponse(BaseModel): } }) -class RequestValidationError(BaseModel): - loc: List[str] - msg: str - type: str - +# Fact Check Models class Publisher(BaseModel): name: str site: Optional[str] = Field(None, description="Publisher's website") @@ -47,11 +50,116 @@ class Claim(BaseModel): claimDate: Optional[str] = None claimReview: List[ClaimReview] -class FactCheckResponse(BaseModel): - query: str = Field(..., description="Original query that was fact-checked") - total_claims_found: int = Field(..., ge=0) - results: List[Claim] = Field(default_factory=list) - summary: Dict[str, int] = Field(...) 
+class SourceType(str, Enum): + FACT_CHECKER = "fact_checker" + NEWS_SITE = "news_site" + +class FactCheckSource(BaseModel): + domain: str + type: SourceType + priority: int = Field(default=1, ge=1, le=10) + +# Verification Models +class VerificationResult(BaseModel): + verdict: str = Field(..., description="True/False/Insufficient Information") + confidence: str = Field(..., description="High/Medium/Low") + evidence: Union[str, List[str]] + reasoning: str + missing_info: Optional[str] = None + + model_config = ConfigDict(json_schema_extra={ + "example": { + "verdict": "True", + "confidence": "High", + "evidence": ["Direct quote from source supporting the claim"], + "reasoning": "Detailed analysis of why the claim is considered true", + "missing_info": "Any caveats or limitations of the verification" + } + }) + +# Request Models +class BaseFactCheckRequest(BaseModel): + content: str = Field( + ..., + min_length=10, + max_length=1000, + description="The claim to be fact-checked" + ) + + @validator('content') + def validate_content(cls, v): + if not v.strip(): + raise ValueError("Content cannot be empty or just whitespace") + return v.strip() + +class GoogleFactCheckRequest(BaseFactCheckRequest): + language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") + max_results_per_source: int = Field(default=10, ge=1, le=50) + +class AIFactCheckRequest(BaseFactCheckRequest): + urls: List[str] = Field( + ..., + min_items=1, + max_items=5, + description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing" + ) + + @validator('urls') + def validate_urls(cls, urls): + validated_urls = [] + for url in urls: + if not url.strip(): + raise ValueError("URL cannot be empty") + + # Add https:// if no protocol specified + if not url.startswith(('http://', 'https://')): + url = f'https://{url}' + + try: + result = urlparse(url) + if not result.netloc: + raise ValueError(f"Invalid URL structure for {url}") + validated_urls.append(url) + except Exception as e: + raise ValueError(f"Invalid URL {url}: {str(e)}") + + return validated_urls + + model_config = ConfigDict(json_schema_extra={ + "example": { + "content": "Indian flag was drawn in BUET campus", + "urls": [ + "www.altnews.in/article-about-flag", + "www.another-source.com/related-news" + ] + } + }) + +# Response Models +class BaseFactCheckResponse(BaseModel): + query: str + token_usage: TokenUsage + sources: List[str] + context_used: List[str] + + model_config = ConfigDict(json_schema_extra={ + "example": { + "query": "Example statement to verify", + "token_usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150 + }, + "sources": ["source1.com", "source2.com"], + "context_used": ["Relevant context from sources"] + } + }) + +class GoogleFactCheckResponse(BaseFactCheckResponse): + total_claims_found: int + results: List[Dict[str, Any]] + verification_result: Dict[str, Any] + summary: Dict[str, int] model_config = ConfigDict(json_schema_extra={ "example": { @@ -68,6 +176,19 @@ class FactCheckResponse(BaseModel): "textualRating": "True" }] }], + "verification_result": { + "verdict": "True", + "confidence": "High", + "evidence": ["Supporting evidence"], + "reasoning": "Detailed analysis" + }, + "sources": ["factchecker.com"], + "context_used": ["Relevant context"], + "token_usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150 + }, "summary": { "total_sources": 1, "fact_checking_sites_queried": 10 @@ -75,35 +196,41 @@ class 
FactCheckResponse(BaseModel): } }) -class SourceType(str, Enum): - FACT_CHECKER = "fact_checker" - NEWS_SITE = "news_site" - -class FactCheckSource(BaseModel): - domain: str - type: SourceType - priority: int = Field(default=1, ge=1, le=10) +class AIFactCheckResponse(BaseFactCheckResponse): + verification_result: Dict[str, VerificationResult] # Changed to Dict to store results per URL model_config = ConfigDict(json_schema_extra={ "example": { - "domain": "factcheck.org", - "type": "fact_checker", - "priority": 1 + "query": "Indian flag was drawn in BUET campus", + "verification_result": { + "https://www.source1.com": { + "verdict": "True", + "confidence": "High", + "evidence": ["Supporting evidence from source 1"], + "reasoning": "Detailed analysis from source 1", + "missing_info": None + }, + "https://www.source2.com": { + "verdict": "True", + "confidence": "Medium", + "evidence": ["Supporting evidence from source 2"], + "reasoning": "Analysis from source 2", + "missing_info": "Additional context needed" + } + }, + "sources": ["source1.com", "source2.com"], + "context_used": [ + "Context from source 1", + "Context from source 2" + ], + "token_usage": { + "prompt_tokens": 200, + "completion_tokens": 100, + "total_tokens": 300 + } } }) -class FactCheckRequest(BaseModel): - content: str = Field( - ..., - min_length=10, - max_length=1000, - description="The claim to be fact-checked" - ) - language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") - max_results_per_source: int = Field(default=10, ge=1, le=50) - - @validator('content') - def validate_content(cls, v): - if not v.strip(): - raise ValueError("Content cannot be empty or just whitespace") - return v.strip() \ No newline at end of file +# Backwards compatibility aliases +FactCheckRequest = GoogleFactCheckRequest +FactCheckResponse = GoogleFactCheckResponse \ No newline at end of file diff --git a/app/services/openai_client.py b/app/services/openai_client.py new file mode 100644 index 0000000..22541bb --- /dev/null +++ b/app/services/openai_client.py @@ -0,0 +1,173 @@ +from langchain_community.document_loaders import AsyncHtmlLoader +from langchain_community.document_transformers import BeautifulSoupTransformer +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_core.documents import Document +from typing import List, Dict, Any +import numpy as np +import logging as logger +import openai +import json + +class OpenAIClient: + def __init__(self, api_key: str): + """ + Initialize OpenAI client with the provided API key. + """ + openai.api_key = api_key + + async def generate_text_response(self, system_prompt: str, user_prompt: str, max_tokens: int) -> dict: + """ + Generate a response using OpenAI's chat completion API. 
+ """ + try: + response = openai.ChatCompletion.create( + model="gpt-4", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ], + max_tokens=max_tokens + ) + content = response['choices'][0]['message']['content'] + # Parse the JSON string into a dictionary + parsed_content = json.loads(content) + + return { + "response": parsed_content, # Now returns a dictionary instead of string + "prompt_tokens": response['usage']['prompt_tokens'], + "completion_tokens": response['usage']['completion_tokens'], + "total_tokens": response['usage']['total_tokens'] + } + except json.JSONDecodeError as e: + raise Exception(f"Failed to parse OpenAI response as JSON: {str(e)}") + except Exception as e: + raise Exception(f"OpenAI text generation error: {str(e)}") + + def get_embeddings(self, texts: List[str]) -> List[List[float]]: + """ + Retrieve embeddings for a list of texts using OpenAI's embedding API. + """ + try: + response = openai.Embedding.create( + input=texts, + model="text-embedding-ada-002" + ) + embeddings = [data['embedding'] for data in response['data']] + return embeddings + except Exception as e: + raise Exception(f"OpenAI embedding error: {str(e)}") + +class AIFactChecker: + def __init__(self, openai_client: OpenAIClient): + """Initialize the fact checker with OpenAI client.""" + self.openai_client = openai_client + self.text_splitter = RecursiveCharacterTextSplitter( + chunk_size=1000, + chunk_overlap=200, + length_function=len, + separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""] + ) + + async def scrape_webpage(self, url: str) -> List[Document]: + """Scrape webpage content using LangChain's AsyncHtmlLoader.""" + try: + loader = AsyncHtmlLoader([url]) + docs = await loader.aload() + + bs_transformer = BeautifulSoupTransformer() + docs_transformed = bs_transformer.transform_documents(docs) + docs_chunks = self.text_splitter.split_documents(docs_transformed) + + logger.info(f"Successfully scraped webpage | chunks={len(docs_chunks)}") + return docs_chunks + + except Exception as e: + logger.error(f"Error scraping webpage | url={url} | error={str(e)}") + raise + + def find_relevant_chunks( + self, + query_embedding: List[float], + doc_embeddings: List[List[float]], + docs: List[Document] + ) -> List[Document]: + """Find most relevant document chunks using cosine similarity.""" + try: + query_array = np.array(query_embedding) + chunks_array = np.array(doc_embeddings) + + similarities = np.dot(chunks_array, query_array) / ( + np.linalg.norm(chunks_array, axis=1) * np.linalg.norm(query_array) + ) + + top_indices = np.argsort(similarities)[-5:][::-1] + return [docs[i] for i in top_indices] + + except Exception as e: + logger.error(f"Error finding relevant chunks | error={str(e)}") + raise + + async def verify_fact(self, query: str, relevant_docs: List[Document]) -> Dict[str, Any]: + """Verify fact using OpenAI's API with context from relevant documents.""" + try: + context = "\n\n".join([doc.page_content for doc in relevant_docs]) + + system_prompt = """You are a professional fact-checking assistant. Analyze the provided context + and determine if the given statement is true, false, or if there isn't enough information. 
+ + Provide your response in the following JSON format: + { + "verdict": "True/False/Insufficient Information", + "confidence": "High/Medium/Low", + "evidence": "Direct quotes or evidence from the context", + "reasoning": "Your detailed analysis and reasoning", + "missing_info": "Any important missing information (if applicable)" + }""" + + user_prompt = f"""Context: + {context} + + Statement to verify: "{query}" + + Analyze the statement based on the provided context and return your response in the specified JSON format.""" + + response = await self.openai_client.generate_text_response( + system_prompt=system_prompt, + user_prompt=user_prompt, + max_tokens=800 + ) + + sources = list(set([doc.metadata.get('source', 'Unknown source') for doc in relevant_docs])) + + return { + "verification_result": response["response"], # This is now a dictionary + "sources": sources, + "context_used": [doc.page_content for doc in relevant_docs], + "token_usage": { + "prompt_tokens": response["prompt_tokens"], + "completion_tokens": response["completion_tokens"], + "total_tokens": response["total_tokens"] + } + } + + except Exception as e: + logger.error(f"Error verifying fact | error={str(e)}") + raise + + async def check_fact(self, url: str, query: str) -> Dict[str, Any]: + """Main method to check a fact against a webpage.""" + try: + docs = await self.scrape_webpage(url) + + doc_texts = [doc.page_content for doc in docs] + doc_embeddings = self.openai_client.get_embeddings(doc_texts) + query_embedding = self.openai_client.get_embeddings([query]) + + relevant_docs = self.find_relevant_chunks(query_embedding[0], doc_embeddings, docs) + verification_result = await self.verify_fact(query, relevant_docs) + + return verification_result + + except Exception as e: + logger.error(f"Error checking fact | error={str(e)}") + raise \ No newline at end of file diff --git a/main.py b/main.py index 6b79e28..25d68c4 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,8 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from app.api.fact_check import fact_check_router +from app.api.ai_fact_check import aifact_check_router +from app.api.scrap_websites import scrap_websites_router from app.config import FRONTEND_URL # Initialize FastAPI app @@ -39,6 +41,8 @@ async def health_check(): return {"status": "healthy"} app.include_router(fact_check_router, prefix="") +app.include_router(aifact_check_router, prefix="") +app.include_router(scrap_websites_router, prefix="") # Include routers (uncomment and modify as needed) # from routes import some_router From d5b977e1d49d399a2f544e919755d3b9d81cd9c6 Mon Sep 17 00:00:00 2001 From: Utsho Dey Date: Sat, 14 Dec 2024 18:19:37 +0600 Subject: [PATCH 3/5] base code added --- .../__pycache__/fact_check.cpython-312.pyc | Bin 4224 -> 5851 bytes app/api/ai_fact_check.py | 2 - app/api/fact_check.py | 51 +- app/api/scrap_websites.py | 265 ++++++-- app/api/scrap_websites2.py | 261 ++++++++ .../fact_check_models.cpython-312.pyc | Bin 9611 -> 9422 bytes app/models/fact_check_models.py | 7 - app/services/openai_client.py | 1 - .../fact_checker_website.cpython-312.pyc | Bin 3341 -> 3152 bytes app/websites/fact_checker_website.py | 13 - output.json | 595 ++++++++++++++++++ search_response_altnews_in.html | 28 + search_response_bbc_com.html | 28 + search_response_en_prothomalo_com.html | 28 + 14 files changed, 1194 insertions(+), 85 deletions(-) create mode 100644 app/api/scrap_websites2.py create mode 100644 output.json create mode 100644 search_response_altnews_in.html create 
mode 100644 search_response_bbc_com.html
 create mode 100644 search_response_en_prothomalo_com.html

diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc
index d2530f5d85725dc2f22291a59760ae01d00a57cd..7304a380d4674133d015ebad37508110a0b5cb53 100644
GIT binary patch
(binary patch data omitted: compiled .pyc artifact)

diff --git a/app/api/fact_check.py b/app/api/fact_check.py
--- a/app/api/fact_check.py
+++ b/app/api/fact_check.py
@@ ... @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
     """
-    Check facts using multiple fact-checking sources
+    Check facts using multiple fact-checking sources and fallback to web search
     """
     all_results = []
     verified_results = []
@@ -78,12 +79,55 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse:
                         contexts_used.append(review["textualRating"])
 
         except HTTPException:
-            raise
+            continue
         except Exception as e:
             # Log the error but continue with other sources
             print(f"Error processing {source.domain}: {str(e)}")
             continue
 
+    # If no results found, try searching websites
+    if not all_results:
+        try:
+            # Create search request
+            search_request = SearchRequest(
+                search_text=request.content,
+                source_types=["fact_checkers"]
+            )
+
+            # Perform website search
+            search_response = await search_websites(search_request)
+
+            # If AI fact check results are available, use them
+            if search_response.ai_fact_check_result:
+                # Create a claim from AI fact check result
+                ai_claim = {
+                    "text": request.content,
+                    "claimant": "AI Analysis",
+                    "claimDate": datetime.now().isoformat(),
+                    "claimReview": [{
+                        "publisher": {
+                            "name": "AI Fact Checker",
+                            "site": "ai-fact-check"
+                        },
+                        "textualRating": search_response.ai_fact_check_result.verification_result["verdict"],
+                        "title": "AI Fact Check Analysis",
+                        "reviewDate": datetime.now().isoformat(),
+                        "url": ""
+                    }]
+                }
+
+                validated_claim = Claim(**ai_claim).dict()
+                
all_results.append(validated_claim) + + # Add sources and contexts + all_sources_list.extend(search_response.results.keys()) + # verification_result is a dict keyed by URL, and each "evidence" field is a string, + # so collect the per-URL evidence values individually rather than extending with a string + ai_verification = search_response.ai_fact_check_result["verification_result"] + for url_result in ai_verification.values(): + if url_result.get("evidence"): + contexts_used.append(url_result["evidence"]) + + except Exception as e: + print(f"Error during website search: {str(e)}") + + # If still no results found after searching websites if not all_results: raise HTTPException( status_code=404, @@ -99,7 +143,7 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: "verdict": "Insufficient Information", # Default verdict "confidence": "Low", "evidence": contexts_used, - "reasoning": "Based on available fact checks", + "reasoning": "Based on available fact checks and web search results", "missing_info": "Additional verification may be needed" } @@ -117,7 +161,6 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: results=all_results, verification_result=verification_result, sources=list(set(all_sources_list)), - context_used=contexts_used, token_usage=token_usage, summary={ "total_sources": len(set(all_sources_list)), diff --git a/app/api/scrap_websites.py b/app/api/scrap_websites.py index 8a1f48f..93fb31a 100644 --- a/app/api/scrap_websites.py +++ b/app/api/scrap_websites.py @@ -1,18 +1,41 @@ from fastapi import APIRouter, HTTPException from pydantic import BaseModel -from typing import List, Dict -import requests -from bs4 import BeautifulSoup +from typing import List, Dict, Optional +from urllib.parse import urlencode, urlparse import urllib.parse import numpy as np +from time import sleep +import logging +import requests +from bs4 import BeautifulSoup +import re from app.services.openai_client import OpenAIClient from app.config import OPENAI_API_KEY +from app.websites.fact_checker_website import SOURCES, get_all_sources +from app.api.ai_fact_check import ai_fact_check +from app.models.fact_check_models import ( + AIFactCheckRequest, + AIFactCheckResponse, + VerificationResult, + TokenUsage +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) scrap_websites_router = APIRouter() +# Configuration for scraping +MAX_RETRIES = 2 +RETRY_DELAY = 2 + class SearchRequest(BaseModel): search_text: str - site_domains: List[str] + source_types: List[str] = ["fact_checkers"] class UrlSimilarityInfo(BaseModel): url: str @@ -22,78 +45,121 @@ class SearchResponse(BaseModel): results: Dict[str, List[str]] error_messages: Dict[str, str] - url_similarities: Dict[str, List[UrlSimilarityInfo]] + ai_fact_check_result: Optional[Dict] = None def extract_url_text(url: str) -> str: """Extract and process meaningful text from URL path with improved cleaning""" + logger.debug(f"Extracting text from URL: {url}") try: - # Parse the URL and get the path parsed = urllib.parse.urlparse(url) path = parsed.path - - # Remove common URL parts and file extensions path = path.replace('.html', '').replace('/index', '').replace('.php', '') - - # Split path into segments segments = [seg for seg in path.split('/') if seg] - - # Remove dates and numbers cleaned_segments = [] for segment in segments: - # Replace hyphens and underscores with spaces segment = segment.replace('-', ' ').replace('_', ' ') - - # Filter out segments that are just dates or numbers if not (segment.replace(' ', '').isdigit() or all(part.isdigit() for part in segment.split() if
part)): cleaned_segments.append(segment) - # Remove very common words that don't add meaning common_words = { 'www', 'live', 'news', 'intl', 'index', 'world', 'us', 'uk', 'updates', 'update', 'latest', 'breaking', 'new', 'article' } - # Join segments and split into words text = ' '.join(cleaned_segments) words = [word.lower() for word in text.split() if word.lower() not in common_words and len(word) > 1] - return ' '.join(words) - except Exception: + result = ' '.join(words) + logger.debug(f"Extracted text: {result}") + return result + except Exception as e: + logger.error(f"Error extracting text from URL {url}: {str(e)}") return '' -def google_search_scraper(search_text: str, site_domain: str) -> List[str]: - query = f"{search_text} \"site:{site_domain}\"" - encoded_query = urllib.parse.quote(query) - base_url = "https://www.google.com/search" - url = f"{base_url}?q={encoded_query}" +def extract_search_results(html_content): + """Extract URLs using multiple selectors and patterns""" + soup = BeautifulSoup(html_content, 'html.parser') + urls = set() # Using set to avoid duplicates + + # Multiple CSS selectors to try + selectors = [ + 'div.g div.yuRUbf > a', # Main result links + 'div.g a.l', # Alternative link format + 'div.rc a', # Another possible format + 'div[class*="g"] > div > div > div > a', # Broader match + 'a[href^="http"]' # Any http link + ] + + for selector in selectors: + try: + elements = soup.select(selector) + for element in elements: + url = element.get('href') + if url and url.startswith('http') and not url.startswith('https://www.google.com'): + urls.add(url) + except Exception as e: + logger.debug(f"Error with selector {selector}: {str(e)}") + + # Also try finding URLs in the raw HTML using regex + url_pattern = r'href="(https?://[^"]+)"' + raw_urls = re.findall(url_pattern, html_content) + for url in raw_urls: + if not url.startswith('https://www.google.com'): + urls.add(url) + + return list(urls) + +def google_search_scraper(search_text: str, site_domain: str, retry_count: int = 0) -> List[str]: + """Scrape Google search results with multiple query formats""" + logger.info(f"Searching for '{search_text}' on domain: {site_domain}") headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + 'Referer': 'https://www.google.com/', + 'DNT': '1' } - try: - response = requests.get(url, headers=headers) - response.raise_for_status() - - soup = BeautifulSoup(response.content, 'html.parser') - search_results = soup.find_all('div', class_='g') - - urls = [] - for result in search_results[:5]: - link = result.find('a') - if link and 'href' in link.attrs: - url = link['href'] - if url.startswith('http'): - urls.append(url) - - return urls[:5] - - except requests.RequestException as e: - raise HTTPException(status_code=500, detail=f"Error scraping {site_domain}: {str(e)}") + # Try different query formats + query_formats = [ + f"{search_text} site:{site_domain}", + f"site:{site_domain} {search_text}", + f"\"{search_text}\" site:{site_domain}" + ] + + all_urls = set() + + for query in query_formats: + try: + # urlencode({'q': query}) already emits the "q=" prefix, so don't prepend another one + google_url = f"https://www.google.com/search?{urlencode({'q': query})}" + logger.debug(f"Trying query format: {query}") + + response =
requests.get(google_url, headers=headers) + + if response.status_code == 200: + urls = extract_search_results(response.text) + domain_urls = [url for url in urls if site_domain in urlparse(url).netloc] + all_urls.update(domain_urls) + else: + logger.warning(f"Received status code {response.status_code} for query format: {query}") + + sleep(2) # Delay between requests + + except Exception as e: + logger.error(f"Error processing query format '{query}': {str(e)}") + if retry_count < MAX_RETRIES: + sleep(RETRY_DELAY) + return google_search_scraper(search_text, site_domain, retry_count + 1) + + valid_urls = list(all_urls) + logger.info(f"Found {len(valid_urls)} unique URLs for domain: {site_domain}") + return valid_urls[:5] # Return up to 5 URLs def calculate_similarity(query_embedding: List[float], url_embedding: List[float]) -> float: + """Calculate cosine similarity between two embeddings""" query_array = np.array(query_embedding) url_array = np.array(url_embedding) @@ -102,59 +168,142 @@ def calculate_similarity(query_embedding: List[float], url_embedding: List[float ) return float(similarity) + @scrap_websites_router.post("/search", response_model=SearchResponse) async def search_websites(request: SearchRequest): + logger.info(f"Starting search with query: {request.search_text}") + logger.info(f"Source types requested: {request.source_types}") + results = {} error_messages = {} - url_similarities = {} # Initialize OpenAI client + logger.debug("Initializing OpenAI client") openai_client = OpenAIClient(OPENAI_API_KEY) + # Get domains based on requested source types + domains = [] + for source_type in request.source_types: + if source_type in SOURCES: + domains.extend([source.domain for source in SOURCES[source_type]]) + + if not domains: + logger.warning("No valid source types provided. 
Using all available domains.") + domains = [source.domain for source in get_all_sources()] + + logger.info(f"Processing {len(domains)} domains") + # Enhance search text with key terms search_context = request.search_text + logger.debug("Getting query embedding from OpenAI") query_embedding = openai_client.get_embeddings([search_context])[0] # Higher similarity threshold for better filtering SIMILARITY_THRESHOLD = 0.75 + MAX_URLS_PER_DOMAIN = 2 # Adjusted to ensure total stays under 5 + TOTAL_MAX_URLS = 5 # Maximum URLs allowed for AIFactCheckRequest - for domain in request.site_domains: + total_urls_collected = 0 + for domain in domains[:3]: # Limit to 3 domains for testing + if total_urls_collected >= TOTAL_MAX_URLS: + break + + logger.info(f"Processing domain: {domain}") try: urls = google_search_scraper(request.search_text, domain) - url_sims = [] valid_urls = [] + logger.debug(f"Found {len(urls)} URLs for domain {domain}") + for url in urls: + if len(valid_urls) >= MAX_URLS_PER_DOMAIN or total_urls_collected >= TOTAL_MAX_URLS: + break + url_text = extract_url_text(url) - # Skip URLs with no meaningful text extracted if not url_text: + logger.debug(f"No meaningful text extracted from URL: {url}") continue - + + logger.debug("Getting URL embedding from OpenAI") url_embedding = openai_client.get_embeddings([url_text])[0] similarity = calculate_similarity(query_embedding, url_embedding) - url_sims.append(UrlSimilarityInfo( - url=url, - similarity=similarity, - extracted_text=url_text - )) + logger.debug(f"Similarity score for {url}: {similarity}") if similarity >= SIMILARITY_THRESHOLD: valid_urls.append(url) + total_urls_collected += 1 results[domain] = valid_urls - url_similarities[domain] = sorted(url_sims, - key=lambda x: x.similarity, - reverse=True) + logger.info(f"Successfully processed domain {domain}. 
Found {len(valid_urls)} valid URLs") except HTTPException as e: + logger.error(f"HTTP Exception for domain {domain}: {str(e.detail)}") error_messages[domain] = str(e.detail) except Exception as e: + logger.error(f"Unexpected error for domain {domain}: {str(e)}") error_messages[domain] = f"Unexpected error for {domain}: {str(e)}" + + sleep(1) # Add delay between processing different domains + logger.info("Search completed") + logger.debug(f"Results found for {len(results)} domains") + logger.debug(f"Errors encountered for {len(error_messages)} domains") + + # Collect all valid URLs from results + all_valid_urls = [] + for domain_urls in results.values(): + all_valid_urls.extend(domain_urls) + + logger.info(f"Total valid URLs collected: {len(all_valid_urls)}") + + # Create request body for AI fact check + if all_valid_urls: + fact_check_request = AIFactCheckRequest( + content=request.search_text, + urls=all_valid_urls[:TOTAL_MAX_URLS] # Ensure we don't exceed the limit + ) + + logger.info("Calling AI fact check service") + try: + ai_response = await ai_fact_check(fact_check_request) + logger.info("AI fact check completed successfully") + + # Format AI fact check response + formatted_response = { + "query": ai_response.query, + "token_usage": { + "prompt_tokens": ai_response.token_usage.prompt_tokens, + "completion_tokens": ai_response.token_usage.completion_tokens, + "total_tokens": ai_response.token_usage.total_tokens + }, + "sources": ai_response.sources, + "verification_result": { + url: { + "verdict": result.verdict, + "confidence": result.confidence, + "evidence": result.evidence, + "reasoning": result.reasoning, + "missing_info": result.missing_info + } for url, result in ai_response.verification_result.items() + } + } + + # Return response with AI fact check results + return SearchResponse( + results=results, + error_messages=error_messages, + ai_fact_check_result=formatted_response + ) + + except Exception as e: + logger.error(f"Error during AI fact check: {str(e)}") + error_messages["ai_fact_check"] = f"Error during fact checking: {str(e)}" + + # Return response without AI fact check if no valid URLs or error occurred return SearchResponse( results=results, error_messages=error_messages, - url_similarities=url_similarities + ai_fact_check_result=None ) \ No newline at end of file diff --git a/app/api/scrap_websites2.py b/app/api/scrap_websites2.py new file mode 100644 index 0000000..17542c6 --- /dev/null +++ b/app/api/scrap_websites2.py @@ -0,0 +1,261 @@ +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel +from typing import List, Dict, Optional +import requests +from bs4 import BeautifulSoup +import urllib.parse +import numpy as np +from time import sleep +import logging +from app.services.openai_client import OpenAIClient +from app.config import OPENAI_API_KEY +from app.websites.fact_checker_website import SOURCES, get_all_sources +from app.api.ai_fact_check import ai_fact_check +from app.models.fact_check_models import AIFactCheckRequest, AIFactCheckResponse + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +scrap_websites_router = APIRouter() + +# Configuration for rate limiting +RATE_LIMIT_DELAY = 2 # Delay between requests in seconds +MAX_RETRIES = 1 # Maximum number of retries per domain +RETRY_DELAY = 1 # Delay between retries in seconds + +class SearchRequest(BaseModel): + search_text: str + source_types: List[str] = 
["fact_checkers"] + +class UrlSimilarityInfo(BaseModel): + url: str + similarity: float + extracted_text: str + +class SearchResponse(BaseModel): + results: Dict[str, List[str]] + error_messages: Dict[str, str] + ai_fact_check_result: Optional[AIFactCheckResponse] = None + +def extract_url_text(url: str) -> str: + """Extract and process meaningful text from URL path with improved cleaning""" + logger.debug(f"Extracting text from URL: {url}") + try: + parsed = urllib.parse.urlparse(url) + path = parsed.path + path = path.replace('.html', '').replace('/index', '').replace('.php', '') + segments = [seg for seg in path.split('/') if seg] + cleaned_segments = [] + for segment in segments: + segment = segment.replace('-', ' ').replace('_', ' ') + if not (segment.replace(' ', '').isdigit() or + all(part.isdigit() for part in segment.split() if part)): + cleaned_segments.append(segment) + + common_words = { + 'www', 'live', 'news', 'intl', 'index', 'world', 'us', 'uk', + 'updates', 'update', 'latest', 'breaking', 'new', 'article' + } + + text = ' '.join(cleaned_segments) + words = [word.lower() for word in text.split() + if word.lower() not in common_words and len(word) > 1] + + result = ' '.join(words) + logger.debug(f"Extracted text: {result}") + return result + except Exception as e: + logger.error(f"Error extracting text from URL {url}: {str(e)}") + return '' + +def google_search_scraper(search_text: str, site_domain: str, retry_count: int = 0) -> List[str]: + """Scrape Google search results with retry logic and rate limiting""" + logger.info(f"Searching for '{search_text}' on domain: {site_domain} (Attempt {retry_count + 1}/{MAX_RETRIES})") + + if retry_count >= MAX_RETRIES: + logger.error(f"Max retries exceeded for domain: {site_domain}") + raise HTTPException( + status_code=429, + detail=f"Max retries exceeded for {site_domain}" + ) + + query = f"{search_text} \"site:{site_domain}\"" + encoded_query = urllib.parse.quote(query) + base_url = "https://www.google.com/search" + url = f"{base_url}?q={encoded_query}" + + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' + } + + try: + logger.debug(f"Waiting {RATE_LIMIT_DELAY} seconds before request") + sleep(RATE_LIMIT_DELAY) + + logger.debug(f"Making request to Google Search for domain: {site_domain}") + response = requests.get(url, headers=headers) + + if response.status_code == 429 or "sorry/index" in response.url: + logger.warning(f"Rate limit hit for domain {site_domain}. Retrying after delay...") + sleep(RETRY_DELAY) + return google_search_scraper(search_text, site_domain, retry_count + 1) + + response.raise_for_status() + + soup = BeautifulSoup(response.content, 'html.parser') + search_results = soup.find_all('div', class_='g') + + urls = [] + for result in search_results[:3]: + link = result.find('a') + if link and 'href' in link.attrs: + url = link['href'] + if url.startswith('http'): + urls.append(url) + + logger.info(f"Found {len(urls)} results for domain: {site_domain}") + return urls[:5] + + except requests.RequestException as e: + if retry_count < MAX_RETRIES: + logger.warning(f"Request failed for {site_domain}. Retrying... Error: {str(e)}") + sleep(RETRY_DELAY) + return google_search_scraper(search_text, site_domain, retry_count + 1) + logger.error(f"All retries failed for domain {site_domain}. 
Error: {str(e)}") + raise HTTPException( + status_code=500, + detail=f"Error scraping {site_domain}: {str(e)}" + ) + +def calculate_similarity(query_embedding: List[float], url_embedding: List[float]) -> float: + """Calculate cosine similarity between two embeddings""" + query_array = np.array(query_embedding) + url_array = np.array(url_embedding) + + similarity = np.dot(url_array, query_array) / ( + np.linalg.norm(url_array) * np.linalg.norm(query_array) + ) + return float(similarity) + +@scrap_websites_router.post("/search", response_model=SearchResponse) +async def search_websites(request: SearchRequest): + logger.info(f"Starting search with query: {request.search_text}") + logger.info(f"Source types requested: {request.source_types}") + + results = {} + error_messages = {} + url_similarities = {} + + # Initialize OpenAI client + logger.debug("Initializing OpenAI client") + openai_client = OpenAIClient(OPENAI_API_KEY) + + # Get domains based on requested source types + domains = [] + for source_type in request.source_types: + if source_type in SOURCES: + domains.extend([source.domain for source in SOURCES[source_type]]) + + if not domains: + logger.warning("No valid source types provided. Using all available domains.") + domains = [source.domain for source in get_all_sources()] + + logger.info(f"Processing {len(domains)} domains") + + # Enhance search text with key terms + search_context = request.search_text + logger.debug("Getting query embedding from OpenAI") + query_embedding = openai_client.get_embeddings([search_context])[0] + + # Higher similarity threshold for better filtering + SIMILARITY_THRESHOLD = 0.75 + + for domain in domains: + logger.info(f"Processing domain: {domain}") + try: + urls = google_search_scraper(request.search_text, domain) + url_sims = [] + valid_urls = [] + + logger.debug(f"Found {len(urls)} URLs for domain {domain}") + + for url in urls: + url_text = extract_url_text(url) + + if not url_text: + logger.debug(f"No meaningful text extracted from URL: {url}") + continue + + logger.debug("Getting URL embedding from OpenAI") + url_embedding = openai_client.get_embeddings([url_text])[0] + similarity = calculate_similarity(query_embedding, url_embedding) + + logger.debug(f"Similarity score for {url}: {similarity}") + + url_sims.append(UrlSimilarityInfo( + url=url, + similarity=similarity, + extracted_text=url_text + )) + + if similarity >= SIMILARITY_THRESHOLD: + valid_urls.append(url) + + results[domain] = valid_urls + url_similarities[domain] = sorted(url_sims, + key=lambda x: x.similarity, + reverse=True) + + logger.info(f"Successfully processed domain {domain}. 
Found {len(valid_urls)} valid URLs") + + except HTTPException as e: + logger.error(f"HTTP Exception for domain {domain}: {str(e.detail)}") + error_messages[domain] = str(e.detail) + except Exception as e: + logger.error(f"Unexpected error for domain {domain}: {str(e)}") + error_messages[domain] = f"Unexpected error for {domain}: {str(e)}" + + logger.info("Search completed") + logger.debug(f"Results found for {len(results)} domains") + logger.debug(f"Errors encountered for {len(error_messages)} domains") + + # Collect all valid URLs from results + all_valid_urls = [] + for domain_urls in results.values(): + all_valid_urls.extend(domain_urls) + + logger.info(f"Total valid URLs collected: {len(all_valid_urls)}") + + # Create request body for AI fact check + if all_valid_urls: + fact_check_request = AIFactCheckRequest( + content=request.search_text, + urls=all_valid_urls + ) + + logger.info("Calling AI fact check service") + try: + ai_response = await ai_fact_check(fact_check_request) + logger.info("AI fact check completed successfully") + + # Return response with AI fact check results + return SearchResponse( + results=results, + error_messages=error_messages, + ai_fact_check_result=ai_response + ) + + except Exception as e: + logger.error(f"Error during AI fact check: {str(e)}") + error_messages["ai_fact_check"] = f"Error during fact checking: {str(e)}" + + # Return response without AI fact check if no valid URLs or error occurred + return SearchResponse( + results=results, + error_messages=error_messages, + ai_fact_check_result=None + ) \ No newline at end of file diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc index 239346e1dd0ca70c102289a9908f973365f9b466..6df8e42d278ee3c78aa5452c872101d3e81faf2c 100644 GIT binary patch delta 582 zcmYL_!E4iC6u`f)jZKy<$;z^HZIU)wyJ_4|h7<8NJP6f+!<$kM!Z*6zMtYEn7ZKfN z6}!GeJp}aTK|#j646hypK_>H%i{MdjtIHp-_fnW2e82a-$9wR5U+3ocy8cbmREDRu zcCY!s-PLuQH(+<92pgITOZNDM3|p0jB+iJOkcA>fqO4HGXfq?mqU<{VpuOib&e&b0 ztp^75>_zx#H>@BVKErCbN!TLHBjC5@!ltue*D-m@;!*v{jW}-~N_MQ}yVdPGhmr-q z9A6t4im4uD)rDNbRLHxwI))Q;jz6V(x7>%bVp3S5fI_mE62&MZN|B68_NkKEb^>?r z*e&yXxQ(-6i}0M#fyK%dUmi+!sj@@p5mMb$9AKw%+jvDqLWH-q5C1A%*z%mh=`gN?QpEu;N?12TebjdqJ}=5r4nZ zi*(oL@o&Qpz5;0|I@E*x>1Cr&U4$=qTaUo3-kznV#kHLHSa`FeWS9#jJEOVHl5=I5W>5u<_WO J`@!<*z9(~9k=y_P delta 798 zcmZvaO=uHA6vuayO*Yvyo2*Gc(*5Wrp^d3RZ52wPg0@f^?I{PLMU;>kV^Nd3n--gc zP{fbgR#L}9grImvcJ?v9|C?{AwaemH zzh7WrOn;ust!P`KD6

%NrpJet}$&gA>?|oe?;IImC@abTF7lF62f$@*pn~s!rrX z9XSW`=U60Gxq0?o;4Mo_GR&N3Ihd5lefgZY-;piki}+sg`l5*u`5<2%DXD3E8!u(@ zMg8cTe#0o-(apl5k;P_`Gaf+=<1v*dR91M^dV#U#0QrwN#5gR%P*DVtFwDt zmvy46Y;GoDk8CK>kfn~-l=BVA8y4zL->}B{hEAEk&=ifEW$nK9%)=d+Nshwmsm zTmrTbxvNi&L2p7RO!V;!=jtL4Tq4~RE56R6OmRKCD8R|$$` zMuG_|n5es)s#q78SijYO($eD%o>RoekV2i%s1w>>Co1G1_TJqzu|i(QC)hXSM|^us tV);tSW+2!D36`&EXKeKy_%S-w8aGU34*6STsozB5wO+TXN;)xiJ& diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py index bec9977..0949e51 100644 --- a/app/models/fact_check_models.py +++ b/app/models/fact_check_models.py @@ -140,7 +140,6 @@ class BaseFactCheckResponse(BaseModel): query: str token_usage: TokenUsage sources: List[str] - context_used: List[str] model_config = ConfigDict(json_schema_extra={ "example": { @@ -151,7 +150,6 @@ class BaseFactCheckResponse(BaseModel): "total_tokens": 150 }, "sources": ["source1.com", "source2.com"], - "context_used": ["Relevant context from sources"] } }) @@ -183,7 +181,6 @@ class GoogleFactCheckResponse(BaseFactCheckResponse): "reasoning": "Detailed analysis" }, "sources": ["factchecker.com"], - "context_used": ["Relevant context"], "token_usage": { "prompt_tokens": 100, "completion_tokens": 50, @@ -219,10 +216,6 @@ class AIFactCheckResponse(BaseFactCheckResponse): } }, "sources": ["source1.com", "source2.com"], - "context_used": [ - "Context from source 1", - "Context from source 2" - ], "token_usage": { "prompt_tokens": 200, "completion_tokens": 100, diff --git a/app/services/openai_client.py b/app/services/openai_client.py index 22541bb..07b6ae3 100644 --- a/app/services/openai_client.py +++ b/app/services/openai_client.py @@ -142,7 +142,6 @@ class AIFactChecker: return { "verification_result": response["response"], # This is now a dictionary "sources": sources, - "context_used": [doc.page_content for doc in relevant_docs], "token_usage": { "prompt_tokens": response["prompt_tokens"], "completion_tokens": response["completion_tokens"], diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc index 5fe153f4fe7eb6088bf019108f8b358e5f779ac9..095e86372ccf3e3756b303b4997437a8fc325911 100644 GIT binary patch delta 135 zcmeB`x*)-KnwOW00SNLlV$)N(Hu6nj6lShsPfAMGOU}=&;!4aZ$xAIS-dxD!#>S|& zc?X9p6Jyxqe_V--fs+%s?=$Y4?7$<*#v;!3jg@_JJdZRhyBOOCk;(NusvN?y+)NGZ lAM7Tt<#Ck`W&%q5{3IYO%vQ_|v~IDCqaDjZRv9N|HUJ%_BnAKg delta 367 zcmca0(JRGwnwOW00SL}nN2Xur-pDtFv0k8xH!U%_Bsn8BIa@EkD7}iSI4{2-wOB7X zKevjnAU`LwBom|n%;zpjEiFkcf=IF_B_)GJxe^QVQp+KNJf+2nCHX0dl@RrLl_iccV%Ksp6tYx$QUgW`RL)XNO1A9w`CxxTTAh_OB35&Fo?z$r1KY=zf# z9p{TW&LdG58STOxlBNrpP#gZh1rU^fyOVE OakOJO$SUK+%mx6KPJU_t diff --git a/app/websites/fact_checker_website.py b/app/websites/fact_checker_website.py index d6fae44..571b333 100644 --- a/app/websites/fact_checker_website.py +++ b/app/websites/fact_checker_website.py @@ -8,21 +8,8 @@ SOURCES = { "fact_checkers": [ FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1) for domain in [ - "factcheck.org", - "snopes.com", - "politifact.com", - "reuters.com", "bbc.com", - "apnews.com", - "usatoday.com", - "nytimes.com", - "washingtonpost.com", - "afp.com", - "fullfact.org", - "truthorfiction.com", - "leadstories.com", "altnews.in", - "boomlive.in", "en.prothomalo.com" ] ], diff --git a/output.json b/output.json new file mode 100644 index 0000000..8360403 --- /dev/null +++ b/output.json @@ -0,0 +1,595 @@ +{ + "kind": "customsearch#search", + "url": { + "type": "application/json", + "template": 
"https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={safe?}&cx={cx?}&sort={sort?}&filter={filter?}&gl={gl?}&cr={cr?}&googlehost={googleHost?}&c2coff={disableCnTwTranslation?}&hq={hq?}&hl={hl?}&siteSearch={siteSearch?}&siteSearchFilter={siteSearchFilter?}&exactTerms={exactTerms?}&excludeTerms={excludeTerms?}&linkSite={linkSite?}&orTerms={orTerms?}&dateRestrict={dateRestrict?}&lowRange={lowRange?}&highRange={highRange?}&searchType={searchType}&fileType={fileType?}&rights={rights?}&imgSize={imgSize?}&imgType={imgType?}&imgColorType={imgColorType?}&imgDominantColor={imgDominantColor?}&alt=json" + }, + "queries": { + "request": [ + { + "title": "Google Custom Search - Sheikh Hasina resigned as a Prime Minister of Bangladesh", + "totalResults": "758000", + "searchTerms": "Sheikh Hasina resigned as a Prime Minister of Bangladesh", + "count": 10, + "startIndex": 1, + "inputEncoding": "utf8", + "outputEncoding": "utf8", + "safe": "off", + "cx": "d437f1eb581de4590" + } + ], + "nextPage": [ + { + "title": "Google Custom Search - Sheikh Hasina resigned as a Prime Minister of Bangladesh", + "totalResults": "758000", + "searchTerms": "Sheikh Hasina resigned as a Prime Minister of Bangladesh", + "count": 10, + "startIndex": 11, + "inputEncoding": "utf8", + "outputEncoding": "utf8", + "safe": "off", + "cx": "d437f1eb581de4590" + } + ] + }, + "context": { + "title": "Prothom Alo" + }, + "searchInformation": { + "searchTime": 0.513164, + "formattedSearchTime": "0.51", + "totalResults": "758000", + "formattedTotalResults": "758,000" + }, + "items": [ + { + "kind": "customsearch#result", + "title": "Sheikh Hasina: Euphoria in Bangladesh after PM flees country", + "htmlTitle": "\u003cb\u003eSheikh Hasina\u003c/b\u003e: Euphoria in \u003cb\u003eBangladesh\u003c/b\u003e after PM flees country", + "link": "https://www.bbc.com/news/articles/clywww69p2vo", + "displayLink": "www.bbc.com", + "snippet": "Aug 5, 2024 ... 
Bangladeshi Prime Minister Sheikh Hasina has resigned after weeks of deadly anti-government protests, putting an end to her more than two decades dominating ...", + "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e \u003cb\u003eBangladeshi Prime Minister Sheikh Hasina\u003c/b\u003e has \u003cb\u003eresigned\u003c/b\u003e after weeks of deadly anti-government protests, putting an end to her more than two decades dominating ...", + "formattedUrl": "https://www.bbc.com/news/articles/clywww69p2vo", + "htmlFormattedUrl": "https://www.bbc.com/news/articles/clywww69p2vo", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ2noEFH2T-yJo4oB7DU_MF2FqAUzIHU5paMXHka1ny_vMi037f2gtOZ3of&s", + "width": "300", + "height": "168" + } + ], + "metatags": [ + { + "msapplication-tilecolor": "#da532c", + "og:image": "https://ichef.bbci.co.uk/news/1024/branded_news/db85/live/388ebc30-5367-11ef-aebc-6de4d31bf5cd.jpg", + "apple-itunes-app": "app-id=364147881, app-argument=https://www.bbc.com/news/articles/clywww69p2vo", + "twitter:title": "Sheikh Hasina: Euphoria in Bangladesh after PM flees country", + "twitter:card": "summary_large_image", + "og:image:alt": "Protesters storming Prime Minister Sheikh Hasina's palace after she fled the country", + "theme-color": "#ffffff", + "al:ios:app_name": "BBC: World News & Stories", + "og:title": "Sheikh Hasina: Euphoria in Bangladesh after PM flees country", + "al:android:package": "bbc.mobile.news.ww", + "al:ios:url": "bbcx://news/articles/clywww69p2vo", + "al:web:url": "https://bbc.com/news/articles/clywww69p2vo", + "og:description": "President Mohammed Shahabuddin ordered the release of a jailed former prime minister.", + "version": "2.12.0+20", + "al:ios:app_store_id": "364147881", + "twitter:image:src": "https://ichef.bbci.co.uk/news/1024/branded_news/db85/live/388ebc30-5367-11ef-aebc-6de4d31bf5cd.jpg", + "al:android:url": "bbcx://news/articles/clywww69p2vo", + "next-head-count": "36", + "twitter:image:alt": "Protesters storming Prime Minister Sheikh Hasina's palace after she fled the country", + "viewport": "width=device-width", + "twitter:description": "President Mohammed Shahabuddin ordered the release of a jailed former prime minister.", + "al:android:app_name": "BBC: World News & Stories" + } + ], + "cse_image": [ + { + "src": "https://ichef.bbci.co.uk/news/1024/branded_news/db85/live/388ebc30-5367-11ef-aebc-6de4d31bf5cd.jpg" + } + ] + } + }, + { + "kind": "customsearch#result", + "title": "Bangladesh: Prime Minister Hasina Resigns amid Mass Protests ...", + "htmlTitle": "\u003cb\u003eBangladesh\u003c/b\u003e: \u003cb\u003ePrime Minister Hasina Resigns\u003c/b\u003e amid Mass Protests ...", + "link": "https://www.hrw.org/news/2024/08/06/bangladesh-prime-minister-hasina-resigns-amid-mass-protests", + "displayLink": "www.hrw.org", + "snippet": "Aug 6, 2024 ... 
(London) – Bangladesh Prime Minister Sheikh Hasina resigned on August 5, 2024, and fled the country after weeks of student protests, ...", + "htmlSnippet": "Aug 6, 2024 \u003cb\u003e...\u003c/b\u003e (London) – \u003cb\u003eBangladesh Prime Minister Sheikh Hasina resigned\u003c/b\u003e on August 5, 2024, and fled the country after weeks of student protests, ...", + "formattedUrl": "https://www.hrw.org/.../bangladesh-prime-minister-hasina-resigns-amid-ma...", + "htmlFormattedUrl": "https://www.hrw.org/.../\u003cb\u003ebangladesh\u003c/b\u003e-\u003cb\u003eprime\u003c/b\u003e-\u003cb\u003eminister\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e-resigns-amid-ma...", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT7Rd-kZwml7ax4Q_93QFbon2bmbwYliEYvMil6qgM0xEG6tV72lS_iclM&s", + "width": "310", + "height": "163" + } + ], + "metatags": [ + { + "og:image": "https://www.hrw.org/sites/default/files/styles/opengraph/public/media_2024/08/202408asia_bangladesh_Sheikh%20Hasina.jpg?h=888143e8&itok=IKUTUc3F", + "og:image:alt": "Bangladesh’s former Prime Minister Sheikh Hasina addresses the media in Mirpur after the anti-quota protests.", + "article:published_time": "2024-08-06T14:00:00-0400", + "twitter:card": "summary_large_image", + "twitter:title": "Bangladesh: Prime Minister Hasina Resigns amid Mass Protests", + "og:site_name": "Human Rights Watch", + "twitter:site:id": "14700316", + "handheldfriendly": "true", + "og:title": "Bangladesh: Prime Minister Hasina Resigns amid Mass Protests", + "google": "H_DzcJuJMJKVAO6atlPsK4HHr2WienspT6e74P5fVFY", + "og:updated_time": "2024-08-08T10:24:02-0400", + "og:description": "Bangladesh Prime Minister Sheikh Hasina resigned on August 5, 2024, and fled the country after weeks of student protests.", + "og:image:secure_url": "https://www.hrw.org/sites/default/files/styles/opengraph/public/media_2024/08/202408asia_bangladesh_Sheikh%20Hasina.jpg?h=888143e8&itok=IKUTUc3F", + "article:publisher": "https://www.facebook.com/HumanRightsWatch", + "twitter:image": "https://www.hrw.org/sites/default/files/styles/opengraph/public/media_2024/08/202408asia_bangladesh_Sheikh%20Hasina.jpg?h=888143e8&itok=IKUTUc3F", + "twitter:image:alt": "Bangladesh’s former Prime Minister Sheikh Hasina addresses the media in Mirpur after the anti-quota protests.", + "twitter:site": "@hrw", + "article:modified_time": "2024-08-08T10:24:02-0400", + "viewport": "width=device-width, initial-scale=1.0", + "twitter:description": "Bangladesh Prime Minister Sheikh Hasina resigned on August 5, 2024, and fled the country after weeks of student protests.", + "mobileoptimized": "width", + "og:url": "https://www.hrw.org/news/2024/08/06/bangladesh-prime-minister-hasina-resigns-amid-mass-protests" + } + ], + "cse_image": [ + { + "src": "https://www.hrw.org/sites/default/files/styles/opengraph/public/media_2024/08/202408asia_bangladesh_Sheikh%20Hasina.jpg?h=888143e8&itok=IKUTUc3F" + } + ] + } + }, + { + "kind": "customsearch#result", + "title": "Bangladesh wakes up to new uncertain future after PM Sheikh ...", + "htmlTitle": "\u003cb\u003eBangladesh\u003c/b\u003e wakes up to new uncertain future after PM \u003cb\u003eSheikh\u003c/b\u003e ...", + "link": "https://www.bbc.com/news/live/ckdgg87lnkdt", + "displayLink": "www.bbc.com", + "snippet": "Aug 5, 2024 ... Yesterday's historic events saw Bangladesh's Prime Minister Sheikh Hasina resign from power and flee the country. 
Today, government ...", + "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e Yesterday's historic events saw \u003cb\u003eBangladesh's Prime Minister Sheikh Hasina resign\u003c/b\u003e from power and flee the country. Today, government ...", + "formattedUrl": "https://www.bbc.com/news/live/ckdgg87lnkdt", + "htmlFormattedUrl": "https://www.bbc.com/news/live/ckdgg87lnkdt", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ9V5V2pFKUOVvlosPa5swslIzMQnDiFW21RkSxNXvXxhrcyvRNZMc2bqXE&s", + "width": "300", + "height": "168" + } + ], + "metatags": [ + { + "og:image": "https://static.files.bbci.co.uk/ws/simorgh-assets/public/news/images/metadata/poster-1024x576.png", + "theme-color": "#FFFFFF", + "og:type": "article", + "twitter:title": "Bangladesh wakes up to new uncertain future after PM Sheikh Hasina's dramatic resignation", + "og:site_name": "BBC News", + "twitter:url": "https://www.bbc.com/news/live/ckdgg87lnkdt", + "og:title": "Bangladesh wakes up to new uncertain future after PM Sheikh Hasina's dramatic resignation", + "msapplication-tileimage": "https://static.files.bbci.co.uk/core/website/assets/static/icons/windows-phone/news/windows-phone-icon-270x270.23502b4459eb7a6ab2ab.png", + "og:description": "Looting and disorder have been reported in the South Asian nation, a day after mass protests forced Ms Hasina to flee and resign.", + "fb:pages": "1143803202301544,317278538359186,1392506827668140,742734325867560,185246968166196,156060587793370,137920769558355,193435954068976,21263239760,156400551056385,929399697073756,154344434967,228735667216,80758950658,260212261199,294662213128,1086451581439054,283348121682053,295830058648,239931389545417,304314573046,310719525611571,647687225371774,1159932557403143,286567251709437,1731770190373618,125309456546,163571453661989,285361880228,512423982152360,238003846549831,176663550714,260967092113,118450564909230,100978706649892,15286229625,122103087870579,120655094632228,102814153147070,124715648647,153132638110668,150467675018739", + "twitter:creator": "@BBCWorld", + "article:author": "https://www.facebook.com/bbcnews", + "twitter:image": "https://static.files.bbci.co.uk/ws/simorgh-assets/public/news/images/metadata/poster-1024x576.png", + "fb:app_id": "1609039196070050", + "twitter:site": "@BBCWorld", + "viewport": "width=device-width, initial-scale=1", + "twitter:description": "Looting and disorder have been reported in the South Asian nation, a day after mass protests forced Ms Hasina to flee and resign.", + "og:locale": "en_GB", + "og:image_alt": "BBC News", + "fb:admins": "100004154058350", + "og:url": "https://www.bbc.com/news/live/ckdgg87lnkdt", + "format-detection": "telephone=no" + } + ], + "cse_image": [ + { + "src": "https://static.files.bbci.co.uk/ws/simorgh-assets/public/news/images/metadata/poster-1024x576.png" + } + ] + } + }, + { + "kind": "customsearch#result", + "title": "Bangladesh protests: PM Sheikh Hasina flees to India as ...", + "htmlTitle": "\u003cb\u003eBangladesh\u003c/b\u003e protests: PM \u003cb\u003eSheikh Hasina\u003c/b\u003e flees to India as ...", + "link": "https://www.cnn.com/2024/08/05/asia/bangladesh-prime-minister-residence-stormed-intl/index.html", + "displayLink": "www.cnn.com", + "snippet": "Aug 6, 2024 ... 
The prime minister of Bangladesh, Sheikh Hasina, resigned and fled to neighboring India on Monday after protesters stormed her official ...", + "htmlSnippet": "Aug 6, 2024 \u003cb\u003e...\u003c/b\u003e The \u003cb\u003eprime minister of Bangladesh\u003c/b\u003e, \u003cb\u003eSheikh Hasina\u003c/b\u003e, \u003cb\u003eresigned\u003c/b\u003e and fled to neighboring India on Monday after protesters stormed her official ...", + "formattedUrl": "https://www.cnn.com/2024/08/05/.../bangladesh-prime-minister.../index.ht...", + "htmlFormattedUrl": "https://www.cnn.com/2024/08/05/.../\u003cb\u003ebangladesh\u003c/b\u003e-\u003cb\u003eprime\u003c/b\u003e-\u003cb\u003eminister\u003c/b\u003e.../index.ht...", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcScyayfP1an0tjs821kLSqSGIsgUFwc02vkRXh6ERXuqeV7xOEt3sC__sM&s", + "width": "300", + "height": "168" + } + ], + "metatags": [ + { + "og:image": "https://media.cnn.com/api/v1/images/stellar/prod/ap24218390125876-2.jpg?c=16x9&q=w_800,c_fill", + "twitter:title": "Bangladesh prime minister flees to India as anti-government protesters storm her residence | CNN", + "og:type": "article", + "twitter:card": "summary_large_image", + "article:published_time": "2024-08-05T10:01:00.074Z", + "og:site_name": "CNN", + "author": "Isaac Yee, Tanbirul Miraj Ripon", + "og:title": "Bangladesh prime minister flees to India as anti-government protesters storm her residence | CNN", + "meta-section": "world", + "type": "article", + "og:description": "The prime minister of Bangladesh, Sheikh Hasina, resigned and fled to neighboring India on Monday after protesters stormed her official residence after weeks of deadly anti-government demonstrations in the South Asian nation.", + "twitter:image": "https://media.cnn.com/api/v1/images/stellar/prod/ap24218390125876-2.jpg?c=16x9&q=w_800,c_fill", + "article:publisher": "https://www.facebook.com/CNN", + "fb:app_id": "80401312489", + "twitter:site": "@CNN", + "article:modified_time": "2024-08-06T05:24:05.249Z", + "viewport": "width=device-width,initial-scale=1,shrink-to-fit=no", + "twitter:description": "The prime minister of Bangladesh, Sheikh Hasina, resigned and fled to neighboring India on Monday after protesters stormed her official residence after weeks of deadly anti-government demonstrations in the South Asian nation.", + "template_type": "article_leaf", + "theme": "world", + "og:url": "https://www.cnn.com/2024/08/05/asia/bangladesh-prime-minister-residence-stormed-intl/index.html" + } + ], + "cse_image": [ + { + "src": "https://media.cnn.com/api/v1/images/stellar/prod/ap24218390125876-2.jpg?c=16x9&q=w_800,c_fill" + } + ] + } + }, + { + "kind": "customsearch#result", + "title": "Why did Bangladesh PM Sheikh Hasina resign and where is she ...", + "htmlTitle": "Why did \u003cb\u003eBangladesh\u003c/b\u003e PM \u003cb\u003eSheikh Hasina resign\u003c/b\u003e and where is she ...", + "link": "https://www.reuters.com/world/asia-pacific/why-did-bangladesh-pm-sheikh-hasina-resign-where-is-she-now-2024-08-06/", + "displayLink": "www.reuters.com", + "snippet": "Aug 6, 2024 ... 
Aug 7 (Reuters) - Sheikh Hasina resigned as Bangladesh's prime minister and fled the country on Monday following weeks of dedly protests ...", + "htmlSnippet": "Aug 6, 2024 \u003cb\u003e...\u003c/b\u003e Aug 7 (Reuters) - \u003cb\u003eSheikh Hasina resigned\u003c/b\u003e as \u003cb\u003eBangladesh's prime minister\u003c/b\u003e and fled the country on Monday following weeks of dedly protests ...", + "formattedUrl": "https://www.reuters.com/.../why-did-bangladesh-pm-sheikh-hasina-resign-...", + "htmlFormattedUrl": "https://www.reuters.com/.../why-did-\u003cb\u003ebangladesh\u003c/b\u003e-pm-\u003cb\u003esheikh\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e-resign-...", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR_IDuyjGdce77t1tWrSwheC6g8XSyuUQKn_KxA0H9x3eCRV4kretMyY0_J&s", + "width": "310", + "height": "162" + } + ], + "metatags": [ + { + "apple-itunes-app": "app-id=602660809, app-argument=https://www.reuters.com/world/asia-pacific/why-did-bangladesh-pm-sheikh-hasina-resign-where-is-she-now-2024-08-06/?id=E5O5XBJMZBPTDAUM7I6BFYX4UA", + "og:image": "https://www.reuters.com/resizer/v2/QSLRZINWOVJ25LWOYIOOXO4L6A.jpg?auth=581b869970d6c61101b4e8bba552bd5ae55ec08c8a333a33ef63a72f57b8f0c4&height=1005&width=1920&quality=80&smart=true", + "analytics:page_layout": "regular-article", + "article:published_time": "2024-08-07T03:23:35Z", + "og:image:width": "1200", + "twitter:card": "summary_large_image", + "og:site_name": "Reuters", + "og:article:modified_time": "2024-08-07T03:51:39.907Z", + "ccbot": "nofollow", + "analytics:ad_layout": "leaderboard, right rail, sponsored", + "analyticsattributes.topicchannel": "World", + "title": "Why did Bangladesh PM Sheikh Hasina resign and where is she now? | Reuters", + "og:description": "Sheikh Hasina resigned as Bangladesh's prime minister and fled the country on Monday following weeks of dedly protests that began as demonstrations by students against government job quotas but surged into a movement demanding her resignation.", + "twitter:creator": "@Reuters", + "twitter:image": "https://www.reuters.com/resizer/v2/QSLRZINWOVJ25LWOYIOOXO4L6A.jpg?auth=581b869970d6c61101b4e8bba552bd5ae55ec08c8a333a33ef63a72f57b8f0c4&height=1005&width=1920&quality=80&smart=true", + "twitter:image:alt": "Bangladeshi Prime Minister Sheikh Hasina reviews an honour guard at the Government House, during her visit to Thailand, in Bangkok, Thailand, April 26, 2024. REUTERS/Athit Perawongmetha/File Photo", + "twitter:site": "@Reuters", + "article:modified_time": "2024-08-07T03:51:39.907Z", + "fb:admins": "988502044532272", + "article:content_tier": "metered", + "og:type": "article", + "article:section": "Asia Pacific", + "og:image:alt": "Bangladeshi Prime Minister Sheikh Hasina reviews an honour guard at the Government House, during her visit to Thailand, in Bangkok, Thailand, April 26, 2024. 
REUTERS/Athit Perawongmetha/File Photo", + "twitter:title": "Why did Bangladesh PM Sheikh Hasina resign and where is she now?", + "ad:template": "article", + "og:image:url": "https://www.reuters.com/resizer/v2/QSLRZINWOVJ25LWOYIOOXO4L6A.jpg?auth=581b869970d6c61101b4e8bba552bd5ae55ec08c8a333a33ef63a72f57b8f0c4&height=1005&width=1920&quality=80&smart=true", + "dcsext.dartzone": "/4735792/reuters.com/world/apac/article", + "og:title": "Why did Bangladesh PM Sheikh Hasina resign and where is she now?", + "dcsext.channellist": "World;World;Asia Pacific;Asian Markets", + "og:image:height": "628", + "og:article:published_time": "2024-08-07T03:23:35Z", + "og:updated_time": "2024-08-07T03:51:39.907Z", + "fb:pages": "114050161948682", + "article:author": "Sudipto Ganguly", + "article:tag": "MTVID,EXPLN,TOPNWS,ANLINS,CIV,CWP,DIP,DLI,ECI,ECO,EDU,GEN,JOB,MCE,MPLT,MPOP,NEWS1,POL,RACR,SOCI,TOPCMB,VIO,SASIA,IN,PK,ASXPAC,BD,EMRG,ASIA,PACKAGE:US-TOP-NEWS,PACKAGE:WORLD-NEWS", + "analyticsattributes.topicsubchannel": "Asia Pacific", + "fb:app_id": "988502044532272", + "og:locale:alternate": "en_US", + "viewport": "width=device-width, initial-scale=1", + "twitter:description": "Sheikh Hasina resigned as Bangladesh's prime minister and fled the country on Monday following weeks of dedly protests that began as demonstrations by students against government job quotas but surged into a movement demanding her resignation.", + "og:locale": "en_US", + "og:url": "https://www.reuters.com/world/asia-pacific/why-did-bangladesh-pm-sheikh-hasina-resign-where-is-she-now-2024-08-06/" + } + ], + "cse_image": [ + { + "src": "https://www.reuters.com/resizer/v2/QSLRZINWOVJ25LWOYIOOXO4L6A.jpg?auth=581b869970d6c61101b4e8bba552bd5ae55ec08c8a333a33ef63a72f57b8f0c4&height=1005&width=1920&quality=80&smart=true" + } + ] + } + }, + { + "kind": "customsearch#result", + "title": "Bangladesh's 'Gen Z revolution' toppled PM Sheikh Hasina. Why did ...", + "htmlTitle": "\u003cb\u003eBangladesh's\u003c/b\u003e 'Gen Z revolution' toppled PM \u003cb\u003eSheikh Hasina\u003c/b\u003e. Why did ...", + "link": "https://www.cnn.com/2024/08/06/asia/bangladesh-protests-hasina-resignation-explainer-intl-hnk/index.html", + "displayLink": "www.cnn.com", + "snippet": "Aug 6, 2024 ... People celebrate the resignation of Prime Minister Sheikh Hasina in Dhaka, Bangladesh, on August 5, 2024. Mohammad Ponir Hossain/Reuters. CNN —.", + "htmlSnippet": "Aug 6, 2024 \u003cb\u003e...\u003c/b\u003e People celebrate the \u003cb\u003eresignation\u003c/b\u003e of \u003cb\u003ePrime Minister Sheikh Hasina\u003c/b\u003e in Dhaka, \u003cb\u003eBangladesh\u003c/b\u003e, on August 5, 2024. Mohammad Ponir Hossain/Reuters. CNN —.", + "formattedUrl": "https://www.cnn.com/2024/08/06/asia/bangladesh...hasina.../index.html", + "htmlFormattedUrl": "https://www.cnn.com/2024/08/06/asia/\u003cb\u003ebangladesh\u003c/b\u003e...\u003cb\u003ehasina\u003c/b\u003e.../index.html", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTOW5T__EO6GShxs6es-aGavTBFUU2GCU-SyqlBE3t5d0hFX5WugbjKA-JH&s", + "width": "300", + "height": "168" + } + ], + "metatags": [ + { + "og:image": "https://media.cnn.com/api/v1/images/stellar/prod/2024-08-05t184829z-2105365796-rc2l99a18sqr-rtrmadp-3-bangladesh-protests.jpg?c=16x9&q=w_800,c_fill", + "twitter:title": "Bangladesh’s ‘Gen Z revolution’ toppled a veteran leader. Why did they hit the streets and what happens now? 
| CNN", + "og:type": "article", + "twitter:card": "summary_large_image", + "article:published_time": "2024-08-06T08:16:31.519Z", + "og:site_name": "CNN", + "author": "Helen Regan", + "og:title": "Bangladesh’s ‘Gen Z revolution’ toppled a veteran leader. Why did they hit the streets and what happens now? | CNN", + "meta-section": "world", + "type": "article", + "og:description": "Inside Bangladesh it’s being dubbed a Gen Z revolution – a protest movement that pitted mostly young student demonstrators against a 76-year-old leader who had dominated her nation for decades and turned increasingly authoritarian in recent years.", + "twitter:image": "https://media.cnn.com/api/v1/images/stellar/prod/2024-08-05t184829z-2105365796-rc2l99a18sqr-rtrmadp-3-bangladesh-protests.jpg?c=16x9&q=w_800,c_fill", + "article:publisher": "https://www.facebook.com/CNN", + "fb:app_id": "80401312489", + "twitter:site": "@CNN", + "article:modified_time": "2024-08-07T03:48:11.066Z", + "viewport": "width=device-width,initial-scale=1,shrink-to-fit=no", + "twitter:description": "Inside Bangladesh it’s being dubbed a Gen Z revolution – a protest movement that pitted mostly young student demonstrators against a 76-year-old leader who had dominated her nation for decades and turned increasingly authoritarian in recent years.", + "template_type": "article_leaf", + "theme": "world", + "og:url": "https://www.cnn.com/2024/08/06/asia/bangladesh-protests-hasina-resignation-explainer-intl-hnk/index.html" + } + ], + "cse_image": [ + { + "src": "https://media.cnn.com/api/v1/images/stellar/prod/2024-08-05t184829z-2105365796-rc2l99a18sqr-rtrmadp-3-bangladesh-protests.jpg?c=16x9&q=w_800,c_fill" + } + ] + } + }, + { + "kind": "customsearch#result", + "title": "Bangladesh PM Sheikh Hasina resigns, ending 15 years in power ...", + "htmlTitle": "\u003cb\u003eBangladesh\u003c/b\u003e PM \u003cb\u003eSheikh Hasina resigns\u003c/b\u003e, ending 15 years in power ...", + "link": "https://www.npr.org/2024/08/05/g-s1-15332/bangladesh-protests", + "displayLink": "www.npr.org", + "snippet": "Aug 5, 2024 ... 
DHAKA, Bangladesh — Bangladesh's Prime Minister Sheikh Hasina resigned on Monday, ending 15 years in power as thousands of protesters defied ...", + "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e DHAKA, \u003cb\u003eBangladesh\u003c/b\u003e — \u003cb\u003eBangladesh's Prime Minister Sheikh Hasina resigned\u003c/b\u003e on Monday, ending 15 years in power as thousands of protesters defied ...", + "formattedUrl": "https://www.npr.org/2024/08/05/g-s1-15332/bangladesh-protests", + "htmlFormattedUrl": "https://www.npr.org/2024/08/05/g-s1-15332/\u003cb\u003ebangladesh\u003c/b\u003e-protests", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSqvTrTl13trd-nrF4oQvAQOY3z2N2MfxSSyZsmd4Pm6E_e0TTbu0ER6zE&s", + "width": "300", + "height": "168" + } + ], + "speakablespecification": [ + { + "cssselector": "[data-is-speakable]" + } + ], + "metatags": [ + { + "date": "2024-08-05", + "apple-itunes-app": "app-id=324906251, app-argument=https://www.npr.org/2024/08/05/g-s1-15332/bangladesh-protests", + "og:image": "https://npr.brightspotcdn.com/dims3/default/strip/false/crop/6043x3399+0+315/resize/1400/quality/100/format/jpeg/?url=http%3A%2F%2Fnpr-brightspot.s3.amazonaws.com%2Fba%2F99%2Ff772f9bd44ee9b1ddf5a4d9d1d98%2Fap24217447347066.jpg", + "og:type": "article", + "twitter:card": "summary_large_image", + "twitter:title": "Bangladesh PM Sheikh Hasina resigns, ending 15 years in power, as thousands protest", + "og:site_name": "NPR", + "cxenseparse:pageclass": "article", + "twitter:domain": "npr.org", + "cxenseparse:publishtime": "2024-08-05T04:07:23-04:00", + "og:title": "Bangladesh PM Sheikh Hasina resigns, ending 15 years in power, as thousands protest", + "rating": "General", + "og:description": "At least 95 people, including at least 14 police officers, died in clashes in the capital on Sunday. Broadband internet and mobile data services were cut off for about three hours on Monday.", + "fb:pages": "10643211755", + "twitter:image:src": "https://npr.brightspotcdn.com/dims3/default/strip/false/crop/6043x3399+0+315/resize/1400/quality/100/format/jpeg/?url=http%3A%2F%2Fnpr-brightspot.s3.amazonaws.com%2Fba%2F99%2Ff772f9bd44ee9b1ddf5a4d9d1d98%2Fap24217447347066.jpg", + "fb:app_id": "138837436154588", + "cxenseparse:author": "The Associated Press", + "twitter:site": "@NPR", + "article:modified_time": "2024-08-05T06:50:55-04:00", + "viewport": "width=device-width, initial-scale=1, shrink-to-fit=no", + "article:content_tier": "free", + "og:url": "https://www.npr.org/2024/08/05/g-s1-15332/bangladesh-protests", + "article:opinion": "false" + } + ], + "cse_image": [ + { + "src": "https://npr.brightspotcdn.com/dims3/default/strip/false/crop/6043x3399+0+315/resize/1400/quality/100/format/jpeg/?url=http%3A%2F%2Fnpr-brightspot.s3.amazonaws.com%2Fba%2F99%2Ff772f9bd44ee9b1ddf5a4d9d1d98%2Fap24217447347066.jpg" + } + ] + } + }, + { + "kind": "customsearch#result", + "title": "Tens of thousands protest in Bangladesh to demand resignation of ...", + "htmlTitle": "Tens of thousands protest in \u003cb\u003eBangladesh\u003c/b\u003e to demand \u003cb\u003eresignation\u003c/b\u003e of ...", + "link": "https://www.cnn.com/2022/12/11/asia/bangladesh-protests-prime-minister-sheikh-hasina-intl-hnk/index.html", + "displayLink": "www.cnn.com", + "snippet": "Dec 11, 2022 ... Supporters of Bangladesh's opposition party protest against the government of Prime Minister Sheikh Hasina on December 10, 2022. 
Mamunur Rashid/ ...", + "htmlSnippet": "Dec 11, 2022 \u003cb\u003e...\u003c/b\u003e Supporters of \u003cb\u003eBangladesh's\u003c/b\u003e opposition party protest against the government of \u003cb\u003ePrime Minister Sheikh Hasina\u003c/b\u003e on December 10, 2022. Mamunur Rashid/ ...", + "formattedUrl": "https://www.cnn.com/.../bangladesh...prime-minister-sheikh-hasina.../index....", + "htmlFormattedUrl": "https://www.cnn.com/.../\u003cb\u003ebangladesh\u003c/b\u003e...\u003cb\u003eprime\u003c/b\u003e-\u003cb\u003eminister\u003c/b\u003e-\u003cb\u003esheikh\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e.../index....", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ-JqzYxoZHcQ5wWQhH5Xq-JrKFFyWbdfS339bDlIrhMrc2Y_9BznDwjN5u&s", + "width": "275", + "height": "183" + } + ], + "metatags": [ + { + "og:image": "https://media.cnn.com/api/v1/images/stellar/prod/221210230748-02-dhaka-protests-121022.jpg?c=16x9&q=w_800,c_fill", + "twitter:title": "Tens of thousands protest in Bangladesh to demand resignation of Prime Minister | CNN", + "og:type": "article", + "twitter:card": "summary_large_image", + "article:published_time": "2022-12-11T06:09:58Z", + "og:site_name": "CNN", + "author": "Vedika Sud,Yong Xiong", + "og:title": "Tens of thousands protest in Bangladesh to demand resignation of Prime Minister | CNN", + "meta-section": "world", + "type": "article", + "og:description": "Tens of thousands of protesters took to the streets of Dhaka on Saturday calling for the dissolution of parliament to make way for new elections, and demand the resignation of Bangladeshi Prime Minister Sheikh Hasina.", + "twitter:image": "https://media.cnn.com/api/v1/images/stellar/prod/221210230748-02-dhaka-protests-121022.jpg?c=16x9&q=w_800,c_fill", + "article:publisher": "https://www.facebook.com/CNN", + "article:tag": "asia, bangladesh, brand safety-nsf other, brand safety-nsf sensitive, british national party, civil disobedience, continents and regions, domestic alerts, domestic-international news, elections and campaigns, government and public administration, iab-elections, iab-politics, political figures - intl, political organizations, political parties - intl, politics, protests and demonstrations, resignations, sheikh hasina, society, south asia", + "fb:app_id": "80401312489", + "twitter:site": "@CNN", + "article:modified_time": "2022-12-11T06:09:58Z", + "viewport": "width=device-width,initial-scale=1,shrink-to-fit=no", + "twitter:description": "Tens of thousands of protesters took to the streets of Dhaka on Saturday calling for the dissolution of parliament to make way for new elections, and demand the resignation of Bangladeshi Prime Minister Sheikh Hasina.", + "template_type": "article_leaf", + "theme": "world", + "og:url": "https://www.cnn.com/2022/12/11/asia/bangladesh-protests-prime-minister-sheikh-hasina-intl-hnk/index.html" + } + ], + "cse_image": [ + { + "src": "https://media.cnn.com/api/v1/images/stellar/prod/221210230749-dhaka-protests-221207.jpg?q=w_1110,c_fill" + } + ] + } + }, + { + "kind": "customsearch#result", + "title": "Timeline of events leading to the resignation of Bangladesh Prime ...", + "htmlTitle": "Timeline of events leading to the \u003cb\u003eresignation\u003c/b\u003e of \u003cb\u003eBangladesh Prime\u003c/b\u003e ...", + "link": "https://www.voanews.com/a/timeline-of-events-leading-to-the-resignation-of-bangladesh-prime-minister-sheikh-hasina/7731456.html", + "displayLink": "www.voanews.com", + "snippet": "Aug 5, 2024 ... 
Bangladesh Prime Minister Sheikh Hasina resigned and left the country Monday after clashes between student protesters and police left nearly 300 people dead.", + "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e \u003cb\u003eBangladesh Prime Minister Sheikh Hasina resigned\u003c/b\u003e and left the \u003cb\u003ecountry Monday\u003c/b\u003e after clashes between student protesters and police left nearly 300 people dead.", + "formattedUrl": "https://www.voanews.com/...bangladesh-prime-minister-sheikh-hasina/7731...", + "htmlFormattedUrl": "https://www.voanews.com/...\u003cb\u003ebangladesh\u003c/b\u003e-\u003cb\u003eprime\u003c/b\u003e-\u003cb\u003eminister\u003c/b\u003e-\u003cb\u003esheikh\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e/7731...", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS2o9D0XbnmDtsmWEVzDYCwGv4IHKkzATikOvXEDghsD_uzZj-G6_63zGyR&s", + "width": "311", + "height": "162" + } + ], + "metatags": [ + { + "msapplication-tilecolor": "#ffffff", + "apple-itunes-app": "app-id=632618796, app-argument=//7731456.ltr", + "og:image": "https://gdb.voanews.com/28CFE9FB-9B7B-4474-8342-C7BC5434B54A.jpg", + "og:type": "article", + "og:image:width": "308", + "twitter:card": "summary_large_image", + "og:site_name": "Voice of America", + "msvalidate.01": "3286EE554B6F672A6F2E608C02343C0E", + "author": "Sabir Mustafa", + "apple-mobile-web-app-title": "VOA", + "og:title": "Timeline of events leading to the resignation of Bangladesh Prime Minister Sheikh Hasina", + "msapplication-tileimage": "/Content/responsive/VOA/img/webApp/ico-144x144.png", + "fb:pages": "36235438073", + "og:description": "Hasina resigns after weeks of clashes between student protesters and police leave nearly 300 dead", + "article:publisher": "https://www.facebook.com/voiceofamerica", + "twitter:image": "https://gdb.voanews.com/28CFE9FB-9B7B-4474-8342-C7BC5434B54A.jpg", + "fb:app_id": "362002700549372", + "apple-mobile-web-app-status-bar-style": "black", + "twitter:site": "@voanews", + "viewport": "width=device-width, initial-scale=1.0", + "twitter:description": "Hasina resigns after weeks of clashes between student protesters and police leave nearly 300 dead", + "og:url": "https://www.voanews.com/a/timeline-of-events-leading-to-the-resignation-of-bangladesh-prime-minister-sheikh-hasina/7731456.html" + } + ], + "cse_image": [ + { + "src": "https://gdb.voanews.com/28CFE9FB-9B7B-4474-8342-C7BC5434B54A.jpg" + } + ] + } + }, + { + "kind": "customsearch#result", + "title": "Bangladesh's Sheikh Hasina forced to resign: What happened and ...", + "htmlTitle": "\u003cb\u003eBangladesh's Sheikh Hasina\u003c/b\u003e forced to \u003cb\u003eresign\u003c/b\u003e: What happened and ...", + "link": "https://www.aljazeera.com/news/2024/8/5/bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next", + "displayLink": "www.aljazeera.com", + "snippet": "Aug 5, 2024 ... 
Bangladesh Prime Minister Sheikh Hasina has stepped down from office, ending 15 years of what the opposition says was “authoritarian rule” and sparking ...", + "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e \u003cb\u003eBangladesh Prime Minister Sheikh Hasina\u003c/b\u003e has \u003cb\u003estepped down\u003c/b\u003e from office, ending 15 years of what the opposition says was “authoritarian rule” and sparking ...", + "formattedUrl": "https://www.aljazeera.com/.../bangladeshs-sheikh-hasina-forced-to-resign-w...", + "htmlFormattedUrl": "https://www.aljazeera.com/.../\u003cb\u003ebangladesh\u003c/b\u003es-\u003cb\u003esheikh\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e-forced-to-resign-w...", + "pagemap": { + "cse_thumbnail": [ + { + "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS2uyLUKVFCDpJ-_MjZ6dRKW5_LC1zknAIICxM5ZcVuAZtYqupigTOI_l_0&s", + "width": "259", + "height": "194" + } + ], + "metatags": [ + { + "pagetype": "Article Page", + "og:image": "https://www.aljazeera.com/wp-content/uploads/2024/08/AP24218390076912-1722855595.jpg?resize=1920%2C1440", + "apple-itunes-app": "app-id=1534955972", + "twitter:card": "summary_large_image", + "og:site_name": "Al Jazeera", + "postlabel": "Explainer", + "twitter:url": "https://www.aljazeera.com/news/2024/8/5/bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next", + "pagesection": "Explainer,News,Sheikh Hasina", + "channel": "aje", + "publisheddate": "2024-08-05T15:14:49", + "postid": "3096869", + "source": "Al Jazeera", + "og:description": "Prime minister reportedly flees to India after weeks of antigovernment protests.", + "taxonomyterms": "News, Sheikh Hasina, Asia, Bangladesh", + "lastdate": "2024-08-05T15:40:26", + "primarytopic": "News", + "twitter:image:alt": "Sheikh Hasina forced to resign: What happened and what’s next?", + "sourcetaxonomy": "Al Jazeera", + "internalreporting": "Break it down for me", + "where": "Asia, Bangladesh", + "primarytag": "Sheikh Hasina", + "ga4": "G-XN9JB9Q0M1", + "twitter:account_id": "5536782", + "og:type": "article", + "twitter:title": "Sheikh Hasina forced to resign: What happened and what’s next?", + "taxonomy-tags": "News, Sheikh Hasina", + "topics": "News", + "og:title": "Sheikh Hasina forced to resign: What happened and what’s next?", + "tags": "Sheikh Hasina", + "contenttype": "post", + "twitter:image:src": "https://www.aljazeera.com/wp-content/uploads/2024/08/AP24218390076912-1722855595.jpg?resize=1920%2C1440", + "articleslug": "bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next", + "postlink": "/news/2024/8/5/bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next", + "viewport": "width=device-width,initial-scale=1,shrink-to-fit=no", + "twitter:description": "Prime minister reportedly flees to India after weeks of antigovernment protests.", + "pagetitle": "Bangladesh’s Sheikh Hasina forced to resign: What happened and what’s next?", + "og:url": "https://www.aljazeera.com/news/2024/8/5/bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next" + } + ], + "cse_image": [ + { + "src": "https://www.aljazeera.com/wp-content/uploads/2024/08/AP24218390076912-1722855595.jpg?resize=1920%2C1440" + } + ] + } + } + ] +} diff --git a/search_response_altnews_in.html b/search_response_altnews_in.html new file mode 100644 index 0000000..a3ee576 --- /dev/null +++ b/search_response_altnews_in.html @@ -0,0 +1,28 @@ +q="Indian flag was drawn in BUET campus" site:altnews.in - Google Search

If you are not taken to another site within a few seconds, click here

Accessibility links

About 0 results (0.18 seconds)

Your search - q="Indian flag was drawn in BUET campus" site:altnews.in - did not match any documents.

Suggestions:

  • Make sure all words are spelled correctly
  • Try different keywords
  • Try more general keywords
  • Try fewer keywords
Google apps
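
The capture above is Google's zero-result page for the scraped query. A minimal sketch of how the scraper could detect this state before trying to extract result URLs — the marker phrases are assumptions, since Google localizes and reworks this page frequently (this capture is in Bengali):

from bs4 import BeautifulSoup

def has_no_results(html_content: str) -> bool:
    """Heuristic: detect Google's 'did not match any documents' page."""
    soup = BeautifulSoup(html_content, "html.parser")
    page_text = soup.get_text(" ", strip=True)
    # Assumed marker phrases; localized pages need their own variants,
    # so treat a miss here as inconclusive rather than as a result page.
    markers = ("did not match any documents", "About 0 results")
    return any(marker in page_text for marker in markers)
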
\ No newline at end of file diff --git a/search_response_bbc_com.html b/search_response_bbc_com.html new file mode 100644 index 0000000..4c6857e --- /dev/null +++ b/search_response_bbc_com.html @@ -0,0 +1,28 @@ +q="Indian flag BUET" site:bbc.com - Google Search

Accessibility links

About 0 results (0.16 seconds)

Your search - q="Indian flag BUET" site:bbc.com - did not match any documents.

Suggestions:

  • Make sure all words are spelled correctly
  • Try different keywords
  • Try more general keywords
  • Try fewer keywords
Google apps
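
The stray "q=" inside the captured query text points at the scraper's URL construction: interpolating urlencode({'q': query}) after "q=" in the URL (as the code removed in the next patch does) encodes the parameter name into the search text itself. A corrected sketch that encodes only the value:

from urllib.parse import quote_plus

def build_search_url(search_text: str, site_domain: str) -> str:
    """Build the exact-phrase, site-restricted search URL for one domain."""
    query = f'"{search_text}" site:{site_domain}'
    return f"https://www.google.com/search?q={quote_plus(query)}"

# build_search_url("Indian flag BUET", "bbc.com")
# -> https://www.google.com/search?q=%22Indian+flag+BUET%22+site%3Abbc.com
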
\ No newline at end of file diff --git a/search_response_en_prothomalo_com.html b/search_response_en_prothomalo_com.html new file mode 100644 index 0000000..298364a --- /dev/null +++ b/search_response_en_prothomalo_com.html @@ -0,0 +1,28 @@ +q="flag BUET campus" site:en.prothomalo.com - Google Search

Accessibility links

About 0 results (0.13 seconds)

Your search - q="flag BUET campus" site:en.prothomalo.com - did not match any documents.

Suggestions:

  • Make sure all words are spelled correctly
  • Try different keywords
  • Try more general keywords
  • Try fewer keywords
Google apps
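
All three captures came back empty, and the next patch replaces this HTML scraping with the Custom Search JSON API, which returns structured items like the JSON results file above. A minimal sketch of the same site-restricted lookup through that API, using the parameter names the patch adopts (the API key and engine ID are assumed to come from configuration):

import httpx

GOOGLE_SEARCH_URL = "https://www.googleapis.com/customsearch/v1"

async def custom_search(query: str, api_key: str, engine_id: str) -> dict:
    """Fetch one page of site-restricted results from the Custom Search JSON API."""
    params = {
        "key": api_key,
        "cx": engine_id,
        "q": f"({query}) (site:altnews.in OR site:bbc.com OR site:en.prothomalo.com)",
        "num": 10,   # results per page
        "start": 1,  # 1-based offset of the first result
    }
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(GOOGLE_SEARCH_URL, params=params)
        response.raise_for_status()
        return response.json()
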
\ No newline at end of file From 790d58402a05801b292d06d96f5f537ac3866ac0 Mon Sep 17 00:00:00 2001 From: Utsho Dey Date: Sun, 15 Dec 2024 18:22:04 +0600 Subject: [PATCH 4/5] base code added updated --- app/__pycache__/config.cpython-312.pyc | Bin 507 -> 576 bytes .../__pycache__/fact_check.cpython-312.pyc | Bin 5851 -> 6289 bytes app/api/fact_check.py | 124 ++-- app/api/scrap_websites.py | 533 ++++++++++-------- app/config.py | 1 + .../fact_check_models.cpython-312.pyc | Bin 9422 -> 9422 bytes app/models/scrap_websites_models.py | 43 ++ 7 files changed, 390 insertions(+), 311 deletions(-) create mode 100644 app/models/scrap_websites_models.py diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc index 91b068866373f8d9210744c00633448a6572698b..22a68c5e11d594851135abcde42a1e5f1ce57cb2 100644 GIT binary patch delta 120 zcmey(e1L`bG%qg~0}%A=h)Xw{$ScXXXQKN2iGJeT?2kCF&czSZzzD>}F_V`w8VTGG7M*T2(dvSn T!9`)i3k(92Z!*fU@B-xlDnA{D delta 72 zcmX@W@|&6WG%qg~0}y=7k4$Ho$ScV>Z=(8qmP$rV_K7c?CfhK|F>*|fX4IS9$LPYv X2UN!h#Kk_7?=TuomSd7*;Q?|1xONcE diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc index 7304a380d4674133d015ebad37508110a0b5cb53..98c2526f8272844d81c35b9bb2a51494f76a6e41 100644 GIT binary patch literal 6289 zcmbVQU2qdew(c3tjQ%x}C0Vv5+cGvb#(xCNPaq)}+t>zTCpJ)vvbj@2Gqz>)_}NEq4q*u?hO>X3#pI-s`h~_Y?W`_`@mz!vK3PI#lqS3?5uAkTbqYX)qN1)6olP} zdwQgiO_uCKTBV-Tf2U7(pFZb1{Xb5p4MBSA zjJT3~#GUj+JjsekMbaDb>TTwPFX@l?la-N53eiZSZk9xe1ZXsXM9XO;S|!ueCPUQK z?@d#Yj#P`b(i2TP}l-$V)EKExE@T(Q}$9%MqHT zTPwaM>{@Bf+Vk4bXgZ@vSaS{@I`mrq$(S^)#M7zaR!XyoQAJYXNeMbKeesy0F$d$a z0wveL$jHFKegQ@c2l|g{HKp>tp58-3?_huL0by^?Sif+1^k6HiRSu-n6A5WwG^X@U zO0nak(oZsytZ0FMsF$bHDOu9k-b6H>)SUeor}3y%sx<>&+v$TKb}u}NVZ zmmNhqx*jgh6qxMoTpo6@X;5KBa5SrB9*bU1wK>5i_+bW-ZdDDosMZni5b z%G2>gB8t1nhU%x(6`h`je_XFAZgJ=IEQYIw=Boc;dpb)qd!bq00NImMXxZ*RAG@;S zb9X*x6swmrOrnS(HOoq!1O+(+6N| zKBRNh>Jf`3kr7#uyTat)u%`@TWek9Cmdet%%!c~B3+7Ab&y@I^vZg5_?{|_R5e4&$ zAd^UyInd8aNTFHFc69Ct{5Qbg+K7}guPH~FEE7!#pUg2?X3ABrDic%gGFc`>>v7Nl z+@cV5(35CW{AJoMIz;EHp_(E(^_^s_TV{n!+F3kt%Lt6CwkXb}3 z$GpwVP&4SH=_s0^8qo{rU9x|0qQ5ke6WESeP`DXzBu7C{U@xg+&)=A4sMdOq8%KbbHeqEC*_^86veSgjUlabtd#0F zJoYtF>{doI!C;LAyNPEtM>gdu7rKH?|znR*TljSLI@qoX6E ztxk=dRFvtHn!QJ!O~rZ>aVe!>avQJ(GR*|$Tyx`ST$Thdb3$=GjVFF;OrKl#51Iv+ zJOqA$j3pNjlUuJbBJo)!S;1gmxR}mgvyjadGB|;WmBKqnM@o{3K@F3n+YrrNT81Qm z1r%WK@owl)1%J67cJ;5synV&{miTa<4=?iT9-~^;RUp~5@d6#Px|VCY7pk|;zqt_T zJu~=KV7*#huZCOHQ0)UNTj@Hpui!v+O;;oDM{ZbdS6+@R*6vvf>^U>2I($ox^?Aqo z&mAqQH}E(B9Ch9g2=6C_3RKlYJt=sh@a;+!a{HI~raa&D5P7UFm3O~4cXn=xU!UjK z-|*h&+m|aF)o|l-aD!UEOKt2{Yui@LOsMj)1qFf^#EauAPII85fXtqX6*uyR)lmI% zph>Om{_nBdp8q;BKe!NlS*_oyR=2>gstOq94P0ot*bKwGuvoLx^|yi_CVu+3`WZ3* zE)#3=PIX_9`he=$%v~`7b(QMlOxHK|?4a-1sh)1`j>AXj20OIfX{UO&F?TvRLU&W7 zej7#T9o{|@`-#10OYd&>x4T&=+%=hi&s~n|t1{npP<_1PuG36tpBV<=Hkepr!F@=H zRwjO)+=8EzYy~ph3XKH382UoojFix;!i2nl>D|(0t zl%wEJuUb#ct-61?MJYRdky6YCCy^!|;1+8DwdZ+&vi;-M*gw0$!Dt1~U|#EaD5Mb0 z*OSpFX95Ws__OfmvZg-tCN$;D;G@Wj^t3Afs`TKQo@3dw=9?wYR-Rk8iQx4tq9toN zfp9hp6fwy#6EMzM*VyfF9A&M)qW**WXKT*c04|liMM*^XWNn6Z2dHh$+D?EKKDRk* z6Rl)6(MIx|U9@NIAVr<%$O293pcDY#^*oNH9-@QvTf^N+cmVfkmi?Y_&SyuQJHeoH zzhS_04wZ%HtSyH@Xva# z_R5;dqdBqeis@GUP2yxA7}}1`Q|GSh`8c4Vl8z$;$mgY;Bg;Kk3(QlMMILMO zHE6^!MX>32k|Brax5YRZk$r=A82fO5rqSdCrW^x1Hzd zb5@uUs8!4p18hUIWCrP&reSWUjrc}AE&#U<;2@lVU}PekgkU2+osjg(4jnv@>QVrv z55hgLgt-vG2C;N%JU)@ZMR*XG!^yZz*l6C~k>Pzq1BXY~-~-*3<8~N~Nw7RjJUBr~ znqB{nN+;sd3?{%_vrcD@CF1fVL_mxjS0v4;NGBDD8%ANK)CAakt|*bl>Of|$`n4!l 
z;?YDHf=gI|9PT**SS5NaAa)j+!>yu+r#)(GLGIE#=~TD+`}WnX#d#ZgNMe1eItj5 z`ye=UPGeZ5A}A!HmNieTxMl!tMwLTJE73%$L`D?LrGwRit8q8TEs#x@AySiYToN@` z5%?Klc&h~ybD&ieSrktZ5FIRlI9z=_niuvcJ|2(hsv#6>HM23D12&EvsgQxa*X+93 zKm+iFKAB`Pif2DUt$rOh77=6_1>zTBZXLa0;?6Mfd74@02WXSTIvNL&t^~qxVmcX( zr|?eFk04sjT7D1FObIEaF;i)1Cqa$@Vv(~Y=!EQ=#_D^i@dj8GNt_Q4C(uqVhh`!8 zS^$;DFO#tX8B2}hW28WUuS_Cja}HNS7zwd@M2Rl#L|l7X#iaFgn91mx&*0dnNMXT^UbfRncYaCh^k%4U3Ga|o^SZFw(;6vzP95bvU&DX=lhqN+pec>NY_(K%`fJg zUtDb7b$&$k)i3#)^S5P4vts@|Z!40ezcv9fR?HseHi3po)BoJb|| z)~b~OwI-$fg8Y+J0|zTDP%Dxpw@)1cD&!QTphanqpQOE!eVvD1J=~&T0tf! z|BVGX{eN5LtG;*4>mO8pw|l`3{Qv%NBU%@szLg2^d8daR>Zb0Pseuj59ZNN#*EJ4w z(x0%@KpXc7w;Sl+vYjOFu7v!qtp`D;-`RXaUB2Jd?X^SU(>4lPKJBy)wX>gYe_^nb zy|;mb`g@%YsJXYZWvGSytbx>g*1|%~XYCfCTWdA*coZU92*}~(s2H_^c>zCAeB0xc z*=ew>n7j*O5;@>4kZCqBRh_yQnOHS;T8Px@!HnFwYM6`Rjf}}Nx#nJDuJqSeOo9n~ zoMglZ>Jf9%vNaHHGtx2LsITECVWCv38&Tby!ZR$G!Q$(nfQt{w_BF4j#m6`jJHxv{ z8X|FGNU%sU{TYrt*gdtpRDAE-jZZ_7gk18kAuE_DiuwYz{|VXu7u9}&YX5{BU!b1H zG)K|jAjlrO5cN9sXJ75b0}IHr!Z1|RWoD^rL%wRm3WCBzM?K{#?1guD?-CWrQvsE! zP`yoGy8P#J3(obR*3bPygwAsXGoq^=AuCN6d{Y$VTd40UAf4#$f2b3W{LuJ7Z`9ZF E|J2kYFaQ7m literal 5851 zcmb_geQX>@6`#G^`~LQQ*||G=&mR|G(%4CpG|dNb9mn~o9lO{Gs3)OibGx?J*}XHf zdv@%eX#^D{B7hN;1T~06RBBLyWC{N0pA?X&)FAolpgS!_6j2HI?~)j$3jD#Fy<4C2 z(MllZbh9&W-n@D5&6}Cu%m*Hi6G8gM)R!iAG$Zs^QZbUFUU(3q5W0eR#8VQIsSG8X zGA4@DO_DidhOb$oWjaIaG+MIA){IrQWo&xgBH3j}#vwa1PQ7lGT(Udkk-Zt8?9ce+ zKqeqJWt#Lhn-r8onUEaLg!Q^zipbGSRBp~RQ^Q-H)Op6H}N4(<=#5)D^^d3Xy z#`l)FR?ftPmg)Tsx>4d?f{%9#0g-w}?zu+a_8Jm+pWxyBf^UN618>k98YJdT>85`W zacPt0=zrzpSiYbNSo54XefpKb^I2g=74x}~G^N>jP8C#97J!jHDrQxU9u^f9YTo0c zqsNB_S?J9U4Zf;vsn?J751eKPP7DqVu}AvH2iY@Y!)c2aIiAl?O2RQNs}4*F*>hvU zR|%Tw;@sgE3jW~Cdl zPcEP}S9mFV?ZvyU&Pp(H<#XTr-1qxd-0f?=(51OcuU}PGeH|(vSJE6ea^d6q_(e@??HUZRvf@GXf(XF;^@VgB?p7OA#=$U2=zQ%Wn!q!h2rAh5D_@+CJQsZmH$Mn_)%_tdde2@?E z;cKR6k3CJ6{u7Ch5a008|HIhLe9JYS)HT!Xt+(p?>tdi77)SfilIb$rfzXnBPa{5{*z39j+5pUQCaXo<#8=Fm%o13yoC+Mx$80D@dd)a>O8n9OD!GS_GB0E4@Ldm%QG z*Jqv4XPp7S1F@hm6Zt}p*Zd=+?AYM=nc>sp?6J`^BS#mSPv(@u#Dtg?g`CQq%+;_d zOtxko&d)9EJ;Esh&%oSsvoNLH*?LUXq$_$PHKNBDZK125=S31SoWy{eM37&LAeozI z&I(LU5O{$H{~RC&I`t^iO~gkC0bm-Z-1UHCMNkc5%L9`w�_QZXSvj$U;qm zD^WgYklbos<)nHIrp!~T*JEFcHG%`=v-+h2@G2+?3fVLZDb8z6wNACjWNXd?#HjmH z3vM=vtx+|bF@Q@~jV+K{5l{=V%;EW;p>zVtbpeQm^a79El{(}z~rm2#_}>J!YnB&#sr+$gaGG$s$i>d^iNEF( zf#0C$&4yF_n=XN@F3F=CdR|q)Qv$D9Yn&ROapGjnqPt%6&2o}RF1d$tka3 zfXaU-m!xVO1;ZDHD$$k|I`mH~a&P%V^5B{`R%wo{1(`~8ZzZ;~67E>H(IM|cJMslD z@!y(Q_t<>SDzZ78>md|uUv26zBve9?3#UG6O58_rOZcL-YKl3+Yq96n{H<&LZ54m? 
zitl^Aia+weVTpSmB1^=3@mST1{LNQiDEX2fB|5I3C?$5>N6x^@)Y9NuYWt1cP2ona zoZ4SX?O#nDTpF!}+RC9+DU`bY`rXjJsueYN-SmAJ-B(HOTuzjdeZQhg$s<>-m00R# zsubI~{DpUCe|4_ZH&)s^R*64Xj_)qTcP}4%_h4zy@%OEzJ)?KyuT)~&Zccm{+f#|B z%kiD1_|E0XyS~z{6Mu-GtcHmupH!n@&Hcm39j%&Djzb?OwpHRCKNN}^}o zVs7FNO{GlI=ue79B z!`oK<+aWqWPB3o=SGxADCSO=h>|5L3bMv*;?FYfP0q45U<+olqQQd;vp$nr`)(r9Y zXnh;2+!|L7NGpzMtIV#nr(suOfw^r-J!O#YxFdnUbS7<3NQoC8BE@aFoCU_ z6ZZ6;8m?-b2wI#_^+yV&r*T5no-GtiURE{#rkz=TuEQ=O^vfh8r-go?)Hd-Xk!C!p zZIUNoB41qIe=n(Dktmz%pw&E4w=DpfZ!1lL`5rfQzh8>kvF`wxZ>%8pnvA~ qp2Om^R&B`C`~W#jrfTSfi3+W>^;VHy*fV%vFFXiC<2}7mx8gtZe#IgH diff --git a/app/api/fact_check.py b/app/api/fact_check.py index c5f494b..432f0de 100644 --- a/app/api/fact_check.py +++ b/app/api/fact_check.py @@ -2,7 +2,7 @@ from fastapi import APIRouter, HTTPException import json from datetime import datetime from typing import Dict, List - +import httpx from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL from app.models.fact_check_models import ( GoogleFactCheckRequest as FactCheckRequest, @@ -12,7 +12,6 @@ from app.models.fact_check_models import ( TokenUsage ) from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources -from app.api.scrap_websites import SearchRequest, search_websites fact_check_router = APIRouter() @@ -22,6 +21,39 @@ class CustomJSONEncoder(json.JSONEncoder): return obj.isoformat() return super().default(obj) +async def validate_api_key(): + """Validate the Google API key with a test request""" + async with httpx.AsyncClient() as client: + try: + test_url = f"{GOOGLE_FACT_CHECK_BASE_URL}claims:search" + params = { + "key": GOOGLE_API_KEY, + "query": "test", + "languageCode": "en-US", + "pageSize": 1 + } + response = await client.get(test_url, params=params) + response.raise_for_status() + return True + except httpx.HTTPStatusError as e: + if e.response.status_code == 403: + raise HTTPException( + status_code=503, + detail=ErrorResponse( + detail="Invalid or expired API key", + error_code="INVALID_API_KEY", + path="/check-facts" + ).dict() + ) + raise HTTPException( + status_code=503, + detail=ErrorResponse( + detail=f"API validation failed: {str(e)}", + error_code="API_VALIDATION_ERROR", + path="/check-facts" + ).dict() + ) + @fact_check_router.post( "/check-facts", response_model=FactCheckResponse, @@ -34,7 +66,7 @@ class CustomJSONEncoder(json.JSONEncoder): ) async def check_facts(request: FactCheckRequest) -> FactCheckResponse: """ - Check facts using multiple fact-checking sources and fallback to web search + Check facts using multiple fact-checking sources """ all_results = [] verified_results = [] @@ -50,10 +82,14 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: ).dict() ) + # Validate API key before proceeding + await validate_api_key() + # Get all sources in priority order all_sources = get_all_sources() all_sources_list = [] # To store source URLs contexts_used = [] # To store context snippets + failed_sources = [] # Track failed sources for source in all_sources: try: @@ -78,75 +114,39 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: if "textualRating" in review: contexts_used.append(review["textualRating"]) - except HTTPException: + except HTTPException as http_err: + failed_sources.append({ + "source": source.domain, + "error": str(http_err.detail) + }) continue except Exception as e: - # Log the error but continue with other sources - print(f"Error processing {source.domain}: {str(e)}") + failed_sources.append({ + "source": source.domain, + "error": 
str(e) + }) continue - # If no results found, try searching websites - if not all_results: - try: - # Create search request - search_request = SearchRequest( - search_text=request.content, - source_types=["fact_checkers"] - ) - - # Perform website search - search_response = await search_websites(search_request) - - # If AI fact check results are available, use them - if search_response.ai_fact_check_result: - # Create a claim from AI fact check result - ai_claim = { - "text": request.content, - "claimant": "AI Analysis", - "claimDate": datetime.now().isoformat(), - "claimReview": [{ - "publisher": { - "name": "AI Fact Checker", - "site": "ai-fact-check" - }, - "textualRating": search_response.ai_fact_check_result.verification_result["verdict"], - "title": "AI Fact Check Analysis", - "reviewDate": datetime.now().isoformat(), - "url": "" - }] - } - - validated_claim = Claim(**ai_claim).dict() - all_results.append(validated_claim) - - # Add sources and contexts - all_sources_list.extend(search_response.results.keys()) - if search_response.ai_fact_check_result.verification_result["evidence"]: - contexts_used.extend(search_response.ai_fact_check_result.verification_result["evidence"]) - - except Exception as e: - print(f"Error during website search: {str(e)}") - - # If still no results found after searching websites - if not all_results: + # Return partial results if some sources failed but we have data + if all_results: + verification_result = { + "verdict": "Partial Results Available" if failed_sources else "Complete Results", + "confidence": "Medium" if failed_sources else "High", + "evidence": contexts_used, + "reasoning": "Based on available fact checks", + "missing_info": f"{len(failed_sources)} sources failed" if failed_sources else None + } + else: raise HTTPException( status_code=404, detail=ErrorResponse( - detail="No fact check results found", + detail="No fact check results found. 
Failed sources: " + + ", ".join([f"{f['source']}: {f['error']}" for f in failed_sources]), error_code="NO_RESULTS_FOUND", path="/check-facts" ).dict() ) - # Prepare the verification result - verification_result = { - "verdict": "Insufficient Information", # Default verdict - "confidence": "Low", - "evidence": contexts_used, - "reasoning": "Based on available fact checks and web search results", - "missing_info": "Additional verification may be needed" - } - # Create token usage information token_usage = TokenUsage( prompt_tokens=0, @@ -161,10 +161,12 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: results=all_results, verification_result=verification_result, sources=list(set(all_sources_list)), + context_used=contexts_used, token_usage=token_usage, summary={ "total_sources": len(set(all_sources_list)), - "fact_checking_sites_queried": len(all_sources) + "fact_checking_sites_queried": len(all_sources), + "failed_sources": failed_sources } ) diff --git a/app/api/scrap_websites.py b/app/api/scrap_websites.py index 93fb31a..0dd584c 100644 --- a/app/api/scrap_websites.py +++ b/app/api/scrap_websites.py @@ -1,309 +1,342 @@ from fastapi import APIRouter, HTTPException -from pydantic import BaseModel -from typing import List, Dict, Optional -from urllib.parse import urlencode, urlparse -import urllib.parse -import numpy as np -from time import sleep +import httpx import logging -import requests -from bs4 import BeautifulSoup -import re +from urllib.parse import urlparse +import json from app.services.openai_client import OpenAIClient -from app.config import OPENAI_API_KEY +from app.config import OPENAI_API_KEY, GOOGLE_API_KEY, GOOGLE_ENGINE_ID from app.websites.fact_checker_website import SOURCES, get_all_sources from app.api.ai_fact_check import ai_fact_check +from typing import List, Dict, Optional +from pydantic import BaseModel from app.models.fact_check_models import ( - AIFactCheckRequest, - AIFactCheckResponse, - VerificationResult, - TokenUsage + AIFactCheckRequest, + FactCheckSource, + SourceType ) +# Define Pydantic models +class Publisher(BaseModel): + name: str + site: str + +class ClaimReview(BaseModel): + publisher: Publisher + textualRating: str + +class Claim(BaseModel): + claimReview: List[ClaimReview] + claimant: str + text: str + +class Summary(BaseModel): + fact_checking_sites_queried: int + total_sources: int + +class VerificationResult(BaseModel): + verdict: str + confidence: str + evidence: List[str] + reasoning: str + fact_check_type: str + +class SearchRequest(BaseModel): + search_text: str + source_types: List[str] + +class EnhancedFactCheckResponse(BaseModel): + query: str + results: List[Dict] + sources: List + summary: Summary + token_usage: Dict[str, int] + total_claims_found: int + verification_result: VerificationResult + # Configure logging logging.basicConfig( - level=logging.INFO, + level=logging.INFO, # Changed back to INFO from DEBUG format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) scrap_websites_router = APIRouter() -# Configuration for scraping -MAX_RETRIES = 2 -RETRY_DELAY = 2 +# Constants +RESULTS_PER_PAGE = 10 +MAX_PAGES = 5 +MAX_URLS_PER_DOMAIN = 5 +GOOGLE_SEARCH_URL = "https://www.googleapis.com/customsearch/v1" -class SearchRequest(BaseModel): - search_text: str - source_types: List[str] = ["fact_checkers"] - -class UrlSimilarityInfo(BaseModel): - url: str - similarity: float - extracted_text: str - -class SearchResponse(BaseModel): - results: Dict[str, List[str]] - 
error_messages: Dict[str, str] - ai_fact_check_result: Optional[Dict] = None - -def extract_url_text(url: str) -> str: - """Extract and process meaningful text from URL path with improved cleaning""" - logger.debug(f"Extracting text from URL: {url}") +def get_domain_from_url(url: str) -> str: + """Extract domain from URL with improved handling.""" try: - parsed = urllib.parse.urlparse(url) - path = parsed.path - path = path.replace('.html', '').replace('/index', '').replace('.php', '') - segments = [seg for seg in path.split('/') if seg] - cleaned_segments = [] - for segment in segments: - segment = segment.replace('-', ' ').replace('_', ' ') - if not (segment.replace(' ', '').isdigit() or - all(part.isdigit() for part in segment.split() if part)): - cleaned_segments.append(segment) - - common_words = { - 'www', 'live', 'news', 'intl', 'index', 'world', 'us', 'uk', - 'updates', 'update', 'latest', 'breaking', 'new', 'article' - } - - text = ' '.join(cleaned_segments) - words = [word.lower() for word in text.split() - if word.lower() not in common_words and len(word) > 1] - - result = ' '.join(words) - logger.debug(f"Extracted text: {result}") - return result + parsed = urlparse(url) + domain = parsed.netloc.lower() + # Remove 'www.' if present + if domain.startswith('www.'): + domain = domain[4:] + return domain except Exception as e: - logger.error(f"Error extracting text from URL {url}: {str(e)}") - return '' + logger.error(f"Error extracting domain from URL {url}: {str(e)}") + return "" -def extract_search_results(html_content): - """Extract URLs using multiple selectors and patterns""" - soup = BeautifulSoup(html_content, 'html.parser') - urls = set() # Using set to avoid duplicates +def is_valid_source_domain(domain: str, sources: List[FactCheckSource]) -> bool: + """Check if domain matches any source with improved matching logic.""" + if not domain: + return False - # Multiple CSS selectors to try - selectors = [ - 'div.g div.yuRUbf > a', # Main result links - 'div.g a.l', # Alternative link format - 'div.rc a', # Another possible format - 'div[class*="g"] > div > div > div > a', # Broader match - 'a[href^="http"]' # Any http link - ] + domain = domain.lower() + if domain.startswith('www.'): + domain = domain[4:] - for selector in selectors: - try: - elements = soup.select(selector) - for element in elements: - url = element.get('href') - if url and url.startswith('http') and not url.startswith('https://www.google.com'): - urls.add(url) - except Exception as e: - logger.debug(f"Error with selector {selector}: {str(e)}") + for source in sources: + source_domain = source.domain.lower() + if source_domain.startswith('www.'): + source_domain = source_domain[4:] + + # Check exact match + if domain == source_domain: + logger.debug(f"Exact domain match found: {domain} = {source_domain}") + return True + + # Check if domain ends with source domain + if domain.endswith('.' 
+ source_domain): + logger.debug(f"Subdomain match found: {domain} ends with {source_domain}") + return True - # Also try finding URLs in the raw HTML using regex - url_pattern = r'href="(https?://[^"]+)"' - raw_urls = re.findall(url_pattern, html_content) - for url in raw_urls: - if not url.startswith('https://www.google.com'): - urls.add(url) - - return list(urls) + logger.debug(f"No match found for domain: {domain}") + return False -def google_search_scraper(search_text: str, site_domain: str, retry_count: int = 0) -> List[str]: - """Scrape Google search results with multiple query formats""" - logger.info(f"Searching for '{search_text}' on domain: {site_domain}") +async def build_enhanced_search_query(query: str, sources: List[FactCheckSource]) -> str: + """Build search query with site restrictions.""" + site_queries = [f"site:{source.domain}" for source in sources] + site_restriction = " OR ".join(site_queries) + enhanced_query = f"({query}) ({site_restriction})" + logger.debug(f"Enhanced search query: {enhanced_query}") + return enhanced_query + +async def google_custom_search(query: str, sources: List[FactCheckSource], page: int = 1) -> Optional[Dict]: + """Perform Google Custom Search with enhanced query.""" + enhanced_query = await build_enhanced_search_query(query, sources) + start_index = ((page - 1) * RESULTS_PER_PAGE) + 1 - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Referer': 'https://www.google.com/', - 'DNT': '1' + params = { + "key": GOOGLE_API_KEY, + "cx": GOOGLE_ENGINE_ID, + "q": enhanced_query, + "num": RESULTS_PER_PAGE, + "start": start_index } - # Try different query formats - query_formats = [ - f"{search_text} site:{site_domain}", - f"site:{site_domain} {search_text}", - f"\"{search_text}\" site:{site_domain}" - ] - - all_urls = set() - - for query in query_formats: + async with httpx.AsyncClient(timeout=30.0) as client: try: - google_url = f"https://www.google.com/search?q={urlencode({'q': query})}" - logger.debug(f"Trying query format: {query}") + logger.info(f"Making API request to Google Custom Search with params: {params}") + response = await client.get(GOOGLE_SEARCH_URL, params=params) + response.raise_for_status() - response = requests.get(google_url, headers=headers) + data = response.json() - if response.status_code == 200: - urls = extract_search_results(response.text) - domain_urls = [url for url in urls if site_domain in urlparse(url).netloc] - all_urls.update(domain_urls) - else: - logger.warning(f"Received status code {response.status_code} for query format: {query}") + search_info = data.get('searchInformation', {}) + logger.info(f"Search info: Total results: {search_info.get('totalResults', 0)}, " + f"Time taken: {search_info.get('searchTime', 0)}s") - sleep(2) # Delay between requests + if 'error' in data: + error_details = data['error'] + logger.error(f"API Error: {error_details}") + raise HTTPException( + status_code=response.status_code, + detail=f"Google API Error: {error_details.get('message')}" + ) + + return data except Exception as e: - logger.error(f"Error processing query format '{query}': {str(e)}") - if retry_count < MAX_RETRIES: - sleep(RETRY_DELAY) - return google_search_scraper(search_text, site_domain, retry_count + 1) + logger.error(f"Search error: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, 
detail=f"Search error: {str(e)}") + +async def analyze_fact_check_results(openai_client: OpenAIClient, original_response: Dict) -> Dict: + """Analyze fact check results using OpenAI to generate a consolidated verdict.""" - valid_urls = list(all_urls) - logger.info(f"Found {len(valid_urls)} unique URLs for domain: {site_domain}") - return valid_urls[:5] # Return up to 5 URLs - -def calculate_similarity(query_embedding: List[float], url_embedding: List[float]) -> float: - """Calculate cosine similarity between two embeddings""" - query_array = np.array(query_embedding) - url_array = np.array(url_embedding) + # Extract verification results from sources + verification_results = [] + for url, result in original_response.get('verification_result', {}).items(): + verification_results.append(f""" + Source: {url} + Verdict: {result.get('verdict')} + Confidence: {result.get('confidence')} + Evidence: {result.get('evidence')} + Reasoning: {result.get('reasoning')} + """) - similarity = np.dot(url_array, query_array) / ( - np.linalg.norm(url_array) * np.linalg.norm(query_array) - ) - return float(similarity) + system_prompt = """You are a professional fact-checking analyzer. Your task is to analyze multiple fact-checking results + and provide a consolidated verdict. Respond with a valid JSON object containing your analysis.""" + + user_prompt = f""" + Analyze these fact-checking results and provide a final verdict. + + Query: {original_response.get('query', '')} + + Fact Check Results: + {'\n'.join(verification_results)}""" + try: + logger.info("Generating AI analysis of fact check results") + response = await openai_client.generate_text_response( + system_prompt=system_prompt, + user_prompt=user_prompt, + max_tokens=2000 + ) + + # Create the enhanced result structure + enhanced_result = { + "query": original_response.get('query', ''), + "results": [ + { + "claimReview": [ + { + "publisher": { + "name": source, + "site": source + }, + "textualRating": result.get('verdict', '') + } for source in original_response.get('sources', []) + ], + "claimant": "source", + "text": original_response.get('query', '') + } + ], + "sources": original_response.get('sources', []), + "summary": { + "fact_checking_sites_queried": len(original_response.get('sources', [])), + "total_sources": len(original_response.get('verification_result', {})) + }, + "verification_result": { + "verdict": next(iter(original_response.get('verification_result', {}).values()), {}).get('verdict', ''), + "confidence": next(iter(original_response.get('verification_result', {}).values()), {}).get('confidence', ''), + "evidence": [next(iter(original_response.get('verification_result', {}).values()), {}).get('evidence', '')], + "reasoning": next(iter(original_response.get('verification_result', {}).values()), {}).get('reasoning', ''), + "fact_check_type": "ai fact checker" + }, + "token_usage": original_response.get('token_usage', { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0 + }) + } + + enhanced_result["total_claims_found"] = len(enhanced_result.get("results", [])) + + logger.info("Successfully generated AI analysis") + return enhanced_result -@scrap_websites_router.post("/search", response_model=SearchResponse) + except Exception as e: + logger.error(f"Error in OpenAI analysis: {str(e)}") + raise HTTPException(status_code=500, detail=f"Error in fact check analysis: {str(e)}") +@scrap_websites_router.post("/search", response_model=EnhancedFactCheckResponse) async def search_websites(request: SearchRequest): 
logger.info(f"Starting search with query: {request.search_text}") logger.info(f"Source types requested: {request.source_types}") - results = {} - error_messages = {} - - # Initialize OpenAI client - logger.debug("Initializing OpenAI client") - openai_client = OpenAIClient(OPENAI_API_KEY) - - # Get domains based on requested source types - domains = [] + # Get sources for requested types + selected_sources = [] for source_type in request.source_types: if source_type in SOURCES: - domains.extend([source.domain for source in SOURCES[source_type]]) + selected_sources.extend(SOURCES[source_type]) - if not domains: - logger.warning("No valid source types provided. Using all available domains.") - domains = [source.domain for source in get_all_sources()] + if not selected_sources: + logger.warning("No valid source types provided. Using all available sources.") + selected_sources = get_all_sources() - logger.info(f"Processing {len(domains)} domains") + logger.info(f"Selected sources: {[source.domain for source in selected_sources]}") - # Enhance search text with key terms - search_context = request.search_text - logger.debug("Getting query embedding from OpenAI") - query_embedding = openai_client.get_embeddings([search_context])[0] + # Initialize collections for URLs + all_urls = [] + domain_results = {} - # Higher similarity threshold for better filtering - SIMILARITY_THRESHOLD = 0.75 - MAX_URLS_PER_DOMAIN = 2 # Adjusted to ensure total stays under 5 - TOTAL_MAX_URLS = 5 # Maximum URLs allowed for AIFactCheckRequest - - total_urls_collected = 0 - for domain in domains[:3]: # Limit to 3 domains for testing - if total_urls_collected >= TOTAL_MAX_URLS: - break + try: + # Search and collect URLs + for page in range(1, MAX_PAGES + 1): + if len(all_urls) >= 50: + logger.info("Reached maximum URL limit of 50") + break - logger.info(f"Processing domain: {domain}") - try: - urls = google_search_scraper(request.search_text, domain) - valid_urls = [] + logger.info(f"Fetching page {page} of search results") + search_response = await google_custom_search(request.search_text, selected_sources, page) - logger.debug(f"Found {len(urls)} URLs for domain {domain}") + if not search_response or not search_response.get("items"): + logger.warning(f"No results found on page {page}") + break - for url in urls: - if len(valid_urls) >= MAX_URLS_PER_DOMAIN or total_urls_collected >= TOTAL_MAX_URLS: - break - - url_text = extract_url_text(url) - - if not url_text: - logger.debug(f"No meaningful text extracted from URL: {url}") + for item in search_response.get("items", []): + url = item.get("link") + if not url: continue - logger.debug("Getting URL embedding from OpenAI") - url_embedding = openai_client.get_embeddings([url_text])[0] - similarity = calculate_similarity(query_embedding, url_embedding) + domain = get_domain_from_url(url) + logger.debug(f"Processing URL: {url} with domain: {domain}") - logger.debug(f"Similarity score for {url}: {similarity}") + if is_valid_source_domain(domain, selected_sources): + if domain not in domain_results: + domain_results[domain] = [] + + if len(domain_results[domain]) < MAX_URLS_PER_DOMAIN: + domain_results[domain].append({ + "url": url, + "title": item.get("title", ""), + "snippet": item.get("snippet", "") + }) + all_urls.append(url) + else: + logger.debug(f"Skipping URL {url} - domain not in allowed list") - if similarity >= SIMILARITY_THRESHOLD: - valid_urls.append(url) - total_urls_collected += 1 - - results[domain] = valid_urls - logger.info(f"Successfully processed domain 
{domain}. Found {len(valid_urls)} valid URLs") - - except HTTPException as e: - logger.error(f"HTTP Exception for domain {domain}: {str(e.detail)}") - error_messages[domain] = str(e.detail) - except Exception as e: - logger.error(f"Unexpected error for domain {domain}: {str(e)}") - error_messages[domain] = f"Unexpected error for {domain}: {str(e)}" + if len(all_urls) >= 50: + break - sleep(1) # Add delay between processing different domains - - logger.info("Search completed") - logger.debug(f"Results found for {len(results)} domains") - logger.debug(f"Errors encountered for {len(error_messages)} domains") - - # Collect all valid URLs from results - all_valid_urls = [] - for domain_urls in results.values(): - all_valid_urls.extend(domain_urls) - - logger.info(f"Total valid URLs collected: {len(all_valid_urls)}") - - # Create request body for AI fact check - if all_valid_urls: + logger.info(f"Total URLs collected: {len(all_urls)}") + + if not all_urls: + return EnhancedFactCheckResponse( + query=request.search_text, + results=[], + sources=[], + summary=Summary( + fact_checking_sites_queried=len(selected_sources), + total_sources=0 + ), + token_usage={ + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0 + }, + total_claims_found=0, + verification_result=VerificationResult( + verdict="Insufficient Evidence", + confidence="Low", + evidence=["No relevant sources found"], + reasoning="No fact-checking sources were found for this claim", + fact_check_type="ai fact checker" + ) + ) + + # Perform fact check with collected URLs fact_check_request = AIFactCheckRequest( content=request.search_text, - urls=all_valid_urls[:TOTAL_MAX_URLS] # Ensure we don't exceed the limit + urls=all_urls[:5] # Limit to 5 URLs ) - logger.info("Calling AI fact check service") - try: - ai_response = await ai_fact_check(fact_check_request) - logger.info("AI fact check completed successfully") - - # Format AI fact check response - formatted_response = { - "query": ai_response.query, - "token_usage": { - "prompt_tokens": ai_response.token_usage.prompt_tokens, - "completion_tokens": ai_response.token_usage.completion_tokens, - "total_tokens": ai_response.token_usage.total_tokens - }, - "sources": ai_response.sources, - "verification_result": { - url: { - "verdict": result.verdict, - "confidence": result.confidence, - "evidence": result.evidence, - "reasoning": result.reasoning, - "missing_info": result.missing_info - } for url, result in ai_response.verification_result.items() - } - } - - # Return response with AI fact check results - return SearchResponse( - results=results, - error_messages=error_messages, - ai_fact_check_result=formatted_response - ) - - except Exception as e: - logger.error(f"Error during AI fact check: {str(e)}") - error_messages["ai_fact_check"] = f"Error during fact checking: {str(e)}" - - # Return response without AI fact check if no valid URLs or error occurred - return SearchResponse( - results=results, - error_messages=error_messages, - ai_fact_check_result=None - ) \ No newline at end of file + logger.info(f"Performing fact check with {len(fact_check_request.urls)} URLs") + fact_check_response = await ai_fact_check(fact_check_request) + + # Get enhanced analysis + openai_client = OpenAIClient(OPENAI_API_KEY) + enhanced_response = await analyze_fact_check_results( + openai_client, + fact_check_response.dict() + ) + + return EnhancedFactCheckResponse(**enhanced_response) + + except Exception as e: + logger.error(f"Error during search/fact-check process: {str(e)}", exc_info=True) + 
raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/app/config.py b/app/config.py index a13fd4d..8b60dd0 100644 --- a/app/config.py +++ b/app/config.py @@ -5,6 +5,7 @@ load_dotenv() GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"] GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"] +GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"] OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] FRONTEND_URL = os.environ["FRONTEND_URL"] \ No newline at end of file diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc index 6df8e42d278ee3c78aa5452c872101d3e81faf2c..694cd33e5de5f8ed1537243806a1ba949ad2ec85 100644 GIT binary patch delta 20 acmX@-dCrsjG%qg~0}z;QkK4$7SOow>dIlB% delta 20 acmX@-dCrsjG%qg~0}vEv#BSt1tO5W-O$G!2 diff --git a/app/models/scrap_websites_models.py b/app/models/scrap_websites_models.py new file mode 100644 index 0000000..1c629c5 --- /dev/null +++ b/app/models/scrap_websites_models.py @@ -0,0 +1,43 @@ +from pydantic import BaseModel +from typing import List, Dict + +class SearchRequest(BaseModel): + search_text: str + source_types: List[str] = ["fact_checkers"] + +class Publisher(BaseModel): + name: str + site: str + +class ClaimReview(BaseModel): + publisher: Publisher + textualRating: str + +class Claim(BaseModel): + claimReview: List[ClaimReview] + claimant: str + text: str + +class Summary(BaseModel): + fact_checking_sites_queried: int + total_sources: int + +class TokenUsage(BaseModel): + prompt_tokens: int + completion_tokens: int + total_tokens: int + +class VerificationResult(BaseModel): + verdict: str + confidence: str + evidence: List[str] + reasoning: str + +class EnhancedFactCheckResponse(BaseModel): + query: str + results: List[Claim] + sources: List[str] + summary: Summary + token_usage: Dict[str, int] + total_claims_found: int + verification_result: VerificationResult \ No newline at end of file From e56163a8c3843dff51304e8705b32125fc73a3bf Mon Sep 17 00:00:00 2001 From: Utsho Dey Date: Tue, 17 Dec 2024 17:31:13 +0600 Subject: [PATCH 5/5] content fact checked is functional --- app/__pycache__/config.cpython-312.pyc | Bin 576 -> 646 bytes .../__pycache__/fact_check.cpython-312.pyc | Bin 6289 -> 8116 bytes app/api/ai_fact_check.py | 2 +- app/api/fact_check.py | 305 ++++----- app/api/scrap_websites.py | 232 +------ app/api/scrap_websites2.py | 261 -------- app/config.py | 1 + .../fact_check_models.cpython-312.pyc | Bin 9422 -> 4380 bytes app/models/ai_fact_check_models.py | 229 +++++++ app/models/fact_check_models.py | 292 +++------ .../fact_checker_website.cpython-312.pyc | Bin 3152 -> 4885 bytes app/websites/fact_checker_website.py | 116 +++- output.json | 595 ------------------ 13 files changed, 610 insertions(+), 1423 deletions(-) delete mode 100644 app/api/scrap_websites2.py create mode 100644 app/models/ai_fact_check_models.py delete mode 100644 output.json diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc index 22a68c5e11d594851135abcde42a1e5f1ce57cb2..b086fe1a5fd129666635b64227baa2bfcfa3fb8f 100644 GIT binary patch delta 121 zcmX@W(#FbrnwOW00SIR<%o;+)BM7@Y)eh=@)1n&@>w U-sqx;@dXCK$#P6`EPOzD0BejMQUCw| delta 99 zcmZo;J;1_ynwOW00SJ0_#HE`}a*Ujldl-!;A7OOi z;s+{Z1mfbD$wEv{N)H5ur%O$gx*(%-QBb#m?*_kMe^qDI484o|nvFa~yg-Ej_HG&x diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc index 
98c2526f8272844d81c35b9bb2a51494f76a6e41..b5709d28204fc20b85cd04dc7acb2b256391067d 100644 GIT binary patch literal 8116 zcmb_BYit|mk>B#o6<-o*J*d~}VaakN(vssx9NP&`)+@H9M6%OZMM|u=OKBCpZg(kJ zD&-bQP&hbW;i8ADgEWYPd&Gfts0tKF4mjZQuwxg!9~7iO?4FAFaQV^vV{8S^qd#tD zmrGKLlk|!%!QF4aZ@!uNX6Bo3X7+bZCr#k`*$>$1$|geo7hm+tQfS;$pz#aB5sr$H zI5kPd4U+~6-wiS2q!FIRm?>_aG{-HImbi7&s*agrwzz%L9;YX1_1zq+h&v`7YTFWX z#$A)HxO>ta_e^@?m6Mffzcp4B_fC2#Vjw(qy+D4_2X7AEdY#fbCUm zqZY2>Mbl(0@0;CUm{9iM9K3tkdEKCOaW38!HE`}1O-0=P1w)tT6CCa`DE82kBjd@m z%!`V1_{^Cn2hKc}neaS6B?&SwDb5MCW1RonG%v|rl;Rjm@rlq#e@x&L za+gW*46+fqe}<1dSAbC}%X*|#G9mGbb3hc6;y7P;wM-<_VuS~5?rB~Qv$0rMQhOxD z40OZ=#T^B#;RvD)bF9oN7Lk|JVj@xykt!%Oi5K91?~;*_lY|20+d=tD2F|c#wjM{KiR*B$k7Vz)9GNz1~voF)v3u7-3 zuHuK#{zv04j6X4($&$rRG8WEJ2NRd9vRR*>vCdlccInAEb+eIedarH{;;YkS$(FHl zPQ-PBJo{6?XfMNPm(|UI^NMqNGWJ=gj;ZwITsj@mkg+FBoI7KGg}iE5qBFKxcL`2! zW$28Z^Ze4F&!Whs$G1`T=#V;PiMsR@(0dU55U49wi&_M zLT_grI)3CuZ&O4@g{T+jlk1D^UEeg5tLAE!Yjum6t<-0fp8AGguG1y;JjGQkIW9Qn zsd;kVc#h0dJGky&a+Y!JFQZ+wsc+=EzKrVzuJJGA&UwLE;_ly!^Hf*!;uAxBf)`nt zXIKXJGNbNgM&wgTQD)MTkeFsPPr^*G63;Qo1S8Mz+hL`2OqK#{8a(Q`WSU_`#Fi41 zQC^b3-LWwhyhlZi2sP}y7+?TgWMo!)ju9kAPBIaZ*LdN0nH6F{@9Ru$;{&M72^_$o z_X#kA$w*p49@$7FEwYh?ZYC0AML~uJo8XYC6fZInL1xhrGs!4Z@Mr;=9#6-3=?L8$ zU`7&=Sek=X#A1bU2K=z3qDZiDUSb4I428)|Ops){FTkAS`IIUj{(LINCNv^ZWJ(j= z94`t{ft`wJ5|rruC1S7wKs-Og1NU>Vc!Z)K4lt)#ApybwW~XNPh%C&(YIORVB-tb2 z(8?4UNybwm4~QgTZilK=wW`d6!1?p2@h2w6Mww_*jI;6)`iC?FS1Sljh_@bLTE7Rf z;|nWN%>xIRDIV3REr#b>yR}L1&Iu72r;R0eknIuXjF{$wgKSLVgHN)eEPy~4w9d1M zozo-9L=;q+i16hYhK1>w;Ax%{((&M_KZLqn_0l4fdVjU6`@DrxgC;8iqmXDSX6b{S|qllww^64okt@qa2{e{B@saaQt5b{ z6&G}T&+!Y44#@23?dfAAIGb^r)TOVh%x;jK5L4U;Wh0<{v`^H*{7kZ_2S$*)HTw}{ zW}cOb1DfsQ;*o`@kie-Dv!}0T{{vWHIZnXM!N$U{5Sc%(YIG!yro$${Luj20I|Z?g zDpF085zX3Ri@}1VZrBs?!Zw4Kq{vK(NtT1nC&W`RK?4RJc8GyqqUnKzltve#=}p~h z`aVO~g4P}5!7BHZ#4sbvDd|Ws$Ous;E`ZlbOn0ffm1iR}`WBu6kxV7!8E{T$59kk2 z?E%H2n!B`UeF}q`Bg|q;QO#%|7$-4m=)^#1guV_y{M$gby|5N=g!HzqGa&ndPnBkp z=@@rp+i;o737tBn^ILQ|hp2k{^)VE6x(MS%*Fn5dWpCxogjCmI%2?Wl^?4JIn^ zG45;PIt}{{-9MTb9x(r}b%W5pbKN@yHJZc|HnD)4CkoatqZX}@&f%$3#^rM1NJWq#C zjhqOb85tW54~&nGjeqF~s%Lx;%2ayOs8}@brO+x$a3>tEm{P1f^QJ+mkl+kXOKPXs z0_-d{so4iJtwkg#A@%Ur_RSIIlP%<@;oE>9%PFN#DobyYE|=njLqEX^;fR_HDb)oE zd>EY@T8$<-S^&QRT!BMYjF9jsCOp;^qa=$84gI1l%?t93VqsH&k5f!|+KCR7q`jEi zDdv<2=e1~pZV@Be*S6CFAoQ2l5swmmcdDJ{6diBq?8bY z04j)BRTmh9#lv4vdo((M)PmwG%2cJV=}bHZByio*Fyw=OB%lA7?o^hI0V(PnquX=uMXx!SPr;^{35@%XRwZ1^8q^*?lNWX*r@KTPZHW4YG$%kEqg z^sF`=yi1&whb|9p*0fyhSZ-Ud3EbHe*x2*P>Yhj5?pWUwdO3Q#Haq@I_NjB(p~)-8 zTy5t@ZD6%FkZbG6weS7ZY~1Jnj2P?cw`|1Uesys9#D{ggcbeK?n|*oq+O8GDTGRe3 z#^2XBU6tOr@cM<7=z8a|T;Sk}w8Ce*k6szPdf?^LMU>a(KCC^MuLpMW4W#?g%g!sp zs%Ou~t@~G7Pprh@mUT4eyd7CbNB$VB_KU5@DdMTSOYC;nrlT_J3;xEKefY%MzJY9T zFk3aW?ik*zYR)w@Z`QQu8V~+P%07B>?a-<0!PD9L(Oh$HuC8;-YO1ZeZzJBCpSAzI zW6Npvy7I*8bbXaK1JbWRpHY+xrstcTqs+rU9LuMs#NSPtp`v)brGkiFz?a1`d#SP1K26)4OI9 zw&~ELC;a4nOQ;eaH);^-MlA&+H~jX2KI4tH5IxXiyxC)hmz#Y?7`S=BK13OB9zQ;K z)cCDdaDOORdu|&Wd6)#LdUx5F;W!U#xiKA4O5G``C z93pj=(sGWHi|IFK;EbGU*<4II?M2+hMltEMlqH>3&L&VxR4JXDMTfNG(q}$QUZ6O; zTK+H;${z-~SWeKPmW&xAlrR1T$`_{6Dl%lYSVSnH=je=SwyN~5x3~(<@k{fP8Hy{0 zS@dT+u98GDQ0{@68k~1{L3u{w&!m@UGM3q5vaiA5UFVg8;>~O^z5n|7k`)R$51?OG zVCgL-*AHG}#;VJECwCF3TSg3=9e+g_crTYQMJC9lH%w6Sl6UL;J(vzxHBBvhbv#fy z>MX(+3nx1{Sfam^ds@f1bMBwR`5|(dy5xf*%m?safWH7`jba&Vwn68x^n?OUaLFd) z?olr&4L#68pt$Fe@!VIhjO}`1KS2@1q_-(@4(wsccEJW*jA*TSN^aF>eT`P-Uy0U_ zbXs~b2Uh+T{9l3p--86oIq2WETHiOUsk|IjEHZMt$~*uRo&F*ZpvtFI*l8oZTeODv4NmTx7JUGm1jOz*H9w1n>+N;F(1O=7eVxkflS728umun1!*jHm*2JaT#C* zLKTG)KE|MmLkr=;GAvPAtktMNnI<$+rVrm 
z*cB6sU5)J&N8w|F%CLxEOvC`*`tXKFn22Tat_txnY!AZ?lWqJYq4>%LqQ|o0VT2pP zn-=;asbYjKK8grj1;G{0VWk2hd;N@-DgdG^s1>oetW>BL9Y%tZ_zd)1fWLGU4rMha z{fs6K|A#G)mPH~1x%z$I5{cxih^Hyr+?#dv{dj28?b~oOt8Qk^-Flz&nIZOp+tujD z&4IP%eOX(>o$9@JiQis#aq#1s-MKpEwZ@klb4+KhXaBFBee>B|L(``fmTK3hN{c9JWVR=HauOcp00JzLpf&88_w6A z|Kz^xyyImyyzQ&r_N&jVdjp#-2Ue!vivK$P&a-PR6C3pttGIw2<9pWnfuQNV zR%)Qj_Fh}a1uyUSQUed!-iM$Q+6N2(^!_0SOt@jB26`+vOlEA`vHymnV`!h@Mjth_ z*K%WjBeZXNsG)Al%}OJ-eK_kTLk$HiH(M;&?xFza&4<)`4~0EJ?@*`p14H{@xAlV# z3*h{q+kriY+lN}Mw;HhLR;w9$Zgm>5y_W*ax4PAP05RO!XCFRlzV*oQ;X~%zhs@Bv zeH0;YKV})JwEosijo2K&wOFz3v;urry<&~B;2~0ih_RgJaZcm*qrqhdu<#*Fmw6-% zeU)k)f++#0BeALC=SqCO(s|4kepk*8M4Qy-DgAISdu1~X;2N8t9E zgHXr+$JhAtlUY*vdr!laAFO%0f9%Ygo}(!GY9vqa`SuCznH_lgp4yS0G!lb%gQ{7j mYOc7lhlaCM%^Ee5H-sq4m2D2*CD^zJKu=Jgs+}SW-25M4>}7=j literal 6289 zcmbVQU2qdew(c3tjQ%x}C0Vv5+cGvb#(xCNPaq)}+t>zTCpJ)vvbj@2Gqz>)_}NEq4q*u?hO>X3#pI-s`h~_Y?W`_`@mz!vK3PI#lqS3?5uAkTbqYX)qN1)6olP} zdwQgiO_uCKTBV-Tf2U7(pFZb1{Xb5p4MBSA zjJT3~#GUj+JjsekMbaDb>TTwPFX@l?la-N53eiZSZk9xe1ZXsXM9XO;S|!ueCPUQK z?@d#Yj#P`b(i2TP}l-$V)EKExE@T(Q}$9%MqHT zTPwaM>{@Bf+Vk4bXgZ@vSaS{@I`mrq$(S^)#M7zaR!XyoQAJYXNeMbKeesy0F$d$a z0wveL$jHFKegQ@c2l|g{HKp>tp58-3?_huL0by^?Sif+1^k6HiRSu-n6A5WwG^X@U zO0nak(oZsytZ0FMsF$bHDOu9k-b6H>)SUeor}3y%sx<>&+v$TKb}u}NVZ zmmNhqx*jgh6qxMoTpo6@X;5KBa5SrB9*bU1wK>5i_+bW-ZdDDosMZni5b z%G2>gB8t1nhU%x(6`h`je_XFAZgJ=IEQYIw=Boc;dpb)qd!bq00NImMXxZ*RAG@;S zb9X*x6swmrOrnS(HOoq!1O+(+6N| zKBRNh>Jf`3kr7#uyTat)u%`@TWek9Cmdet%%!c~B3+7Ab&y@I^vZg5_?{|_R5e4&$ zAd^UyInd8aNTFHFc69Ct{5Qbg+K7}guPH~FEE7!#pUg2?X3ABrDic%gGFc`>>v7Nl z+@cV5(35CW{AJoMIz;EHp_(E(^_^s_TV{n!+F3kt%Lt6CwkXb}3 z$GpwVP&4SH=_s0^8qo{rU9x|0qQ5ke6WESeP`DXzBu7C{U@xg+&)=A4sMdOq8%KbbHeqEC*_^86veSgjUlabtd#0F zJoYtF>{doI!C;LAyNPEtM>gdu7rKH?|znR*TljSLI@qoX6E ztxk=dRFvtHn!QJ!O~rZ>aVe!>avQJ(GR*|$Tyx`ST$Thdb3$=GjVFF;OrKl#51Iv+ zJOqA$j3pNjlUuJbBJo)!S;1gmxR}mgvyjadGB|;WmBKqnM@o{3K@F3n+YrrNT81Qm z1r%WK@owl)1%J67cJ;5synV&{miTa<4=?iT9-~^;RUp~5@d6#Px|VCY7pk|;zqt_T zJu~=KV7*#huZCOHQ0)UNTj@Hpui!v+O;;oDM{ZbdS6+@R*6vvf>^U>2I($ox^?Aqo z&mAqQH}E(B9Ch9g2=6C_3RKlYJt=sh@a;+!a{HI~raa&D5P7UFm3O~4cXn=xU!UjK z-|*h&+m|aF)o|l-aD!UEOKt2{Yui@LOsMj)1qFf^#EauAPII85fXtqX6*uyR)lmI% zph>Om{_nBdp8q;BKe!NlS*_oyR=2>gstOq94P0ot*bKwGuvoLx^|yi_CVu+3`WZ3* zE)#3=PIX_9`he=$%v~`7b(QMlOxHK|?4a-1sh)1`j>AXj20OIfX{UO&F?TvRLU&W7 zej7#T9o{|@`-#10OYd&>x4T&=+%=hi&s~n|t1{npP<_1PuG36tpBV<=Hkepr!F@=H zRwjO)+=8EzYy~ph3XKH382UoojFix;!i2nl>D|(0t zl%wEJuUb#ct-61?MJYRdky6YCCy^!|;1+8DwdZ+&vi;-M*gw0$!Dt1~U|#EaD5Mb0 z*OSpFX95Ws__OfmvZg-tCN$;D;G@Wj^t3Afs`TKQo@3dw=9?wYR-Rk8iQx4tq9toN zfp9hp6fwy#6EMzM*VyfF9A&M)qW**WXKT*c04|liMM*^XWNn6Z2dHh$+D?EKKDRk* z6Rl)6(MIx|U9@NIAVr<%$O293pcDY#^*oNH9-@QvTf^N+cmVfkmi?Y_&SyuQJHeoH zzhS_04wZ%HtSyH@Xva# z_R5;dqdBqeis@GUP2yxA7}}1`Q|GSh`8c4Vl8z$;$mgY;Bg;Kk3(QlMMILMO zHE6^!MX>32k|Brax5YRZk$r=A82fO5rqSdCrW^x1Hzd zb5@uUs8!4p18hUIWCrP&reSWUjrc}AE&#U<;2@lVU}PekgkU2+osjg(4jnv@>QVrv z55hgLgt-vG2C;N%JU)@ZMR*XG!^yZz*l6C~k>Pzq1BXY~-~-*3<8~N~Nw7RjJUBr~ znqB{nN+;sd3?{%_vrcD@CF1fVL_mxjS0v4;NGBDD8%ANK)CAakt|*bl>Of|$`n4!l z;?YDHf=gI|9PT**SS5NaAa)j+!>yu+r#)(GLGIE#=~TD+`}WnX#d#ZgNMe1eItj5 z`ye=UPGeZ5A}A!HmNieTxMl!tMwLTJE73%$L`D?LrGwRit8q8TEs#x@AySiYToN@` z5%?Klc&h~ybD&ieSrktZ5FIRlI9z=_niuvcJ|2(hsv#6>HM23D12&EvsgQxa*X+93 zKm+iFKAB`Pif2DUt$rOh77=6_1>zTBZXLa0;?6Mfd74@02WXSTIvNL&t^~qxVmcX( zr|?eFk04sjT7D1FObIEaF;i)1Cqa$@Vv(~Y=!EQ=#_D^i@dj8GNt_Q4C(uqVhh`!8 zS^$;DFO#tX8B2}hW28WUuS_Cja}HNS7zwd@M2Rl#L|l7X#iaFgn91mx&*0dnNMXT^UbfRncYaCh^k%4U3Ga|o^SZFw(;6vzP95bvU&DX=lhqN+pec>NY_(K%`fJg zUtDb7b$&$k)i3#)^S5P4vts@|Z!40ezcv9fR?HseHi3po)BoJb|| 
z)~b~OwI-$fg8Y+J0|zTDP%Dxpw@)1cD&!QTphanqpQOE!eVvD1J=~&T0tf! z|BVGX{eN5LtG;*4>mO8pw|l`3{Qv%NBU%@szLg2^d8daR>Zb0Pseuj59ZNN#*EJ4w z(x0%@KpXc7w;Sl+vYjOFu7v!qtp`D;-`RXaUB2Jd?X^SU(>4lPKJBy)wX>gYe_^nb zy|;mb`g@%YsJXYZWvGSytbx>g*1|%~XYCfCTWdA*coZU92*}~(s2H_^c>zCAeB0xc z*=ew>n7j*O5;@>4kZCqBRh_yQnOHS;T8Px@!HnFwYM6`Rjf}}Nx#nJDuJqSeOo9n~ zoMglZ>Jf9%vNaHHGtx2LsITECVWCv38&Tby!ZR$G!Q$(nfQt{w_BF4j#m6`jJHxv{ z8X|FGNU%sU{TYrt*gdtpRDAE-jZZ_7gk18kAuE_DiuwYz{|VXu7u9}&YX5{BU!b1H zG)K|jAjlrO5cN9sXJ75b0}IHr!Z1|RWoD^rL%wRm3WCBzM?K{#?1guD?-CWrQvsE! zP`yoGy8P#J3(obR*3bPygwAsXGoq^=AuCN6d{Y$VTd40UAf4#$f2b3W{LuJ7Z`9ZF E|J2kYFaQ7m diff --git a/app/api/ai_fact_check.py b/app/api/ai_fact_check.py index ce5b0ee..6d1f2d7 100644 --- a/app/api/ai_fact_check.py +++ b/app/api/ai_fact_check.py @@ -1,7 +1,7 @@ from fastapi import APIRouter, HTTPException from app.services.openai_client import OpenAIClient, AIFactChecker from app.config import OPENAI_API_KEY -from app.models.fact_check_models import ( +from app.models.ai_fact_check_models import ( AIFactCheckRequest, AIFactCheckResponse, VerificationResult, diff --git a/app/api/fact_check.py b/app/api/fact_check.py index 432f0de..b52ef24 100644 --- a/app/api/fact_check.py +++ b/app/api/fact_check.py @@ -1,173 +1,192 @@ from fastapi import APIRouter, HTTPException -import json -from datetime import datetime -from typing import Dict, List import httpx -from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL +from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, OPENAI_API_KEY +from app.api.scrap_websites import search_websites, SearchRequest +from app.services.openai_client import OpenAIClient from app.models.fact_check_models import ( - GoogleFactCheckRequest as FactCheckRequest, - GoogleFactCheckResponse as FactCheckResponse, - Claim, + FactCheckRequest, + FactCheckResponse, ErrorResponse, - TokenUsage + Source ) -from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources +from app.websites.fact_checker_website import get_all_sources fact_check_router = APIRouter() +openai_client = OpenAIClient(OPENAI_API_KEY) -class CustomJSONEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, datetime): - return obj.isoformat() - return super().default(obj) +async def generate_fact_report(query: str, fact_check_data: dict) -> FactCheckResponse: + """Generate a fact check report using OpenAI based on the fact check results.""" + try: + base_system_prompt = """You are a professional fact-checking reporter. Your task is to create a detailed fact check report based on the provided data. Focus on accuracy, clarity, and proper citation of sources. -async def validate_api_key(): - """Validate the Google API key with a test request""" - async with httpx.AsyncClient() as client: +Rules: +1. Include all source URLs and names in the sources list +2. Keep the explanation focused on verifiable facts +3. Include dates when available +4. 
Maintain objectivity in the report""" + + base_user_prompt = """Generate a comprehensive fact check report in this exact JSON format: +{ + "claim": "Write the exact claim being verified", + "verdict": "One of: True/False/Partially True/Unverified", + "confidence": "One of: High/Medium/Low", + "sources": [ + { + "url": "Full URL of the source", + "name": "Name of the source organization" + } + ], + "evidence": "A concise summary of the key evidence (1-2 sentences)", + "explanation": "A detailed explanation including who verified it, when it was verified, and the key findings (2-3 sentences)", + "additional_context": "Important context about the verification process, limitations, or broader implications (1-2 sentences)" +} + +Ensure all URLs in sources are complete (including https:// if missing) and each source has both a URL and name.""" + + if "claims" in fact_check_data: + system_prompt = base_system_prompt + user_prompt = f"""Query: {query} + Fact Check Results: {fact_check_data} + + {base_user_prompt} + + The report should: + 1. Include ALL source URLs and organization names + 2. Specify verification dates when available + 3. Name the fact-checking organizations involved + 4. Describe the verification process""" + + else: + system_prompt = base_system_prompt + user_prompt = f"""Query: {query} + Fact Check Results: {fact_check_data} + + {base_user_prompt} + + The report should: + 1. Include ALL source URLs and names from both verification_result and sources fields + 2. Mention all fact-checking organizations involved + 3. Describe the verification process + 4. Note any conflicting information between sources""" + + response = await openai_client.generate_text_response( + system_prompt=system_prompt, + user_prompt=user_prompt, + max_tokens=1000 + ) + try: - test_url = f"{GOOGLE_FACT_CHECK_BASE_URL}claims:search" - params = { - "key": GOOGLE_API_KEY, - "query": "test", - "languageCode": "en-US", - "pageSize": 1 - } - response = await client.get(test_url, params=params) - response.raise_for_status() - return True - except httpx.HTTPStatusError as e: - if e.response.status_code == 403: - raise HTTPException( - status_code=503, - detail=ErrorResponse( - detail="Invalid or expired API key", - error_code="INVALID_API_KEY", - path="/check-facts" - ).dict() - ) + # First try to parse the response directly + response_data = response["response"] + + # Clean up sources before validation + if isinstance(response_data.get('sources'), list): + cleaned_sources = [] + for source in response_data['sources']: + if isinstance(source, str): + # Convert string sources to Source objects + url = source if source.startswith('http') else f"https://{source}" + cleaned_sources.append({ + "url": url, + "name": source + }) + elif isinstance(source, dict): + # Ensure URL has proper scheme + url = source.get('url', '') + if url and not url.startswith('http'): + source['url'] = f"https://{url}" + cleaned_sources.append(source) + response_data['sources'] = cleaned_sources + + fact_check_response = FactCheckResponse(**response_data) + return fact_check_response + + except Exception as validation_error: + print(f"Response validation error: {str(validation_error)}") raise HTTPException( - status_code=503, + status_code=422, detail=ErrorResponse( - detail=f"API validation failed: {str(e)}", - error_code="API_VALIDATION_ERROR", + detail=f"Invalid response format: {str(validation_error)}", + error_code="VALIDATION_ERROR", path="/check-facts" ).dict() ) + + except Exception as e: + print(f"Error generating fact report: 
{str(e)}") + raise HTTPException( + status_code=500, + detail=ErrorResponse( + detail="Error generating fact report", + error_code="FACT_CHECK_ERROR", + path="/check-facts" + ).dict() + ) -@fact_check_router.post( - "/check-facts", - response_model=FactCheckResponse, - responses={ - 400: {"model": ErrorResponse}, - 404: {"model": ErrorResponse}, - 500: {"model": ErrorResponse}, - 503: {"model": ErrorResponse} - } -) -async def check_facts(request: FactCheckRequest) -> FactCheckResponse: +@fact_check_router.post("/check-facts", response_model=FactCheckResponse) +async def check_facts(request: FactCheckRequest): """ - Check facts using multiple fact-checking sources + Fetch fact check results and generate a comprehensive report. """ - all_results = [] - verified_results = [] - - # Validate configuration if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: raise HTTPException( status_code=500, detail=ErrorResponse( - detail="API configuration is missing", + detail="Google API key or base URL is not configured", error_code="CONFIGURATION_ERROR", path="/check-facts" ).dict() ) - - # Validate API key before proceeding - await validate_api_key() - - # Get all sources in priority order - all_sources = get_all_sources() - all_sources_list = [] # To store source URLs - contexts_used = [] # To store context snippets - failed_sources = [] # Track failed sources - - for source in all_sources: + + headers = {"Content-Type": "application/json"} + async with httpx.AsyncClient() as client: + # Get fact checker sources from the centralized configuration + fact_checker_sources = get_all_sources() + + for source in fact_checker_sources: + params = { + "key": GOOGLE_API_KEY, + "query": request.query, + "languageCode": "en-US", + "reviewPublisherSiteFilter": source.domain, + "pageSize": 10 + } + + try: + response = await client.get( + GOOGLE_FACT_CHECK_BASE_URL, + params=params, + headers=headers + ) + response.raise_for_status() + json_response = response.json() + + if json_response.get("claims"): + return await generate_fact_report(request.query, json_response) + + except httpx.RequestError as e: + print(f"Error fetching results for site {source.domain}: {str(e)}") + continue + except Exception as e: + print(f"Unexpected error for site {source.domain}: {str(e)}") + continue + try: - result = await fetch_fact_checks( - GOOGLE_API_KEY, - GOOGLE_FACT_CHECK_BASE_URL, - request.content, - source + search_request = SearchRequest( + search_text=request.query, + source_types=["fact_checkers"] ) - if "claims" in result: - # Validate each claim through Pydantic - for claim in result["claims"]: - validated_claim = Claim(**claim).dict() - all_results.append(validated_claim) - - # Extract source and context information - if "claimReview" in validated_claim: - review = validated_claim["claimReview"][0] - if "publisher" in review and "site" in review["publisher"]: - all_sources_list.append(review["publisher"]["site"]) - if "textualRating" in review: - contexts_used.append(review["textualRating"]) - - except HTTPException as http_err: - failed_sources.append({ - "source": source.domain, - "error": str(http_err.detail) - }) - continue + ai_response = await search_websites(search_request) + return await generate_fact_report(request.query, ai_response) + except Exception as e: - failed_sources.append({ - "source": source.domain, - "error": str(e) - }) - continue - - # Return partial results if some sources failed but we have data - if all_results: - verification_result = { - "verdict": "Partial Results Available" if 
failed_sources else "Complete Results", - "confidence": "Medium" if failed_sources else "High", - "evidence": contexts_used, - "reasoning": "Based on available fact checks", - "missing_info": f"{len(failed_sources)} sources failed" if failed_sources else None - } - else: - raise HTTPException( - status_code=404, - detail=ErrorResponse( - detail="No fact check results found. Failed sources: " + - ", ".join([f"{f['source']}: {f['error']}" for f in failed_sources]), - error_code="NO_RESULTS_FOUND", - path="/check-facts" - ).dict() - ) - - # Create token usage information - token_usage = TokenUsage( - prompt_tokens=0, - completion_tokens=0, - total_tokens=0 - ) - - # Create the response using Pydantic model with all required fields - response = FactCheckResponse( - query=request.content, - total_claims_found=len(all_results), - results=all_results, - verification_result=verification_result, - sources=list(set(all_sources_list)), - context_used=contexts_used, - token_usage=token_usage, - summary={ - "total_sources": len(set(all_sources_list)), - "fact_checking_sites_queried": len(all_sources), - "failed_sources": failed_sources - } - ) - - return response \ No newline at end of file + print(f"Error in AI fact check: {str(e)}") + raise HTTPException( + status_code=404, + detail=ErrorResponse( + detail="No fact check results found", + error_code="NOT_FOUND", + path="/check-facts" + ).dict() + ) \ No newline at end of file diff --git a/app/api/scrap_websites.py b/app/api/scrap_websites.py index 0dd584c..946ec01 100644 --- a/app/api/scrap_websites.py +++ b/app/api/scrap_websites.py @@ -2,60 +2,25 @@ from fastapi import APIRouter, HTTPException import httpx import logging from urllib.parse import urlparse -import json -from app.services.openai_client import OpenAIClient -from app.config import OPENAI_API_KEY, GOOGLE_API_KEY, GOOGLE_ENGINE_ID -from app.websites.fact_checker_website import SOURCES, get_all_sources -from app.api.ai_fact_check import ai_fact_check from typing import List, Dict, Optional from pydantic import BaseModel -from app.models.fact_check_models import ( +from app.models.ai_fact_check_models import ( AIFactCheckRequest, FactCheckSource, SourceType ) +from app.websites.fact_checker_website import SOURCES, get_all_sources +from app.api.ai_fact_check import ai_fact_check +from app.config import GOOGLE_API_KEY, GOOGLE_ENGINE_ID, GOOGLE_SEARCH_URL -# Define Pydantic models -class Publisher(BaseModel): - name: str - site: str - -class ClaimReview(BaseModel): - publisher: Publisher - textualRating: str - -class Claim(BaseModel): - claimReview: List[ClaimReview] - claimant: str - text: str - -class Summary(BaseModel): - fact_checking_sites_queried: int - total_sources: int - -class VerificationResult(BaseModel): - verdict: str - confidence: str - evidence: List[str] - reasoning: str - fact_check_type: str class SearchRequest(BaseModel): search_text: str - source_types: List[str] - -class EnhancedFactCheckResponse(BaseModel): - query: str - results: List[Dict] - sources: List - summary: Summary - token_usage: Dict[str, int] - total_claims_found: int - verification_result: VerificationResult + source_types: List[str] = ["fact_checkers"] # Configure logging logging.basicConfig( - level=logging.INFO, # Changed back to INFO from DEBUG + level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) @@ -66,14 +31,13 @@ scrap_websites_router = APIRouter() RESULTS_PER_PAGE = 10 MAX_PAGES = 5 MAX_URLS_PER_DOMAIN = 5 -GOOGLE_SEARCH_URL = 
"https://www.googleapis.com/customsearch/v1" + def get_domain_from_url(url: str) -> str: """Extract domain from URL with improved handling.""" try: parsed = urlparse(url) domain = parsed.netloc.lower() - # Remove 'www.' if present if domain.startswith('www.'): domain = domain[4:] return domain @@ -95,26 +59,16 @@ def is_valid_source_domain(domain: str, sources: List[FactCheckSource]) -> bool: if source_domain.startswith('www.'): source_domain = source_domain[4:] - # Check exact match - if domain == source_domain: - logger.debug(f"Exact domain match found: {domain} = {source_domain}") - return True - - # Check if domain ends with source domain - if domain.endswith('.' + source_domain): - logger.debug(f"Subdomain match found: {domain} ends with {source_domain}") + if domain == source_domain or domain.endswith('.' + source_domain): return True - logger.debug(f"No match found for domain: {domain}") return False async def build_enhanced_search_query(query: str, sources: List[FactCheckSource]) -> str: """Build search query with site restrictions.""" site_queries = [f"site:{source.domain}" for source in sources] site_restriction = " OR ".join(site_queries) - enhanced_query = f"({query}) ({site_restriction})" - logger.debug(f"Enhanced search query: {enhanced_query}") - return enhanced_query + return f"({query}) ({site_restriction})" async def google_custom_search(query: str, sources: List[FactCheckSource], page: int = 1) -> Optional[Dict]: """Perform Google Custom Search with enhanced query.""" @@ -131,141 +85,39 @@ async def google_custom_search(query: str, sources: List[FactCheckSource], page: async with httpx.AsyncClient(timeout=30.0) as client: try: - logger.info(f"Making API request to Google Custom Search with params: {params}") response = await client.get(GOOGLE_SEARCH_URL, params=params) response.raise_for_status() - - data = response.json() - - search_info = data.get('searchInformation', {}) - logger.info(f"Search info: Total results: {search_info.get('totalResults', 0)}, " - f"Time taken: {search_info.get('searchTime', 0)}s") - - if 'error' in data: - error_details = data['error'] - logger.error(f"API Error: {error_details}") - raise HTTPException( - status_code=response.status_code, - detail=f"Google API Error: {error_details.get('message')}" - ) - - return data - + return response.json() except Exception as e: - logger.error(f"Search error: {str(e)}", exc_info=True) + logger.error(f"Search error: {str(e)}") raise HTTPException(status_code=500, detail=f"Search error: {str(e)}") - -async def analyze_fact_check_results(openai_client: OpenAIClient, original_response: Dict) -> Dict: - """Analyze fact check results using OpenAI to generate a consolidated verdict.""" - - # Extract verification results from sources - verification_results = [] - for url, result in original_response.get('verification_result', {}).items(): - verification_results.append(f""" - Source: {url} - Verdict: {result.get('verdict')} - Confidence: {result.get('confidence')} - Evidence: {result.get('evidence')} - Reasoning: {result.get('reasoning')} - """) - - system_prompt = """You are a professional fact-checking analyzer. Your task is to analyze multiple fact-checking results - and provide a consolidated verdict. Respond with a valid JSON object containing your analysis.""" - - user_prompt = f""" - Analyze these fact-checking results and provide a final verdict. 
- - Query: {original_response.get('query', '')} - - Fact Check Results: - {'\n'.join(verification_results)}""" - try: - logger.info("Generating AI analysis of fact check results") - response = await openai_client.generate_text_response( - system_prompt=system_prompt, - user_prompt=user_prompt, - max_tokens=2000 - ) - - # Create the enhanced result structure - enhanced_result = { - "query": original_response.get('query', ''), - "results": [ - { - "claimReview": [ - { - "publisher": { - "name": source, - "site": source - }, - "textualRating": result.get('verdict', '') - } for source in original_response.get('sources', []) - ], - "claimant": "source", - "text": original_response.get('query', '') - } - ], - "sources": original_response.get('sources', []), - "summary": { - "fact_checking_sites_queried": len(original_response.get('sources', [])), - "total_sources": len(original_response.get('verification_result', {})) - }, - "verification_result": { - "verdict": next(iter(original_response.get('verification_result', {}).values()), {}).get('verdict', ''), - "confidence": next(iter(original_response.get('verification_result', {}).values()), {}).get('confidence', ''), - "evidence": [next(iter(original_response.get('verification_result', {}).values()), {}).get('evidence', '')], - "reasoning": next(iter(original_response.get('verification_result', {}).values()), {}).get('reasoning', ''), - "fact_check_type": "ai fact checker" - }, - "token_usage": original_response.get('token_usage', { - "prompt_tokens": 0, - "completion_tokens": 0, - "total_tokens": 0 - }) - } - - enhanced_result["total_claims_found"] = len(enhanced_result.get("results", [])) - - logger.info("Successfully generated AI analysis") - return enhanced_result - - except Exception as e: - logger.error(f"Error in OpenAI analysis: {str(e)}") - raise HTTPException(status_code=500, detail=f"Error in fact check analysis: {str(e)}") -@scrap_websites_router.post("/search", response_model=EnhancedFactCheckResponse) +@scrap_websites_router.post("/search") async def search_websites(request: SearchRequest): - logger.info(f"Starting search with query: {request.search_text}") - logger.info(f"Source types requested: {request.source_types}") + # Get the source types from the request + source_types = request.source_types if request.source_types else ["fact_checkers"] - # Get sources for requested types + # Get sources based on requested types selected_sources = [] - for source_type in request.source_types: + for source_type in source_types: if source_type in SOURCES: selected_sources.extend(SOURCES[source_type]) + # If no valid sources found, use fact checkers as default if not selected_sources: - logger.warning("No valid source types provided. 
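The selection logic here ignores unknown source types and falls back to fact checkers when nothing valid is requested. A sketch of that behavior; the inline SOURCES dict is a stand-in for the real configuration in app/websites/fact_checker_website.py:

SOURCES = {"fact_checkers": ["snopes.com"], "news_sites": ["bbc.com"]}

def select_sources(source_types: list[str]) -> list[str]:
    selected = []
    for source_type in source_types:
        if source_type in SOURCES:
            selected.extend(SOURCES[source_type])
    return selected or SOURCES["fact_checkers"]

print(select_sources(["news_sites", "bogus"]))  # ['bbc.com']
print(select_sources(["bogus"]))                # ['snopes.com']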
Using all available sources.") - selected_sources = get_all_sources() + selected_sources = SOURCES["fact_checkers"] - logger.info(f"Selected sources: {[source.domain for source in selected_sources]}") - - # Initialize collections for URLs all_urls = [] domain_results = {} try: - # Search and collect URLs for page in range(1, MAX_PAGES + 1): if len(all_urls) >= 50: - logger.info("Reached maximum URL limit of 50") break - logger.info(f"Fetching page {page} of search results") search_response = await google_custom_search(request.search_text, selected_sources, page) if not search_response or not search_response.get("items"): - logger.warning(f"No results found on page {page}") break for item in search_response.get("items", []): @@ -274,7 +126,6 @@ async def search_websites(request: SearchRequest): continue domain = get_domain_from_url(url) - logger.debug(f"Processing URL: {url} with domain: {domain}") if is_valid_source_domain(domain, selected_sources): if domain not in domain_results: @@ -287,56 +138,23 @@ async def search_websites(request: SearchRequest): "snippet": item.get("snippet", "") }) all_urls.append(url) - else: - logger.debug(f"Skipping URL {url} - domain not in allowed list") if len(all_urls) >= 50: break - logger.info(f"Total URLs collected: {len(all_urls)}") - if not all_urls: - return EnhancedFactCheckResponse( - query=request.search_text, - results=[], - sources=[], - summary=Summary( - fact_checking_sites_queried=len(selected_sources), - total_sources=0 - ), - token_usage={ - "prompt_tokens": 0, - "completion_tokens": 0, - "total_tokens": 0 - }, - total_claims_found=0, - verification_result=VerificationResult( - verdict="Insufficient Evidence", - confidence="Low", - evidence=["No relevant sources found"], - reasoning="No fact-checking sources were found for this claim", - fact_check_type="ai fact checker" - ) - ) + return { + "status": "no_results", + "urls_found": 0 + } - # Perform fact check with collected URLs fact_check_request = AIFactCheckRequest( content=request.search_text, - urls=all_urls[:5] # Limit to 5 URLs + urls=all_urls[:5] ) - logger.info(f"Performing fact check with {len(fact_check_request.urls)} URLs") - fact_check_response = await ai_fact_check(fact_check_request) - - # Get enhanced analysis - openai_client = OpenAIClient(OPENAI_API_KEY) - enhanced_response = await analyze_fact_check_results( - openai_client, - fact_check_response.dict() - ) - - return EnhancedFactCheckResponse(**enhanced_response) + return await ai_fact_check(fact_check_request) except Exception as e: - logger.error(f"Error during search/fact-check process: {str(e)}", exc_info=True) + logger.error(f"Error during search/fact-check process: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) \ No newline at end of file diff --git a/app/api/scrap_websites2.py b/app/api/scrap_websites2.py deleted file mode 100644 index 17542c6..0000000 --- a/app/api/scrap_websites2.py +++ /dev/null @@ -1,261 +0,0 @@ -from fastapi import APIRouter, HTTPException -from pydantic import BaseModel -from typing import List, Dict, Optional -import requests -from bs4 import BeautifulSoup -import urllib.parse -import numpy as np -from time import sleep -import logging -from app.services.openai_client import OpenAIClient -from app.config import OPENAI_API_KEY -from app.websites.fact_checker_website import SOURCES, get_all_sources -from app.api.ai_fact_check import ai_fact_check -from app.models.fact_check_models import AIFactCheckRequest, AIFactCheckResponse - -# Configure logging -logging.basicConfig( - 
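The collection loop above caps results at MAX_URLS_PER_DOMAIN per site and 50 overall. A condensed sketch of the same bookkeeping; collect_urls and the pages argument are illustrative names:

MAX_URLS_PER_DOMAIN = 5
TOTAL_CAP = 50  # hard-coded as 50 in the handler above

def collect_urls(pages: list[list[str]], get_domain) -> list[str]:
    all_urls, domain_results = [], {}
    for items in pages:
        for url in items:
            bucket = domain_results.setdefault(get_domain(url), [])
            if len(bucket) < MAX_URLS_PER_DOMAIN and len(all_urls) < TOTAL_CAP:
                bucket.append(url)
                all_urls.append(url)
    return all_urls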
level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) - -scrap_websites_router = APIRouter() - -# Configuration for rate limiting -RATE_LIMIT_DELAY = 2 # Delay between requests in seconds -MAX_RETRIES = 1 # Maximum number of retries per domain -RETRY_DELAY = 1 # Delay between retries in seconds - -class SearchRequest(BaseModel): - search_text: str - source_types: List[str] = ["fact_checkers"] - -class UrlSimilarityInfo(BaseModel): - url: str - similarity: float - extracted_text: str - -class SearchResponse(BaseModel): - results: Dict[str, List[str]] - error_messages: Dict[str, str] - ai_fact_check_result: Optional[AIFactCheckResponse] = None - -def extract_url_text(url: str) -> str: - """Extract and process meaningful text from URL path with improved cleaning""" - logger.debug(f"Extracting text from URL: {url}") - try: - parsed = urllib.parse.urlparse(url) - path = parsed.path - path = path.replace('.html', '').replace('/index', '').replace('.php', '') - segments = [seg for seg in path.split('/') if seg] - cleaned_segments = [] - for segment in segments: - segment = segment.replace('-', ' ').replace('_', ' ') - if not (segment.replace(' ', '').isdigit() or - all(part.isdigit() for part in segment.split() if part)): - cleaned_segments.append(segment) - - common_words = { - 'www', 'live', 'news', 'intl', 'index', 'world', 'us', 'uk', - 'updates', 'update', 'latest', 'breaking', 'new', 'article' - } - - text = ' '.join(cleaned_segments) - words = [word.lower() for word in text.split() - if word.lower() not in common_words and len(word) > 1] - - result = ' '.join(words) - logger.debug(f"Extracted text: {result}") - return result - except Exception as e: - logger.error(f"Error extracting text from URL {url}: {str(e)}") - return '' - -def google_search_scraper(search_text: str, site_domain: str, retry_count: int = 0) -> List[str]: - """Scrape Google search results with retry logic and rate limiting""" - logger.info(f"Searching for '{search_text}' on domain: {site_domain} (Attempt {retry_count + 1}/{MAX_RETRIES})") - - if retry_count >= MAX_RETRIES: - logger.error(f"Max retries exceeded for domain: {site_domain}") - raise HTTPException( - status_code=429, - detail=f"Max retries exceeded for {site_domain}" - ) - - query = f"{search_text} \"site:{site_domain}\"" - encoded_query = urllib.parse.quote(query) - base_url = "https://www.google.com/search" - url = f"{base_url}?q={encoded_query}" - - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' - } - - try: - logger.debug(f"Waiting {RATE_LIMIT_DELAY} seconds before request") - sleep(RATE_LIMIT_DELAY) - - logger.debug(f"Making request to Google Search for domain: {site_domain}") - response = requests.get(url, headers=headers) - - if response.status_code == 429 or "sorry/index" in response.url: - logger.warning(f"Rate limit hit for domain {site_domain}. 
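The deleted scraper throttled itself with a sleep before every request and retried once on failure. A compact sketch of that retry pattern using the module's constants; fetch_with_retry is an illustrative name:

import time

RATE_LIMIT_DELAY = 2  # seconds between requests
RETRY_DELAY = 1       # seconds between retries
MAX_RETRIES = 1

def fetch_with_retry(fetch, retry_count: int = 0):
    if retry_count >= MAX_RETRIES:
        raise RuntimeError("Max retries exceeded")
    time.sleep(RATE_LIMIT_DELAY)
    try:
        return fetch()
    except Exception:
        time.sleep(RETRY_DELAY)
        return fetch_with_retry(fetch, retry_count + 1)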
Retrying after delay...") - sleep(RETRY_DELAY) - return google_search_scraper(search_text, site_domain, retry_count + 1) - - response.raise_for_status() - - soup = BeautifulSoup(response.content, 'html.parser') - search_results = soup.find_all('div', class_='g') - - urls = [] - for result in search_results[:3]: - link = result.find('a') - if link and 'href' in link.attrs: - url = link['href'] - if url.startswith('http'): - urls.append(url) - - logger.info(f"Found {len(urls)} results for domain: {site_domain}") - return urls[:5] - - except requests.RequestException as e: - if retry_count < MAX_RETRIES: - logger.warning(f"Request failed for {site_domain}. Retrying... Error: {str(e)}") - sleep(RETRY_DELAY) - return google_search_scraper(search_text, site_domain, retry_count + 1) - logger.error(f"All retries failed for domain {site_domain}. Error: {str(e)}") - raise HTTPException( - status_code=500, - detail=f"Error scraping {site_domain}: {str(e)}" - ) - -def calculate_similarity(query_embedding: List[float], url_embedding: List[float]) -> float: - """Calculate cosine similarity between two embeddings""" - query_array = np.array(query_embedding) - url_array = np.array(url_embedding) - - similarity = np.dot(url_array, query_array) / ( - np.linalg.norm(url_array) * np.linalg.norm(query_array) - ) - return float(similarity) - -@scrap_websites_router.post("/search", response_model=SearchResponse) -async def search_websites(request: SearchRequest): - logger.info(f"Starting search with query: {request.search_text}") - logger.info(f"Source types requested: {request.source_types}") - - results = {} - error_messages = {} - url_similarities = {} - - # Initialize OpenAI client - logger.debug("Initializing OpenAI client") - openai_client = OpenAIClient(OPENAI_API_KEY) - - # Get domains based on requested source types - domains = [] - for source_type in request.source_types: - if source_type in SOURCES: - domains.extend([source.domain for source in SOURCES[source_type]]) - - if not domains: - logger.warning("No valid source types provided. Using all available domains.") - domains = [source.domain for source in get_all_sources()] - - logger.info(f"Processing {len(domains)} domains") - - # Enhance search text with key terms - search_context = request.search_text - logger.debug("Getting query embedding from OpenAI") - query_embedding = openai_client.get_embeddings([search_context])[0] - - # Higher similarity threshold for better filtering - SIMILARITY_THRESHOLD = 0.75 - - for domain in domains: - logger.info(f"Processing domain: {domain}") - try: - urls = google_search_scraper(request.search_text, domain) - url_sims = [] - valid_urls = [] - - logger.debug(f"Found {len(urls)} URLs for domain {domain}") - - for url in urls: - url_text = extract_url_text(url) - - if not url_text: - logger.debug(f"No meaningful text extracted from URL: {url}") - continue - - logger.debug("Getting URL embedding from OpenAI") - url_embedding = openai_client.get_embeddings([url_text])[0] - similarity = calculate_similarity(query_embedding, url_embedding) - - logger.debug(f"Similarity score for {url}: {similarity}") - - url_sims.append(UrlSimilarityInfo( - url=url, - similarity=similarity, - extracted_text=url_text - )) - - if similarity >= SIMILARITY_THRESHOLD: - valid_urls.append(url) - - results[domain] = valid_urls - url_similarities[domain] = sorted(url_sims, - key=lambda x: x.similarity, - reverse=True) - - logger.info(f"Successfully processed domain {domain}. 
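calculate_similarity above is plain cosine similarity, dot(u, q) / (|u| * |q|). A worked example reproducing the deleted helper:

import numpy as np

def cosine_similarity(a: list[float], b: list[float]) -> float:
    a_arr, b_arr = np.array(a), np.array(b)
    return float(np.dot(a_arr, b_arr) / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr)))

print(cosine_similarity([1.0, 0.0], [1.0, 0.0]))  # 1.0, identical direction
print(cosine_similarity([1.0, 0.0], [0.0, 1.0]))  # 0.0, orthogonal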
Found {len(valid_urls)} valid URLs") - - except HTTPException as e: - logger.error(f"HTTP Exception for domain {domain}: {str(e.detail)}") - error_messages[domain] = str(e.detail) - except Exception as e: - logger.error(f"Unexpected error for domain {domain}: {str(e)}") - error_messages[domain] = f"Unexpected error for {domain}: {str(e)}" - - logger.info("Search completed") - logger.debug(f"Results found for {len(results)} domains") - logger.debug(f"Errors encountered for {len(error_messages)} domains") - - # Collect all valid URLs from results - all_valid_urls = [] - for domain_urls in results.values(): - all_valid_urls.extend(domain_urls) - - logger.info(f"Total valid URLs collected: {len(all_valid_urls)}") - - # Create request body for AI fact check - if all_valid_urls: - fact_check_request = AIFactCheckRequest( - content=request.search_text, - urls=all_valid_urls - ) - - logger.info("Calling AI fact check service") - try: - ai_response = await ai_fact_check(fact_check_request) - logger.info("AI fact check completed successfully") - - # Return response with AI fact check results - return SearchResponse( - results=results, - error_messages=error_messages, - ai_fact_check_result=ai_response - ) - - except Exception as e: - logger.error(f"Error during AI fact check: {str(e)}") - error_messages["ai_fact_check"] = f"Error during fact checking: {str(e)}" - - # Return response without AI fact check if no valid URLs or error occurred - return SearchResponse( - results=results, - error_messages=error_messages, - ai_fact_check_result=None - ) \ No newline at end of file diff --git a/app/config.py b/app/config.py index 8b60dd0..b890247 100644 --- a/app/config.py +++ b/app/config.py @@ -6,6 +6,7 @@ load_dotenv() GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"] GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"] GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"] +GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"] OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] FRONTEND_URL = os.environ["FRONTEND_URL"] \ No newline at end of file diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc index 694cd33e5de5f8ed1537243806a1ba949ad2ec85..7cb8e9acf2b369f7da11e0dc68daf6904a7d2842 100644 GIT binary patch literal 4380 zcma(UO>Y~=b(Xtaeu^TgkI1%UZybMwZA}KY12>NAK$c`njx4*fWH)RJV!bnxw%+A3 zGfUYda!?uvRc}IDz&hkm^j6saq!$PFfnGF7i(aamU9m0TQ{S5|tgj^#u`iM}SrCI8U7T!=EkoXwj;|K5=iBAAN z$&@E*hkH+RULr zY}WT{%iIYP>!xE<)Ax8m4YYY%_(5#m_8B*Uce!eN)q)blfr0t9%V4d|RO_x4${9kX z^RYV)i^qhKvO-BYLY1;gBV~=MvX$ZxKc);yP$>b_V5+cZih6*b#&OkP?s zEq`i_S@%n3tyY5Tm?I>zVj&g<08Fvg;1E$n0t_WF3g8iGrE(k7t@O~wOl#!i#zHGM z@%eRrsLS}>wi^TTciXO^6{10A%B!x}lvOPjnr_x!T?_OZjM`vGb>4fhP|$e}@5mz< z0uX4kbJu2pX=ZwE`35lEyvy;LP?W>CLXSz1hK0cGUFio#HmxG3G7$h$74`D-yCuh*#oo7ms^PK2vzc z?-l|0+B;gg54(8E?}`Y5bZNg-K2vU!g1W#@!WqpA%Zu}dTNl0oMnexXx8^qtvr0oO zU`*d|mD1W_=TOD~WjP{H)&h%MBzmaH z9%_Omftu=}rs=@b%wFqCL3D^>7SB$r$P37?d2NJsfz( zTRj|E$uSmkK-)b8P4MuZ9=XPm!g!GDDKnx5*~p4JI1t!>hp=oO#X1AoZTX#QbB|%= z=yslh%F#RgT?7RHK~xr{<{0h^xh$FF_<@bBpoDy}&Uf?Je&vP$US|$gA3xlU8-k=P zTe7t8U=}q`U>7#L6+_586h>Vzio1I6gD_8E>4H}6iq4fC{+(fz_7on4ms_@AhN!!) 
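With this config change, the search URL that used to be hard-coded in scrap_websites.py now lives in the environment. A sketch of the expected .env entry and how config.py picks it up:

# .env (assumed entry, matching the previously hard-coded constant)
#   GOOGLE_SEARCH_URL=https://www.googleapis.com/customsearch/v1

import os
from dotenv import load_dotenv

load_dotenv()
GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"]  # raises KeyError if unset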
z9JxMMg_$u|y}@}H9pel=u5JFeGi0kCF%=Xo-7L-=!HnO$bCac?op zY}5sX01)nO4+;i{xRxBXNug^tO`RUfSp-j-OXxW$8DXid;IHp1IGz)uZj!{2DJ0Jz z!IhcLw4x_73DXIcPN+Ybo|$V2g%;ze`9+-sK_n^Qy-l`@mE))Y1OvC}w2azh6%B1i zq{r?O0N!2@_fTwmMmphpFkbrt2zcyB_i7XP%s(wz7tenk7EAp{ z%i(8AEzfK^nNY{=UmyHcZ_#OV^xnSl zvm0M`4V8wcemnTv_R{{-<*qZO&>5qp@AobH@3-tX$D>nEJAU`H z=VXuYyBE7ob_oB}AwZjg!AC>0X<3!h;zCkd6qhAUl!>&Q7S*(P^3u#jka#Ma(+d5| z%gduuQpcQVR85aTrA197l@aN3CZ~_!*uo&a1p+Kok47w4Itq34F`^_PYnMlGC@2gK zT_`(>v-|omPw=K$fm%xH*E=f!ZrJ;@J#El zt$h1W;kw>!@0G>XW#%+n2e<5!d6|YtbkBFVnuF#V9JHG{==NRkqgwqI0LDR8*GE5V z#B=z)!*p4q(Wvwb-7{9%7Hk!Sl)~%-{5Qqo`G+zroM=EE{tXbw4cm|rM1mRhBcCml8Z{?F% zPcHZoM~Kt-$x0!GW=tZi9D-22aS#g6CSkh?N&@`FacpBD1eEFwwo&lpRV1jNh2Z>7 zo@yt+h1SQ?RQQ~*S4w#k=Rp-Ge}x1!i(*Ac1OnFnRL#Zg#)}KFC@j z|8ZIRUc}+Iko*h@)qH9LKgUbU27J5gC=^4Y;#lgBZo`6^K5RYF8h(nsNbbN-`v)Lk z4V|_j3>CGt>Eg9pqxS>-Sc8o8t|itd*Oq?SSLTPQr4+T|!JF@b-PqPL1m6L_?L2Js zL?MXvj~U|lmcR{!H{HyE!;qr(w{0>`F}UUOMw&JmZ)fvnC(|=v^bHxk6Q!O>qxUKJ zm@(9}>1QH`N*#xGnVz2+7&OLk#?Gd#BYkLnB3hrgV}1B84c3S6kHYFUNqd%Zdz-hYX3wPzl{$=^3U+ot^)xTM@=pb9x3%5xgR-N=8xLy`#fl^ zc)%DqX!M^di8Dt3Nn_|B=rsp1W=uHtNkfm20vv#muB6u8*dO32BM6{8gfPL&p(9bAQb;8gKD z*q&mrC|4M0RNNdpQB-lJiHg6Ob*;gyaH(Ec diff --git a/app/models/ai_fact_check_models.py b/app/models/ai_fact_check_models.py new file mode 100644 index 0000000..0949e51 --- /dev/null +++ b/app/models/ai_fact_check_models.py @@ -0,0 +1,229 @@ +from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict +from typing import Dict, List, Optional, Any, Union +from enum import Enum +from datetime import datetime +from urllib.parse import urlparse + +# Common Models +class TokenUsage(BaseModel): + prompt_tokens: Optional[int] = 0 + completion_tokens: Optional[int] = 0 + total_tokens: Optional[int] = 0 + +class ErrorResponse(BaseModel): + detail: str + error_code: str = Field(..., description="Unique error code for this type of error") + timestamp: str = Field(default_factory=lambda: datetime.now().isoformat()) + path: Optional[str] = Field(None, description="The endpoint path where error occurred") + + model_config = ConfigDict(json_schema_extra={ + "example": { + "detail": "Error description", + "error_code": "ERROR_CODE", + "timestamp": "2024-12-09T16:49:30.905765", + "path": "/check-facts" + } + }) + +# Fact Check Models +class Publisher(BaseModel): + name: str + site: Optional[str] = Field(None, description="Publisher's website") + + @validator('site') + def validate_site(cls, v): + if v and not (v.startswith('http://') or v.startswith('https://')): + return f"https://{v}" + return v + +class ClaimReview(BaseModel): + publisher: Publisher + url: Optional[HttpUrl] = None + title: Optional[str] = None + reviewDate: Optional[str] = None + textualRating: Optional[str] = None + languageCode: str = Field(default="en-US") + +class Claim(BaseModel): + text: str + claimant: Optional[str] = None + claimDate: Optional[str] = None + claimReview: List[ClaimReview] + +class SourceType(str, Enum): + FACT_CHECKER = "fact_checker" + NEWS_SITE = "news_site" + +class FactCheckSource(BaseModel): + domain: str + type: SourceType + priority: int = Field(default=1, ge=1, le=10) + +# Verification Models +class VerificationResult(BaseModel): + verdict: str = Field(..., description="True/False/Insufficient Information") + confidence: str = Field(..., description="High/Medium/Low") + evidence: Union[str, List[str]] + reasoning: str + missing_info: Optional[str] = None + + model_config = ConfigDict(json_schema_extra={ + "example": { + "verdict": "True", + "confidence": "High", + "evidence": 
["Direct quote from source supporting the claim"], + "reasoning": "Detailed analysis of why the claim is considered true", + "missing_info": "Any caveats or limitations of the verification" + } + }) + +# Request Models +class BaseFactCheckRequest(BaseModel): + content: str = Field( + ..., + min_length=10, + max_length=1000, + description="The claim to be fact-checked" + ) + + @validator('content') + def validate_content(cls, v): + if not v.strip(): + raise ValueError("Content cannot be empty or just whitespace") + return v.strip() + +class GoogleFactCheckRequest(BaseFactCheckRequest): + language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") + max_results_per_source: int = Field(default=10, ge=1, le=50) + +class AIFactCheckRequest(BaseFactCheckRequest): + urls: List[str] = Field( + ..., + min_items=1, + max_items=5, + description="List of URLs to check the content against. URLs will be prefixed with https:// if protocol is missing" + ) + + @validator('urls') + def validate_urls(cls, urls): + validated_urls = [] + for url in urls: + if not url.strip(): + raise ValueError("URL cannot be empty") + + # Add https:// if no protocol specified + if not url.startswith(('http://', 'https://')): + url = f'https://{url}' + + try: + result = urlparse(url) + if not result.netloc: + raise ValueError(f"Invalid URL structure for {url}") + validated_urls.append(url) + except Exception as e: + raise ValueError(f"Invalid URL {url}: {str(e)}") + + return validated_urls + + model_config = ConfigDict(json_schema_extra={ + "example": { + "content": "Indian flag was drawn in BUET campus", + "urls": [ + "www.altnews.in/article-about-flag", + "www.another-source.com/related-news" + ] + } + }) + +# Response Models +class BaseFactCheckResponse(BaseModel): + query: str + token_usage: TokenUsage + sources: List[str] + + model_config = ConfigDict(json_schema_extra={ + "example": { + "query": "Example statement to verify", + "token_usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150 + }, + "sources": ["source1.com", "source2.com"], + } + }) + +class GoogleFactCheckResponse(BaseFactCheckResponse): + total_claims_found: int + results: List[Dict[str, Any]] + verification_result: Dict[str, Any] + summary: Dict[str, int] + + model_config = ConfigDict(json_schema_extra={ + "example": { + "query": "Example claim", + "total_claims_found": 1, + "results": [{ + "text": "Example claim text", + "claimant": "Source name", + "claimReview": [{ + "publisher": { + "name": "Fact Checker", + "site": "factchecker.com" + }, + "textualRating": "True" + }] + }], + "verification_result": { + "verdict": "True", + "confidence": "High", + "evidence": ["Supporting evidence"], + "reasoning": "Detailed analysis" + }, + "sources": ["factchecker.com"], + "token_usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150 + }, + "summary": { + "total_sources": 1, + "fact_checking_sites_queried": 10 + } + } + }) + +class AIFactCheckResponse(BaseFactCheckResponse): + verification_result: Dict[str, VerificationResult] # Changed to Dict to store results per URL + + model_config = ConfigDict(json_schema_extra={ + "example": { + "query": "Indian flag was drawn in BUET campus", + "verification_result": { + "https://www.source1.com": { + "verdict": "True", + "confidence": "High", + "evidence": ["Supporting evidence from source 1"], + "reasoning": "Detailed analysis from source 1", + "missing_info": None + }, + "https://www.source2.com": { + "verdict": "True", + "confidence": "Medium", + 
"evidence": ["Supporting evidence from source 2"], + "reasoning": "Analysis from source 2", + "missing_info": "Additional context needed" + } + }, + "sources": ["source1.com", "source2.com"], + "token_usage": { + "prompt_tokens": 200, + "completion_tokens": 100, + "total_tokens": 300 + } + } + }) + +# Backwards compatibility aliases +FactCheckRequest = GoogleFactCheckRequest +FactCheckResponse = GoogleFactCheckResponse \ No newline at end of file diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py index 0949e51..1b30511 100644 --- a/app/models/fact_check_models.py +++ b/app/models/fact_check_models.py @@ -1,229 +1,101 @@ -from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict -from typing import Dict, List, Optional, Any, Union -from enum import Enum +from pydantic import BaseModel, Field, HttpUrl, validator +from typing import List, Literal, Union from datetime import datetime -from urllib.parse import urlparse +from enum import Enum -# Common Models -class TokenUsage(BaseModel): - prompt_tokens: Optional[int] = 0 - completion_tokens: Optional[int] = 0 - total_tokens: Optional[int] = 0 +class VerdictEnum(str, Enum): + TRUE = "True" + FALSE = "False" + PARTIALLY_TRUE = "Partially True" + UNVERIFIED = "Unverified" -class ErrorResponse(BaseModel): - detail: str - error_code: str = Field(..., description="Unique error code for this type of error") - timestamp: str = Field(default_factory=lambda: datetime.now().isoformat()) - path: Optional[str] = Field(None, description="The endpoint path where error occurred") +class ConfidenceEnum(str, Enum): + HIGH = "High" + MEDIUM = "Medium" + LOW = "Low" - model_config = ConfigDict(json_schema_extra={ - "example": { - "detail": "Error description", - "error_code": "ERROR_CODE", - "timestamp": "2024-12-09T16:49:30.905765", - "path": "/check-facts" - } - }) +class FactCheckRequest(BaseModel): + query: str = Field( + ..., + min_length=3, + max_length=500, + description="The claim or statement to be fact-checked", + example="Did NASA confirm finding alien structures on Mars in 2024?" 
+ ) -# Fact Check Models -class Publisher(BaseModel): - name: str - site: Optional[str] = Field(None, description="Publisher's website") - - @validator('site') - def validate_site(cls, v): - if v and not (v.startswith('http://') or v.startswith('https://')): - return f"https://{v}" +class Source(BaseModel): + url: str + name: str = "" + + @validator('url') + def validate_url(cls, v): + # Basic URL validation without requiring HTTP/HTTPS + if not v or len(v) < 3: + raise ValueError("URL must not be empty and must be at least 3 characters") return v -class ClaimReview(BaseModel): - publisher: Publisher - url: Optional[HttpUrl] = None - title: Optional[str] = None - reviewDate: Optional[str] = None - textualRating: Optional[str] = None - languageCode: str = Field(default="en-US") - -class Claim(BaseModel): - text: str - claimant: Optional[str] = None - claimDate: Optional[str] = None - claimReview: List[ClaimReview] - -class SourceType(str, Enum): - FACT_CHECKER = "fact_checker" - NEWS_SITE = "news_site" - -class FactCheckSource(BaseModel): - domain: str - type: SourceType - priority: int = Field(default=1, ge=1, le=10) - -# Verification Models -class VerificationResult(BaseModel): - verdict: str = Field(..., description="True/False/Insufficient Information") - confidence: str = Field(..., description="High/Medium/Low") - evidence: Union[str, List[str]] - reasoning: str - missing_info: Optional[str] = None - - model_config = ConfigDict(json_schema_extra={ - "example": { - "verdict": "True", - "confidence": "High", - "evidence": ["Direct quote from source supporting the claim"], - "reasoning": "Detailed analysis of why the claim is considered true", - "missing_info": "Any caveats or limitations of the verification" - } - }) - -# Request Models -class BaseFactCheckRequest(BaseModel): - content: str = Field( +class FactCheckResponse(BaseModel): + claim: str = Field( ..., min_length=10, max_length=1000, - description="The claim to be fact-checked" + description="The exact claim being verified" ) - - @validator('content') - def validate_content(cls, v): - if not v.strip(): - raise ValueError("Content cannot be empty or just whitespace") - return v.strip() - -class GoogleFactCheckRequest(BaseFactCheckRequest): - language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") - max_results_per_source: int = Field(default=10, ge=1, le=50) - -class AIFactCheckRequest(BaseFactCheckRequest): - urls: List[str] = Field( + verdict: VerdictEnum = Field( + ..., + description="The verification verdict" + ) + confidence: ConfidenceEnum = Field( + ..., + description="Confidence level in the verdict" + ) + sources: List[Source] = Field( ..., min_items=1, - max_items=5, - description="List of URLs to check the content against. 
URLs will be prefixed with https:// if protocol is missing" + description="List of sources used in verification" + ) + evidence: str = Field( + ..., + min_length=20, + max_length=500, + description="Concise summary of key evidence" + ) + explanation: str = Field( + ..., + min_length=50, + max_length=1000, + description="Detailed explanation of verification findings" + ) + additional_context: str = Field( + ..., + min_length=20, + max_length=500, + description="Important context about the verification" ) - - @validator('urls') - def validate_urls(cls, urls): - validated_urls = [] - for url in urls: - if not url.strip(): - raise ValueError("URL cannot be empty") - - # Add https:// if no protocol specified - if not url.startswith(('http://', 'https://')): - url = f'https://{url}' - - try: - result = urlparse(url) - if not result.netloc: - raise ValueError(f"Invalid URL structure for {url}") - validated_urls.append(url) - except Exception as e: - raise ValueError(f"Invalid URL {url}: {str(e)}") - - return validated_urls - model_config = ConfigDict(json_schema_extra={ - "example": { - "content": "Indian flag was drawn in BUET campus", - "urls": [ - "www.altnews.in/article-about-flag", - "www.another-source.com/related-news" - ] - } - }) - -# Response Models -class BaseFactCheckResponse(BaseModel): - query: str - token_usage: TokenUsage - sources: List[str] - - model_config = ConfigDict(json_schema_extra={ - "example": { - "query": "Example statement to verify", - "token_usage": { - "prompt_tokens": 100, - "completion_tokens": 50, - "total_tokens": 150 - }, - "sources": ["source1.com", "source2.com"], - } - }) - -class GoogleFactCheckResponse(BaseFactCheckResponse): - total_claims_found: int - results: List[Dict[str, Any]] - verification_result: Dict[str, Any] - summary: Dict[str, int] - - model_config = ConfigDict(json_schema_extra={ - "example": { - "query": "Example claim", - "total_claims_found": 1, - "results": [{ - "text": "Example claim text", - "claimant": "Source name", - "claimReview": [{ - "publisher": { - "name": "Fact Checker", - "site": "factchecker.com" - }, - "textualRating": "True" - }] - }], - "verification_result": { - "verdict": "True", + class Config: + json_schema_extra = { + "example": { + "claim": "NASA confirmed finding alien structures on Mars in 2024", + "verdict": "False", "confidence": "High", - "evidence": ["Supporting evidence"], - "reasoning": "Detailed analysis" - }, - "sources": ["factchecker.com"], - "token_usage": { - "prompt_tokens": 100, - "completion_tokens": 50, - "total_tokens": 150 - }, - "summary": { - "total_sources": 1, - "fact_checking_sites_queried": 10 + "sources": [ + { + "url": "https://www.nasa.gov/mars-exploration", + "name": "NASA Mars Exploration" + }, + { + "url": "https://factcheck.org/2024/mars-claims", + "name": "FactCheck.org" + } + ], + "evidence": "NASA has made no such announcement. Recent Mars rover images show natural rock formations.", + "explanation": "Multiple fact-checking organizations investigated this claim. NASA's official communications and Mars mission reports from 2024 contain no mention of alien structures. The viral images being shared are misidentified natural geological formations.", + "additional_context": "Similar false claims about alien structures on Mars have circulated periodically since the first Mars rovers began sending back images." 
} } - }) -class AIFactCheckResponse(BaseFactCheckResponse): - verification_result: Dict[str, VerificationResult] # Changed to Dict to store results per URL - - model_config = ConfigDict(json_schema_extra={ - "example": { - "query": "Indian flag was drawn in BUET campus", - "verification_result": { - "https://www.source1.com": { - "verdict": "True", - "confidence": "High", - "evidence": ["Supporting evidence from source 1"], - "reasoning": "Detailed analysis from source 1", - "missing_info": None - }, - "https://www.source2.com": { - "verdict": "True", - "confidence": "Medium", - "evidence": ["Supporting evidence from source 2"], - "reasoning": "Analysis from source 2", - "missing_info": "Additional context needed" - } - }, - "sources": ["source1.com", "source2.com"], - "token_usage": { - "prompt_tokens": 200, - "completion_tokens": 100, - "total_tokens": 300 - } - } - }) - -# Backwards compatibility aliases -FactCheckRequest = GoogleFactCheckRequest -FactCheckResponse = GoogleFactCheckResponse \ No newline at end of file +class ErrorResponse(BaseModel): + detail: str + error_code: str = Field(..., example="VALIDATION_ERROR") + path: str = Field(..., example="/check-facts") \ No newline at end of file diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc index 095e86372ccf3e3756b303b4997437a8fc325911..b0b0fa42365d2d038144e1a28f8284abff9098f8 100644 GIT binary patch delta 2342 zcmY*aOKcoP5S{V*6Ytl%>rWCKLI?r{>{%mF5`GcC0|E{p5Qj)$dS`lO+dDHo?(W&m zwt)44D+p;p_|7E=B!qI|gail)DVJCvkPIk@3s(~&q*xrN>REd?E4kfOU0q$T>Q&9P zqaWQ{zFI2fE%?3mCJFwme_8&}?r-S4jCn$(FD}?O^H;sxbT^)7>`X{m5>roDlXVC@TzgwH;Dspt{tdf&pIQaVNT@%QWpsk4b#pp{cGO zdT@sM(s^*L1PL7~bv`1Y$KW|C_W@m7@jwvYX$f8SXhZ@YAUY=rb;<8TTrr7g%cQk{ z)=ajQBdMNMw8xaG`z$^f_5i+~ZE_wtA?r|GgaRaRgBqK$JOY9-sU3_zG-2;qQqfUrJOq#LI$@jgq zNxH;Dx&%aIkmpg1NYj|;f@Tpaja4aVk=WRnw43&n32`X|k8Qc1b%&tRrI>20SEa*b zt3#w|M^qeguHSaTwl3qr5G<0sfFwtb8^L1%gI+MgQ79h+{&)ntVT^U6h-FzwSdYkz zNn?zhSA=y*OeJ+t3;D!BPzP-v)FZqYS|REMFpsl2S5>~dnq5Zlj_T^X%QGLaFN!n* zDuF>lnTGC#9BpnI$crPd2QxKfQt4UZ`Y;->e@?i9_hp6~h$E!(U`}K+3(spU7}cuf zQ|dLz@C#6PLlzN5Z&TK!%V_M5lrA4<8X;z;J0#U58c`8IXF>9ClD7IlnpGK+9<*^x zmB7!_*o1T<3h9Qv!GB!_`NP1LVBT)PsE=XZF=~onil>)`S^z460AK>Nz#D~#s*E#? 
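Taken together, the rewritten models validate a complete report at construction time. A usage sketch, assuming the classes are imported from app.models.fact_check_models as defined above:

from app.models.fact_check_models import (
    ConfidenceEnum, FactCheckResponse, Source, VerdictEnum,
)

report = FactCheckResponse(
    claim="NASA confirmed finding alien structures on Mars in 2024",
    verdict=VerdictEnum.FALSE,
    confidence=ConfidenceEnum.HIGH,
    sources=[Source(url="https://www.nasa.gov/mars-exploration", name="NASA")],
    evidence="NASA has made no such announcement; rover images show natural rock formations.",
    explanation="Multiple fact-checking organizations reviewed the claim and found no NASA communication from 2024 mentioning alien structures.",
    additional_context="Similar false claims have circulated since the first Mars rovers returned images.",
)
print(report.verdict)  # VerdictEnum.FALSE

Passing a verdict outside VerdictEnum, or evidence shorter than 20 characters, raises a validation error instead of silently producing a malformed report.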
z>pKNaxWMmNNa{^IAFCsLFs@v+gGqaF@X+)X`_$mC=~cTtSeSWrVIE==3%D%evV_aY z!8E-j)Amp0m%c!S-WI!J7R%`xtEAwp8vw^OogJylZ`)`)nf9ykec19K1i< zuxo?wW}m3ef%c5OYhRK!?Cuh4gKB&#(0H z-Q7w+)jA2o^$fZckAn6q_>;@9Y*{z+R{i*PW2bii)(gKCsvkVCbNbv*h4a@Y&x^BQ zaHhIDt$GQhKwz|D5|{g6*`4u;R3u?y3Z;`Ucb$lPF!3D1){$w0O5w5oIkaEF1ruT( zhxp2{H(1sL2DyLaak!d>^n!R2Lc|{|^V`EKvXe delta 532 zcmbQLc0q#gG%qg~0}$kA#HOcmZRDH6sL5Q#o|KfVmz)2a z#aGFy2V|;kzQO0g$mlXzQ9#tB1SkLjDNK?;Mimg(GL$ew1sQ4>QkYv9*08K*f`|k& zXtGWY6VPYl2l9&qCbtRL3i1M(nvAy?D~cI`EQKQ7$yWpnWWs>#VvyGw7*+(Xk6Ibk z!0;K!1F9^Nn5-bEO33OO}+-{BUVpnXx<{0g_l1s01UJJy~)P~?ITTqqD7h@0u&Np%Yg3T2C~6!Hz)!*>KBI%P)})2s$G#S zkPGru@lGJ|ftit!@jip>Z3h0k42t&|QZF;4e&Asd=laIVJ~>fHnw4FQ?SshVW+7D$ zVOegb2KEnjlQ#>Q=&&+c&JdZAe^FBJ3j>h-^HVSrQ03200>Z*<#UQ&E%Q)Jx9AuSo HVrByXHG+Z1 diff --git a/app/websites/fact_checker_website.py b/app/websites/fact_checker_website.py index 571b333..2e4934b 100644 --- a/app/websites/fact_checker_website.py +++ b/app/websites/fact_checker_website.py @@ -1,17 +1,120 @@ from typing import Dict, List import requests from fastapi import HTTPException -from app.models.fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType +from app.models.ai_fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType # Sources configuration with validation SOURCES = { "fact_checkers": [ FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1) for domain in [ - "bbc.com", - "altnews.in", - "en.prothomalo.com" - ] + "snopes.com", + "politifact.com", + "factcheck.org", + "reuters.com/fact-check", + "apnews.com/hub/ap-fact-check", + "bbc.com/news/reality_check", + "fullfact.org", + "afp.com/fact-check", + "truthorfiction.com", + "leadstories.com", + "checkyourfact.com", + "washingtonpost.com/news/fact-checker", + "factcheck.kz", + "poynter.org/ifcn", + "factcheckeu.info", + "africacheck.org", + "thequint.com/webqoof", + "altnews.in", + "facta.news", + "factcheckni.org", + "mythdetector.ge", + "verificado.mx", + "euvsdisinfo.eu", + "factcheck.afp.com", + "newtral.es", + "maldita.es", + "faktograf.hr", + "demagog.org.pl", + "factnameh.com", + "faktiskt.se", + "teyit.org", + "factly.in", + "boom.live", + "stopfake.org", + "factcheck.ge", + "factcheck.kg", + "factcheck.uz", + "factcheck.tj", + "factcheck.az", + "factcheck.am", + "factcheck.md", + "verafiles.org", + "rappler.com/fact-check", + "vera.com.gt", + "chequeado.com", + "aosfatos.org", + "lasillavacia.com/detector-mentiras", + "colombiacheck.com", + "ecuadorchequea.com", + "elsurti.com/checado", + "verificat.cat", + "mafindo.or.id", + "tempo.co/cek-fakta", + "factcheck.mk", + "raskrinkavanje.ba", + "faktograf.hr", + "demagog.cz", + "faktabaari.fi", + "correctiv.org", + "mimikama.at", + "factcheck.vlaanderen", + "factuel.afp.com", + "nieuwscheckers.nl", + "faktisk.no", + "tjekdet.dk", + "ellinikahoaxes.gr", + "faktograf.id", + "stopfake.kz", + "pesacheck.org", + "dubawa.org", + "namibiafactcheck.org.na", + "zimfact.org", + "ghanafact.com", + "factspace.africa", + "factcrescendo.com", + "vishvasnews.com", + "factcheck.lk", + "newschecker.in", + "boomlive.in", + "digiteye.in", + "indiatoday.in/fact-check", + "factcrescendo.com", + "piyasa.com/fact-check", + "taiwanese.facts.news", + "taiwanfactcheck.com", + "mygopen.com", + "tfc-taiwan.org.tw", + "cofacts.tw", + "rumor.taipei", + "fact.qq.com", + "factcheck.afp.com/list", + "acfta.org", + "crosscheck.firstdraftnews.org", + "healthfeedback.org", + "climatefeedback.org", + "sciencefeedback.co", + "factcheck.aap.com.au", + 
"emergent.info", + "hoax-slayer.net", + "truthorfiction.com", + "factcheck.media", + "mediawise.org", + "thejournal.ie/factcheck", + "journalistsresource.org", + "metafact.io", + "reporterslab.org/fact-checking" +] ], "news_sites": [ FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2) @@ -82,5 +185,6 @@ def get_all_sources() -> List[FactCheckSource]: """ Get all sources sorted by priority """ - all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"] + # all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"] + all_sources = SOURCES["fact_checkers"] return sorted(all_sources, key=lambda x: x.priority) \ No newline at end of file diff --git a/output.json b/output.json deleted file mode 100644 index 8360403..0000000 --- a/output.json +++ /dev/null @@ -1,595 +0,0 @@ -{ - "kind": "customsearch#search", - "url": { - "type": "application/json", - "template": "https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={safe?}&cx={cx?}&sort={sort?}&filter={filter?}&gl={gl?}&cr={cr?}&googlehost={googleHost?}&c2coff={disableCnTwTranslation?}&hq={hq?}&hl={hl?}&siteSearch={siteSearch?}&siteSearchFilter={siteSearchFilter?}&exactTerms={exactTerms?}&excludeTerms={excludeTerms?}&linkSite={linkSite?}&orTerms={orTerms?}&dateRestrict={dateRestrict?}&lowRange={lowRange?}&highRange={highRange?}&searchType={searchType}&fileType={fileType?}&rights={rights?}&imgSize={imgSize?}&imgType={imgType?}&imgColorType={imgColorType?}&imgDominantColor={imgDominantColor?}&alt=json" - }, - "queries": { - "request": [ - { - "title": "Google Custom Search - Sheikh Hasina resigned as a Prime Minister of Bangladesh", - "totalResults": "758000", - "searchTerms": "Sheikh Hasina resigned as a Prime Minister of Bangladesh", - "count": 10, - "startIndex": 1, - "inputEncoding": "utf8", - "outputEncoding": "utf8", - "safe": "off", - "cx": "d437f1eb581de4590" - } - ], - "nextPage": [ - { - "title": "Google Custom Search - Sheikh Hasina resigned as a Prime Minister of Bangladesh", - "totalResults": "758000", - "searchTerms": "Sheikh Hasina resigned as a Prime Minister of Bangladesh", - "count": 10, - "startIndex": 11, - "inputEncoding": "utf8", - "outputEncoding": "utf8", - "safe": "off", - "cx": "d437f1eb581de4590" - } - ] - }, - "context": { - "title": "Prothom Alo" - }, - "searchInformation": { - "searchTime": 0.513164, - "formattedSearchTime": "0.51", - "totalResults": "758000", - "formattedTotalResults": "758,000" - }, - "items": [ - { - "kind": "customsearch#result", - "title": "Sheikh Hasina: Euphoria in Bangladesh after PM flees country", - "htmlTitle": "\u003cb\u003eSheikh Hasina\u003c/b\u003e: Euphoria in \u003cb\u003eBangladesh\u003c/b\u003e after PM flees country", - "link": "https://www.bbc.com/news/articles/clywww69p2vo", - "displayLink": "www.bbc.com", - "snippet": "Aug 5, 2024 ... 
Bangladeshi Prime Minister Sheikh Hasina has resigned after weeks of deadly anti-government protests, putting an end to her more than two decades dominating ...", - "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e \u003cb\u003eBangladeshi Prime Minister Sheikh Hasina\u003c/b\u003e has \u003cb\u003eresigned\u003c/b\u003e after weeks of deadly anti-government protests, putting an end to her more than two decades dominating ...", - "formattedUrl": "https://www.bbc.com/news/articles/clywww69p2vo", - "htmlFormattedUrl": "https://www.bbc.com/news/articles/clywww69p2vo", - "pagemap": { - "cse_thumbnail": [ - { - "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ2noEFH2T-yJo4oB7DU_MF2FqAUzIHU5paMXHka1ny_vMi037f2gtOZ3of&s", - "width": "300", - "height": "168" - } - ], - "metatags": [ - { - "msapplication-tilecolor": "#da532c", - "og:image": "https://ichef.bbci.co.uk/news/1024/branded_news/db85/live/388ebc30-5367-11ef-aebc-6de4d31bf5cd.jpg", - "apple-itunes-app": "app-id=364147881, app-argument=https://www.bbc.com/news/articles/clywww69p2vo", - "twitter:title": "Sheikh Hasina: Euphoria in Bangladesh after PM flees country", - "twitter:card": "summary_large_image", - "og:image:alt": "Protesters storming Prime Minister Sheikh Hasina's palace after she fled the country", - "theme-color": "#ffffff", - "al:ios:app_name": "BBC: World News & Stories", - "og:title": "Sheikh Hasina: Euphoria in Bangladesh after PM flees country", - "al:android:package": "bbc.mobile.news.ww", - "al:ios:url": "bbcx://news/articles/clywww69p2vo", - "al:web:url": "https://bbc.com/news/articles/clywww69p2vo", - "og:description": "President Mohammed Shahabuddin ordered the release of a jailed former prime minister.", - "version": "2.12.0+20", - "al:ios:app_store_id": "364147881", - "twitter:image:src": "https://ichef.bbci.co.uk/news/1024/branded_news/db85/live/388ebc30-5367-11ef-aebc-6de4d31bf5cd.jpg", - "al:android:url": "bbcx://news/articles/clywww69p2vo", - "next-head-count": "36", - "twitter:image:alt": "Protesters storming Prime Minister Sheikh Hasina's palace after she fled the country", - "viewport": "width=device-width", - "twitter:description": "President Mohammed Shahabuddin ordered the release of a jailed former prime minister.", - "al:android:app_name": "BBC: World News & Stories" - } - ], - "cse_image": [ - { - "src": "https://ichef.bbci.co.uk/news/1024/branded_news/db85/live/388ebc30-5367-11ef-aebc-6de4d31bf5cd.jpg" - } - ] - } - }, - { - "kind": "customsearch#result", - "title": "Bangladesh: Prime Minister Hasina Resigns amid Mass Protests ...", - "htmlTitle": "\u003cb\u003eBangladesh\u003c/b\u003e: \u003cb\u003ePrime Minister Hasina Resigns\u003c/b\u003e amid Mass Protests ...", - "link": "https://www.hrw.org/news/2024/08/06/bangladesh-prime-minister-hasina-resigns-amid-mass-protests", - "displayLink": "www.hrw.org", - "snippet": "Aug 6, 2024 ... 
(London) – Bangladesh Prime Minister Sheikh Hasina resigned on August 5, 2024, and fled the country after weeks of student protests, ...",
-      "htmlSnippet": "Aug 6, 2024 \u003cb\u003e...\u003c/b\u003e (London) – \u003cb\u003eBangladesh Prime Minister Sheikh Hasina resigned\u003c/b\u003e on August 5, 2024, and fled the country after weeks of student protests, ...",
-      "formattedUrl": "https://www.hrw.org/.../bangladesh-prime-minister-hasina-resigns-amid-ma...",
-      "htmlFormattedUrl": "https://www.hrw.org/.../\u003cb\u003ebangladesh\u003c/b\u003e-\u003cb\u003eprime\u003c/b\u003e-\u003cb\u003eminister\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e-resigns-amid-ma...",
-      "pagemap": {
-        "cse_thumbnail": [
-          {
-            "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT7Rd-kZwml7ax4Q_93QFbon2bmbwYliEYvMil6qgM0xEG6tV72lS_iclM&s",
-            "width": "310",
-            "height": "163"
-          }
-        ],
-        "metatags": [
-          {
-            "og:image": "https://www.hrw.org/sites/default/files/styles/opengraph/public/media_2024/08/202408asia_bangladesh_Sheikh%20Hasina.jpg?h=888143e8&itok=IKUTUc3F",
-            "og:image:alt": "Bangladesh’s former Prime Minister Sheikh Hasina addresses the media in Mirpur after the anti-quota protests.",
-            "article:published_time": "2024-08-06T14:00:00-0400",
-            "twitter:card": "summary_large_image",
-            "twitter:title": "Bangladesh: Prime Minister Hasina Resigns amid Mass Protests",
-            "og:site_name": "Human Rights Watch",
-            "twitter:site:id": "14700316",
-            "handheldfriendly": "true",
-            "og:title": "Bangladesh: Prime Minister Hasina Resigns amid Mass Protests",
-            "google": "H_DzcJuJMJKVAO6atlPsK4HHr2WienspT6e74P5fVFY",
-            "og:updated_time": "2024-08-08T10:24:02-0400",
-            "og:description": "Bangladesh Prime Minister Sheikh Hasina resigned on August 5, 2024, and fled the country after weeks of student protests.",
-            "og:image:secure_url": "https://www.hrw.org/sites/default/files/styles/opengraph/public/media_2024/08/202408asia_bangladesh_Sheikh%20Hasina.jpg?h=888143e8&itok=IKUTUc3F",
-            "article:publisher": "https://www.facebook.com/HumanRightsWatch",
-            "twitter:image": "https://www.hrw.org/sites/default/files/styles/opengraph/public/media_2024/08/202408asia_bangladesh_Sheikh%20Hasina.jpg?h=888143e8&itok=IKUTUc3F",
-            "twitter:image:alt": "Bangladesh’s former Prime Minister Sheikh Hasina addresses the media in Mirpur after the anti-quota protests.",
-            "twitter:site": "@hrw",
-            "article:modified_time": "2024-08-08T10:24:02-0400",
-            "viewport": "width=device-width, initial-scale=1.0",
-            "twitter:description": "Bangladesh Prime Minister Sheikh Hasina resigned on August 5, 2024, and fled the country after weeks of student protests.",
-            "mobileoptimized": "width",
-            "og:url": "https://www.hrw.org/news/2024/08/06/bangladesh-prime-minister-hasina-resigns-amid-mass-protests"
-          }
-        ],
-        "cse_image": [
-          {
-            "src": "https://www.hrw.org/sites/default/files/styles/opengraph/public/media_2024/08/202408asia_bangladesh_Sheikh%20Hasina.jpg?h=888143e8&itok=IKUTUc3F"
-          }
-        ]
-      }
-    },
-    {
-      "kind": "customsearch#result",
-      "title": "Bangladesh wakes up to new uncertain future after PM Sheikh ...",
-      "htmlTitle": "\u003cb\u003eBangladesh\u003c/b\u003e wakes up to new uncertain future after PM \u003cb\u003eSheikh\u003c/b\u003e ...",
-      "link": "https://www.bbc.com/news/live/ckdgg87lnkdt",
-      "displayLink": "www.bbc.com",
-      "snippet": "Aug 5, 2024 ... Yesterday's historic events saw Bangladesh's Prime Minister Sheikh Hasina resign from power and flee the country. Today, government ...",
-      "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e Yesterday's historic events saw \u003cb\u003eBangladesh's Prime Minister Sheikh Hasina resign\u003c/b\u003e from power and flee the country. Today, government ...",
-      "formattedUrl": "https://www.bbc.com/news/live/ckdgg87lnkdt",
-      "htmlFormattedUrl": "https://www.bbc.com/news/live/ckdgg87lnkdt",
-      "pagemap": {
-        "cse_thumbnail": [
-          {
-            "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ9V5V2pFKUOVvlosPa5swslIzMQnDiFW21RkSxNXvXxhrcyvRNZMc2bqXE&s",
-            "width": "300",
-            "height": "168"
-          }
-        ],
-        "metatags": [
-          {
-            "og:image": "https://static.files.bbci.co.uk/ws/simorgh-assets/public/news/images/metadata/poster-1024x576.png",
-            "theme-color": "#FFFFFF",
-            "og:type": "article",
-            "twitter:title": "Bangladesh wakes up to new uncertain future after PM Sheikh Hasina's dramatic resignation",
-            "og:site_name": "BBC News",
-            "twitter:url": "https://www.bbc.com/news/live/ckdgg87lnkdt",
-            "og:title": "Bangladesh wakes up to new uncertain future after PM Sheikh Hasina's dramatic resignation",
-            "msapplication-tileimage": "https://static.files.bbci.co.uk/core/website/assets/static/icons/windows-phone/news/windows-phone-icon-270x270.23502b4459eb7a6ab2ab.png",
-            "og:description": "Looting and disorder have been reported in the South Asian nation, a day after mass protests forced Ms Hasina to flee and resign.",
-            "fb:pages": "1143803202301544,317278538359186,1392506827668140,742734325867560,185246968166196,156060587793370,137920769558355,193435954068976,21263239760,156400551056385,929399697073756,154344434967,228735667216,80758950658,260212261199,294662213128,1086451581439054,283348121682053,295830058648,239931389545417,304314573046,310719525611571,647687225371774,1159932557403143,286567251709437,1731770190373618,125309456546,163571453661989,285361880228,512423982152360,238003846549831,176663550714,260967092113,118450564909230,100978706649892,15286229625,122103087870579,120655094632228,102814153147070,124715648647,153132638110668,150467675018739",
-            "twitter:creator": "@BBCWorld",
-            "article:author": "https://www.facebook.com/bbcnews",
-            "twitter:image": "https://static.files.bbci.co.uk/ws/simorgh-assets/public/news/images/metadata/poster-1024x576.png",
-            "fb:app_id": "1609039196070050",
-            "twitter:site": "@BBCWorld",
-            "viewport": "width=device-width, initial-scale=1",
-            "twitter:description": "Looting and disorder have been reported in the South Asian nation, a day after mass protests forced Ms Hasina to flee and resign.",
-            "og:locale": "en_GB",
-            "og:image_alt": "BBC News",
-            "fb:admins": "100004154058350",
-            "og:url": "https://www.bbc.com/news/live/ckdgg87lnkdt",
-            "format-detection": "telephone=no"
-          }
-        ],
-        "cse_image": [
-          {
-            "src": "https://static.files.bbci.co.uk/ws/simorgh-assets/public/news/images/metadata/poster-1024x576.png"
-          }
-        ]
-      }
-    },
-    {
-      "kind": "customsearch#result",
-      "title": "Bangladesh protests: PM Sheikh Hasina flees to India as ...",
-      "htmlTitle": "\u003cb\u003eBangladesh\u003c/b\u003e protests: PM \u003cb\u003eSheikh Hasina\u003c/b\u003e flees to India as ...",
-      "link": "https://www.cnn.com/2024/08/05/asia/bangladesh-prime-minister-residence-stormed-intl/index.html",
-      "displayLink": "www.cnn.com",
-      "snippet": "Aug 6, 2024 ... The prime minister of Bangladesh, Sheikh Hasina, resigned and fled to neighboring India on Monday after protesters stormed her official ...",
-      "htmlSnippet": "Aug 6, 2024 \u003cb\u003e...\u003c/b\u003e The \u003cb\u003eprime minister of Bangladesh\u003c/b\u003e, \u003cb\u003eSheikh Hasina\u003c/b\u003e, \u003cb\u003eresigned\u003c/b\u003e and fled to neighboring India on Monday after protesters stormed her official ...",
-      "formattedUrl": "https://www.cnn.com/2024/08/05/.../bangladesh-prime-minister.../index.ht...",
-      "htmlFormattedUrl": "https://www.cnn.com/2024/08/05/.../\u003cb\u003ebangladesh\u003c/b\u003e-\u003cb\u003eprime\u003c/b\u003e-\u003cb\u003eminister\u003c/b\u003e.../index.ht...",
-      "pagemap": {
-        "cse_thumbnail": [
-          {
-            "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcScyayfP1an0tjs821kLSqSGIsgUFwc02vkRXh6ERXuqeV7xOEt3sC__sM&s",
-            "width": "300",
-            "height": "168"
-          }
-        ],
-        "metatags": [
-          {
-            "og:image": "https://media.cnn.com/api/v1/images/stellar/prod/ap24218390125876-2.jpg?c=16x9&q=w_800,c_fill",
-            "twitter:title": "Bangladesh prime minister flees to India as anti-government protesters storm her residence | CNN",
-            "og:type": "article",
-            "twitter:card": "summary_large_image",
-            "article:published_time": "2024-08-05T10:01:00.074Z",
-            "og:site_name": "CNN",
-            "author": "Isaac Yee, Tanbirul Miraj Ripon",
-            "og:title": "Bangladesh prime minister flees to India as anti-government protesters storm her residence | CNN",
-            "meta-section": "world",
-            "type": "article",
-            "og:description": "The prime minister of Bangladesh, Sheikh Hasina, resigned and fled to neighboring India on Monday after protesters stormed her official residence after weeks of deadly anti-government demonstrations in the South Asian nation.",
-            "twitter:image": "https://media.cnn.com/api/v1/images/stellar/prod/ap24218390125876-2.jpg?c=16x9&q=w_800,c_fill",
-            "article:publisher": "https://www.facebook.com/CNN",
-            "fb:app_id": "80401312489",
-            "twitter:site": "@CNN",
-            "article:modified_time": "2024-08-06T05:24:05.249Z",
-            "viewport": "width=device-width,initial-scale=1,shrink-to-fit=no",
-            "twitter:description": "The prime minister of Bangladesh, Sheikh Hasina, resigned and fled to neighboring India on Monday after protesters stormed her official residence after weeks of deadly anti-government demonstrations in the South Asian nation.",
-            "template_type": "article_leaf",
-            "theme": "world",
-            "og:url": "https://www.cnn.com/2024/08/05/asia/bangladesh-prime-minister-residence-stormed-intl/index.html"
-          }
-        ],
-        "cse_image": [
-          {
-            "src": "https://media.cnn.com/api/v1/images/stellar/prod/ap24218390125876-2.jpg?c=16x9&q=w_800,c_fill"
-          }
-        ]
-      }
-    },
-    {
-      "kind": "customsearch#result",
-      "title": "Why did Bangladesh PM Sheikh Hasina resign and where is she ...",
-      "htmlTitle": "Why did \u003cb\u003eBangladesh\u003c/b\u003e PM \u003cb\u003eSheikh Hasina resign\u003c/b\u003e and where is she ...",
-      "link": "https://www.reuters.com/world/asia-pacific/why-did-bangladesh-pm-sheikh-hasina-resign-where-is-she-now-2024-08-06/",
-      "displayLink": "www.reuters.com",
-      "snippet": "Aug 6, 2024 ... Aug 7 (Reuters) - Sheikh Hasina resigned as Bangladesh's prime minister and fled the country on Monday following weeks of deadly protests ...",
-      "htmlSnippet": "Aug 6, 2024 \u003cb\u003e...\u003c/b\u003e Aug 7 (Reuters) - \u003cb\u003eSheikh Hasina resigned\u003c/b\u003e as \u003cb\u003eBangladesh's prime minister\u003c/b\u003e and fled the country on Monday following weeks of deadly protests ...",
-      "formattedUrl": "https://www.reuters.com/.../why-did-bangladesh-pm-sheikh-hasina-resign-...",
-      "htmlFormattedUrl": "https://www.reuters.com/.../why-did-\u003cb\u003ebangladesh\u003c/b\u003e-pm-\u003cb\u003esheikh\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e-resign-...",
-      "pagemap": {
-        "cse_thumbnail": [
-          {
-            "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR_IDuyjGdce77t1tWrSwheC6g8XSyuUQKn_KxA0H9x3eCRV4kretMyY0_J&s",
-            "width": "310",
-            "height": "162"
-          }
-        ],
-        "metatags": [
-          {
-            "apple-itunes-app": "app-id=602660809, app-argument=https://www.reuters.com/world/asia-pacific/why-did-bangladesh-pm-sheikh-hasina-resign-where-is-she-now-2024-08-06/?id=E5O5XBJMZBPTDAUM7I6BFYX4UA",
-            "og:image": "https://www.reuters.com/resizer/v2/QSLRZINWOVJ25LWOYIOOXO4L6A.jpg?auth=581b869970d6c61101b4e8bba552bd5ae55ec08c8a333a33ef63a72f57b8f0c4&height=1005&width=1920&quality=80&smart=true",
-            "analytics:page_layout": "regular-article",
-            "article:published_time": "2024-08-07T03:23:35Z",
-            "og:image:width": "1200",
-            "twitter:card": "summary_large_image",
-            "og:site_name": "Reuters",
-            "og:article:modified_time": "2024-08-07T03:51:39.907Z",
-            "ccbot": "nofollow",
-            "analytics:ad_layout": "leaderboard, right rail, sponsored",
-            "analyticsattributes.topicchannel": "World",
-            "title": "Why did Bangladesh PM Sheikh Hasina resign and where is she now? | Reuters",
-            "og:description": "Sheikh Hasina resigned as Bangladesh's prime minister and fled the country on Monday following weeks of deadly protests that began as demonstrations by students against government job quotas but surged into a movement demanding her resignation.",
-            "twitter:creator": "@Reuters",
-            "twitter:image": "https://www.reuters.com/resizer/v2/QSLRZINWOVJ25LWOYIOOXO4L6A.jpg?auth=581b869970d6c61101b4e8bba552bd5ae55ec08c8a333a33ef63a72f57b8f0c4&height=1005&width=1920&quality=80&smart=true",
-            "twitter:image:alt": "Bangladeshi Prime Minister Sheikh Hasina reviews an honour guard at the Government House, during her visit to Thailand, in Bangkok, Thailand, April 26, 2024. REUTERS/Athit Perawongmetha/File Photo",
-            "twitter:site": "@Reuters",
-            "article:modified_time": "2024-08-07T03:51:39.907Z",
-            "fb:admins": "988502044532272",
-            "article:content_tier": "metered",
-            "og:type": "article",
-            "article:section": "Asia Pacific",
-            "og:image:alt": "Bangladeshi Prime Minister Sheikh Hasina reviews an honour guard at the Government House, during her visit to Thailand, in Bangkok, Thailand, April 26, 2024. REUTERS/Athit Perawongmetha/File Photo",
-            "twitter:title": "Why did Bangladesh PM Sheikh Hasina resign and where is she now?",
-            "ad:template": "article",
-            "og:image:url": "https://www.reuters.com/resizer/v2/QSLRZINWOVJ25LWOYIOOXO4L6A.jpg?auth=581b869970d6c61101b4e8bba552bd5ae55ec08c8a333a33ef63a72f57b8f0c4&height=1005&width=1920&quality=80&smart=true",
-            "dcsext.dartzone": "/4735792/reuters.com/world/apac/article",
-            "og:title": "Why did Bangladesh PM Sheikh Hasina resign and where is she now?",
-            "dcsext.channellist": "World;World;Asia Pacific;Asian Markets",
-            "og:image:height": "628",
-            "og:article:published_time": "2024-08-07T03:23:35Z",
-            "og:updated_time": "2024-08-07T03:51:39.907Z",
-            "fb:pages": "114050161948682",
-            "article:author": "Sudipto Ganguly",
-            "article:tag": "MTVID,EXPLN,TOPNWS,ANLINS,CIV,CWP,DIP,DLI,ECI,ECO,EDU,GEN,JOB,MCE,MPLT,MPOP,NEWS1,POL,RACR,SOCI,TOPCMB,VIO,SASIA,IN,PK,ASXPAC,BD,EMRG,ASIA,PACKAGE:US-TOP-NEWS,PACKAGE:WORLD-NEWS",
-            "analyticsattributes.topicsubchannel": "Asia Pacific",
-            "fb:app_id": "988502044532272",
-            "og:locale:alternate": "en_US",
-            "viewport": "width=device-width, initial-scale=1",
-            "twitter:description": "Sheikh Hasina resigned as Bangladesh's prime minister and fled the country on Monday following weeks of deadly protests that began as demonstrations by students against government job quotas but surged into a movement demanding her resignation.",
-            "og:locale": "en_US",
-            "og:url": "https://www.reuters.com/world/asia-pacific/why-did-bangladesh-pm-sheikh-hasina-resign-where-is-she-now-2024-08-06/"
-          }
-        ],
-        "cse_image": [
-          {
-            "src": "https://www.reuters.com/resizer/v2/QSLRZINWOVJ25LWOYIOOXO4L6A.jpg?auth=581b869970d6c61101b4e8bba552bd5ae55ec08c8a333a33ef63a72f57b8f0c4&height=1005&width=1920&quality=80&smart=true"
-          }
-        ]
-      }
-    },
-    {
-      "kind": "customsearch#result",
-      "title": "Bangladesh's 'Gen Z revolution' toppled PM Sheikh Hasina. Why did ...",
-      "htmlTitle": "\u003cb\u003eBangladesh's\u003c/b\u003e 'Gen Z revolution' toppled PM \u003cb\u003eSheikh Hasina\u003c/b\u003e. Why did ...",
-      "link": "https://www.cnn.com/2024/08/06/asia/bangladesh-protests-hasina-resignation-explainer-intl-hnk/index.html",
-      "displayLink": "www.cnn.com",
-      "snippet": "Aug 6, 2024 ... People celebrate the resignation of Prime Minister Sheikh Hasina in Dhaka, Bangladesh, on August 5, 2024. Mohammad Ponir Hossain/Reuters. CNN —.",
-      "htmlSnippet": "Aug 6, 2024 \u003cb\u003e...\u003c/b\u003e People celebrate the \u003cb\u003eresignation\u003c/b\u003e of \u003cb\u003ePrime Minister Sheikh Hasina\u003c/b\u003e in Dhaka, \u003cb\u003eBangladesh\u003c/b\u003e, on August 5, 2024. Mohammad Ponir Hossain/Reuters. CNN —.",
-      "formattedUrl": "https://www.cnn.com/2024/08/06/asia/bangladesh...hasina.../index.html",
-      "htmlFormattedUrl": "https://www.cnn.com/2024/08/06/asia/\u003cb\u003ebangladesh\u003c/b\u003e...\u003cb\u003ehasina\u003c/b\u003e.../index.html",
-      "pagemap": {
-        "cse_thumbnail": [
-          {
-            "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTOW5T__EO6GShxs6es-aGavTBFUU2GCU-SyqlBE3t5d0hFX5WugbjKA-JH&s",
-            "width": "300",
-            "height": "168"
-          }
-        ],
-        "metatags": [
-          {
-            "og:image": "https://media.cnn.com/api/v1/images/stellar/prod/2024-08-05t184829z-2105365796-rc2l99a18sqr-rtrmadp-3-bangladesh-protests.jpg?c=16x9&q=w_800,c_fill",
-            "twitter:title": "Bangladesh’s ‘Gen Z revolution’ toppled a veteran leader. Why did they hit the streets and what happens now? | CNN",
-            "og:type": "article",
-            "twitter:card": "summary_large_image",
-            "article:published_time": "2024-08-06T08:16:31.519Z",
-            "og:site_name": "CNN",
-            "author": "Helen Regan",
-            "og:title": "Bangladesh’s ‘Gen Z revolution’ toppled a veteran leader. Why did they hit the streets and what happens now? | CNN",
-            "meta-section": "world",
-            "type": "article",
-            "og:description": "Inside Bangladesh it’s being dubbed a Gen Z revolution – a protest movement that pitted mostly young student demonstrators against a 76-year-old leader who had dominated her nation for decades and turned increasingly authoritarian in recent years.",
-            "twitter:image": "https://media.cnn.com/api/v1/images/stellar/prod/2024-08-05t184829z-2105365796-rc2l99a18sqr-rtrmadp-3-bangladesh-protests.jpg?c=16x9&q=w_800,c_fill",
-            "article:publisher": "https://www.facebook.com/CNN",
-            "fb:app_id": "80401312489",
-            "twitter:site": "@CNN",
-            "article:modified_time": "2024-08-07T03:48:11.066Z",
-            "viewport": "width=device-width,initial-scale=1,shrink-to-fit=no",
-            "twitter:description": "Inside Bangladesh it’s being dubbed a Gen Z revolution – a protest movement that pitted mostly young student demonstrators against a 76-year-old leader who had dominated her nation for decades and turned increasingly authoritarian in recent years.",
-            "template_type": "article_leaf",
-            "theme": "world",
-            "og:url": "https://www.cnn.com/2024/08/06/asia/bangladesh-protests-hasina-resignation-explainer-intl-hnk/index.html"
-          }
-        ],
-        "cse_image": [
-          {
-            "src": "https://media.cnn.com/api/v1/images/stellar/prod/2024-08-05t184829z-2105365796-rc2l99a18sqr-rtrmadp-3-bangladesh-protests.jpg?c=16x9&q=w_800,c_fill"
-          }
-        ]
-      }
-    },
-    {
-      "kind": "customsearch#result",
-      "title": "Bangladesh PM Sheikh Hasina resigns, ending 15 years in power ...",
-      "htmlTitle": "\u003cb\u003eBangladesh\u003c/b\u003e PM \u003cb\u003eSheikh Hasina resigns\u003c/b\u003e, ending 15 years in power ...",
-      "link": "https://www.npr.org/2024/08/05/g-s1-15332/bangladesh-protests",
-      "displayLink": "www.npr.org",
-      "snippet": "Aug 5, 2024 ... DHAKA, Bangladesh — Bangladesh's Prime Minister Sheikh Hasina resigned on Monday, ending 15 years in power as thousands of protesters defied ...",
-      "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e DHAKA, \u003cb\u003eBangladesh\u003c/b\u003e — \u003cb\u003eBangladesh's Prime Minister Sheikh Hasina resigned\u003c/b\u003e on Monday, ending 15 years in power as thousands of protesters defied ...",
-      "formattedUrl": "https://www.npr.org/2024/08/05/g-s1-15332/bangladesh-protests",
-      "htmlFormattedUrl": "https://www.npr.org/2024/08/05/g-s1-15332/\u003cb\u003ebangladesh\u003c/b\u003e-protests",
-      "pagemap": {
-        "cse_thumbnail": [
-          {
-            "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSqvTrTl13trd-nrF4oQvAQOY3z2N2MfxSSyZsmd4Pm6E_e0TTbu0ER6zE&s",
-            "width": "300",
-            "height": "168"
-          }
-        ],
-        "speakablespecification": [
-          {
-            "cssselector": "[data-is-speakable]"
-          }
-        ],
-        "metatags": [
-          {
-            "date": "2024-08-05",
-            "apple-itunes-app": "app-id=324906251, app-argument=https://www.npr.org/2024/08/05/g-s1-15332/bangladesh-protests",
-            "og:image": "https://npr.brightspotcdn.com/dims3/default/strip/false/crop/6043x3399+0+315/resize/1400/quality/100/format/jpeg/?url=http%3A%2F%2Fnpr-brightspot.s3.amazonaws.com%2Fba%2F99%2Ff772f9bd44ee9b1ddf5a4d9d1d98%2Fap24217447347066.jpg",
-            "og:type": "article",
-            "twitter:card": "summary_large_image",
-            "twitter:title": "Bangladesh PM Sheikh Hasina resigns, ending 15 years in power, as thousands protest",
-            "og:site_name": "NPR",
-            "cxenseparse:pageclass": "article",
-            "twitter:domain": "npr.org",
-            "cxenseparse:publishtime": "2024-08-05T04:07:23-04:00",
-            "og:title": "Bangladesh PM Sheikh Hasina resigns, ending 15 years in power, as thousands protest",
-            "rating": "General",
-            "og:description": "At least 95 people, including at least 14 police officers, died in clashes in the capital on Sunday. Broadband internet and mobile data services were cut off for about three hours on Monday.",
-            "fb:pages": "10643211755",
-            "twitter:image:src": "https://npr.brightspotcdn.com/dims3/default/strip/false/crop/6043x3399+0+315/resize/1400/quality/100/format/jpeg/?url=http%3A%2F%2Fnpr-brightspot.s3.amazonaws.com%2Fba%2F99%2Ff772f9bd44ee9b1ddf5a4d9d1d98%2Fap24217447347066.jpg",
-            "fb:app_id": "138837436154588",
-            "cxenseparse:author": "The Associated Press",
-            "twitter:site": "@NPR",
-            "article:modified_time": "2024-08-05T06:50:55-04:00",
-            "viewport": "width=device-width, initial-scale=1, shrink-to-fit=no",
-            "article:content_tier": "free",
-            "og:url": "https://www.npr.org/2024/08/05/g-s1-15332/bangladesh-protests",
-            "article:opinion": "false"
-          }
-        ],
-        "cse_image": [
-          {
-            "src": "https://npr.brightspotcdn.com/dims3/default/strip/false/crop/6043x3399+0+315/resize/1400/quality/100/format/jpeg/?url=http%3A%2F%2Fnpr-brightspot.s3.amazonaws.com%2Fba%2F99%2Ff772f9bd44ee9b1ddf5a4d9d1d98%2Fap24217447347066.jpg"
-          }
-        ]
-      }
-    },
-    {
-      "kind": "customsearch#result",
-      "title": "Tens of thousands protest in Bangladesh to demand resignation of ...",
-      "htmlTitle": "Tens of thousands protest in \u003cb\u003eBangladesh\u003c/b\u003e to demand \u003cb\u003eresignation\u003c/b\u003e of ...",
-      "link": "https://www.cnn.com/2022/12/11/asia/bangladesh-protests-prime-minister-sheikh-hasina-intl-hnk/index.html",
-      "displayLink": "www.cnn.com",
-      "snippet": "Dec 11, 2022 ... Supporters of Bangladesh's opposition party protest against the government of Prime Minister Sheikh Hasina on December 10, 2022. Mamunur Rashid/ ...",
-      "htmlSnippet": "Dec 11, 2022 \u003cb\u003e...\u003c/b\u003e Supporters of \u003cb\u003eBangladesh's\u003c/b\u003e opposition party protest against the government of \u003cb\u003ePrime Minister Sheikh Hasina\u003c/b\u003e on December 10, 2022. Mamunur Rashid/ ...",
-      "formattedUrl": "https://www.cnn.com/.../bangladesh...prime-minister-sheikh-hasina.../index....",
-      "htmlFormattedUrl": "https://www.cnn.com/.../\u003cb\u003ebangladesh\u003c/b\u003e...\u003cb\u003eprime\u003c/b\u003e-\u003cb\u003eminister\u003c/b\u003e-\u003cb\u003esheikh\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e.../index....",
-      "pagemap": {
-        "cse_thumbnail": [
-          {
-            "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ-JqzYxoZHcQ5wWQhH5Xq-JrKFFyWbdfS339bDlIrhMrc2Y_9BznDwjN5u&s",
-            "width": "275",
-            "height": "183"
-          }
-        ],
-        "metatags": [
-          {
-            "og:image": "https://media.cnn.com/api/v1/images/stellar/prod/221210230748-02-dhaka-protests-121022.jpg?c=16x9&q=w_800,c_fill",
-            "twitter:title": "Tens of thousands protest in Bangladesh to demand resignation of Prime Minister | CNN",
-            "og:type": "article",
-            "twitter:card": "summary_large_image",
-            "article:published_time": "2022-12-11T06:09:58Z",
-            "og:site_name": "CNN",
-            "author": "Vedika Sud,Yong Xiong",
-            "og:title": "Tens of thousands protest in Bangladesh to demand resignation of Prime Minister | CNN",
-            "meta-section": "world",
-            "type": "article",
-            "og:description": "Tens of thousands of protesters took to the streets of Dhaka on Saturday calling for the dissolution of parliament to make way for new elections, and demand the resignation of Bangladeshi Prime Minister Sheikh Hasina.",
-            "twitter:image": "https://media.cnn.com/api/v1/images/stellar/prod/221210230748-02-dhaka-protests-121022.jpg?c=16x9&q=w_800,c_fill",
-            "article:publisher": "https://www.facebook.com/CNN",
-            "article:tag": "asia, bangladesh, brand safety-nsf other, brand safety-nsf sensitive, british national party, civil disobedience, continents and regions, domestic alerts, domestic-international news, elections and campaigns, government and public administration, iab-elections, iab-politics, political figures - intl, political organizations, political parties - intl, politics, protests and demonstrations, resignations, sheikh hasina, society, south asia",
-            "fb:app_id": "80401312489",
-            "twitter:site": "@CNN",
-            "article:modified_time": "2022-12-11T06:09:58Z",
-            "viewport": "width=device-width,initial-scale=1,shrink-to-fit=no",
-            "twitter:description": "Tens of thousands of protesters took to the streets of Dhaka on Saturday calling for the dissolution of parliament to make way for new elections, and demand the resignation of Bangladeshi Prime Minister Sheikh Hasina.",
-            "template_type": "article_leaf",
-            "theme": "world",
-            "og:url": "https://www.cnn.com/2022/12/11/asia/bangladesh-protests-prime-minister-sheikh-hasina-intl-hnk/index.html"
-          }
-        ],
-        "cse_image": [
-          {
-            "src": "https://media.cnn.com/api/v1/images/stellar/prod/221210230749-dhaka-protests-221207.jpg?q=w_1110,c_fill"
-          }
-        ]
-      }
-    },
-    {
-      "kind": "customsearch#result",
-      "title": "Timeline of events leading to the resignation of Bangladesh Prime ...",
-      "htmlTitle": "Timeline of events leading to the \u003cb\u003eresignation\u003c/b\u003e of \u003cb\u003eBangladesh Prime\u003c/b\u003e ...",
-      "link": "https://www.voanews.com/a/timeline-of-events-leading-to-the-resignation-of-bangladesh-prime-minister-sheikh-hasina/7731456.html",
-      "displayLink": "www.voanews.com",
-      "snippet": "Aug 5, 2024 ... Bangladesh Prime Minister Sheikh Hasina resigned and left the country Monday after clashes between student protesters and police left nearly 300 people dead.",
-      "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e \u003cb\u003eBangladesh Prime Minister Sheikh Hasina resigned\u003c/b\u003e and left the \u003cb\u003ecountry Monday\u003c/b\u003e after clashes between student protesters and police left nearly 300 people dead.",
-      "formattedUrl": "https://www.voanews.com/...bangladesh-prime-minister-sheikh-hasina/7731...",
-      "htmlFormattedUrl": "https://www.voanews.com/...\u003cb\u003ebangladesh\u003c/b\u003e-\u003cb\u003eprime\u003c/b\u003e-\u003cb\u003eminister\u003c/b\u003e-\u003cb\u003esheikh\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e/7731...",
-      "pagemap": {
-        "cse_thumbnail": [
-          {
-            "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS2o9D0XbnmDtsmWEVzDYCwGv4IHKkzATikOvXEDghsD_uzZj-G6_63zGyR&s",
-            "width": "311",
-            "height": "162"
-          }
-        ],
-        "metatags": [
-          {
-            "msapplication-tilecolor": "#ffffff",
-            "apple-itunes-app": "app-id=632618796, app-argument=//7731456.ltr",
-            "og:image": "https://gdb.voanews.com/28CFE9FB-9B7B-4474-8342-C7BC5434B54A.jpg",
-            "og:type": "article",
-            "og:image:width": "308",
-            "twitter:card": "summary_large_image",
-            "og:site_name": "Voice of America",
-            "msvalidate.01": "3286EE554B6F672A6F2E608C02343C0E",
-            "author": "Sabir Mustafa",
-            "apple-mobile-web-app-title": "VOA",
-            "og:title": "Timeline of events leading to the resignation of Bangladesh Prime Minister Sheikh Hasina",
-            "msapplication-tileimage": "/Content/responsive/VOA/img/webApp/ico-144x144.png",
-            "fb:pages": "36235438073",
-            "og:description": "Hasina resigns after weeks of clashes between student protesters and police leave nearly 300 dead",
-            "article:publisher": "https://www.facebook.com/voiceofamerica",
-            "twitter:image": "https://gdb.voanews.com/28CFE9FB-9B7B-4474-8342-C7BC5434B54A.jpg",
-            "fb:app_id": "362002700549372",
-            "apple-mobile-web-app-status-bar-style": "black",
-            "twitter:site": "@voanews",
-            "viewport": "width=device-width, initial-scale=1.0",
-            "twitter:description": "Hasina resigns after weeks of clashes between student protesters and police leave nearly 300 dead",
-            "og:url": "https://www.voanews.com/a/timeline-of-events-leading-to-the-resignation-of-bangladesh-prime-minister-sheikh-hasina/7731456.html"
-          }
-        ],
-        "cse_image": [
-          {
-            "src": "https://gdb.voanews.com/28CFE9FB-9B7B-4474-8342-C7BC5434B54A.jpg"
-          }
-        ]
-      }
-    },
-    {
-      "kind": "customsearch#result",
-      "title": "Bangladesh's Sheikh Hasina forced to resign: What happened and ...",
-      "htmlTitle": "\u003cb\u003eBangladesh's Sheikh Hasina\u003c/b\u003e forced to \u003cb\u003eresign\u003c/b\u003e: What happened and ...",
-      "link": "https://www.aljazeera.com/news/2024/8/5/bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next",
-      "displayLink": "www.aljazeera.com",
-      "snippet": "Aug 5, 2024 ... Bangladesh Prime Minister Sheikh Hasina has stepped down from office, ending 15 years of what the opposition says was “authoritarian rule” and sparking ...",
-      "htmlSnippet": "Aug 5, 2024 \u003cb\u003e...\u003c/b\u003e \u003cb\u003eBangladesh Prime Minister Sheikh Hasina\u003c/b\u003e has \u003cb\u003estepped down\u003c/b\u003e from office, ending 15 years of what the opposition says was “authoritarian rule” and sparking ...",
-      "formattedUrl": "https://www.aljazeera.com/.../bangladeshs-sheikh-hasina-forced-to-resign-w...",
-      "htmlFormattedUrl": "https://www.aljazeera.com/.../\u003cb\u003ebangladesh\u003c/b\u003es-\u003cb\u003esheikh\u003c/b\u003e-\u003cb\u003ehasina\u003c/b\u003e-forced-to-resign-w...",
-      "pagemap": {
-        "cse_thumbnail": [
-          {
-            "src": "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS2uyLUKVFCDpJ-_MjZ6dRKW5_LC1zknAIICxM5ZcVuAZtYqupigTOI_l_0&s",
-            "width": "259",
-            "height": "194"
-          }
-        ],
-        "metatags": [
-          {
-            "pagetype": "Article Page",
-            "og:image": "https://www.aljazeera.com/wp-content/uploads/2024/08/AP24218390076912-1722855595.jpg?resize=1920%2C1440",
-            "apple-itunes-app": "app-id=1534955972",
-            "twitter:card": "summary_large_image",
-            "og:site_name": "Al Jazeera",
-            "postlabel": "Explainer",
-            "twitter:url": "https://www.aljazeera.com/news/2024/8/5/bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next",
-            "pagesection": "Explainer,News,Sheikh Hasina",
-            "channel": "aje",
-            "publisheddate": "2024-08-05T15:14:49",
-            "postid": "3096869",
-            "source": "Al Jazeera",
-            "og:description": "Prime minister reportedly flees to India after weeks of antigovernment protests.",
-            "taxonomyterms": "News, Sheikh Hasina, Asia, Bangladesh",
-            "lastdate": "2024-08-05T15:40:26",
-            "primarytopic": "News",
-            "twitter:image:alt": "Sheikh Hasina forced to resign: What happened and what’s next?",
-            "sourcetaxonomy": "Al Jazeera",
-            "internalreporting": "Break it down for me",
-            "where": "Asia, Bangladesh",
-            "primarytag": "Sheikh Hasina",
-            "ga4": "G-XN9JB9Q0M1",
-            "twitter:account_id": "5536782",
-            "og:type": "article",
-            "twitter:title": "Sheikh Hasina forced to resign: What happened and what’s next?",
-            "taxonomy-tags": "News, Sheikh Hasina",
-            "topics": "News",
-            "og:title": "Sheikh Hasina forced to resign: What happened and what’s next?",
-            "tags": "Sheikh Hasina",
-            "contenttype": "post",
-            "twitter:image:src": "https://www.aljazeera.com/wp-content/uploads/2024/08/AP24218390076912-1722855595.jpg?resize=1920%2C1440",
-            "articleslug": "bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next",
-            "postlink": "/news/2024/8/5/bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next",
-            "viewport": "width=device-width,initial-scale=1,shrink-to-fit=no",
-            "twitter:description": "Prime minister reportedly flees to India after weeks of antigovernment protests.",
-            "pagetitle": "Bangladesh’s Sheikh Hasina forced to resign: What happened and what’s next?",
-            "og:url": "https://www.aljazeera.com/news/2024/8/5/bangladeshs-sheikh-hasina-forced-to-resign-what-happened-and-whats-next"
-          }
-        ],
-        "cse_image": [
-          {
-            "src": "https://www.aljazeera.com/wp-content/uploads/2024/08/AP24218390076912-1722855595.jpg?resize=1920%2C1440"
-          }
-        ]
-      }
-    }
-  ]
-}
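
The block removed above is a hardcoded sample response from Google's Custom Search JSON API (GET https://www.googleapis.com/customsearch/v1). For reference, here is a minimal sketch of fetching equivalent results live and flattening them to the fields the sample actually exercises (title, link, displayLink, snippet, and the article:published_time metatag). The environment-variable names and the flattened result shape are illustrative assumptions, not part of this patch:

import os

import requests

# Minimal sketch (not part of this patch): query Google Custom Search and
# keep only the fields the hardcoded sample above actually uses.
CSE_ENDPOINT = "https://www.googleapis.com/customsearch/v1"


def search_evidence(query: str, num: int = 10) -> list[dict]:
    """Return flattened search hits: title, url, source, snippet, published."""
    resp = requests.get(
        CSE_ENDPOINT,
        params={
            "key": os.environ["GOOGLE_API_KEY"],   # assumed env var name
            "cx": os.environ["GOOGLE_ENGINE_ID"],  # assumed env var name
            "q": query,
            "num": num,  # the API returns at most 10 items per request
        },
        timeout=10,
    )
    resp.raise_for_status()
    hits = []
    for item in resp.json().get("items", []):
        # pagemap.metatags is a single-element list of page metadata,
        # as in the sample response removed above.
        metatags = (item.get("pagemap", {}).get("metatags") or [{}])[0]
        hits.append(
            {
                "title": item.get("title"),
                "url": item.get("link"),
                "source": item.get("displayLink"),
                "snippet": item.get("snippet"),
                "published": metatags.get("article:published_time"),
            }
        )
    return hits


if __name__ == "__main__":
    for hit in search_evidence("Bangladesh Prime Minister Sheikh Hasina resigned"):
        print(f"{hit['source']}: {hit['title']} ({hit['published']})")

Flattening at the response boundary keeps any downstream snippet-ranking or verdict logic independent of the raw customsearch#result schema shown above.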