From 83a886960b4e605927c015aee9409bdff0c912ca Mon Sep 17 00:00:00 2001 From: Utsho Dey Date: Wed, 11 Dec 2024 17:39:10 +0600 Subject: [PATCH] base code added --- app/__pycache__/config.cpython-312.pyc | Bin 519 -> 507 bytes .../__pycache__/fact_check.cpython-312.pyc | Bin 11344 -> 3800 bytes app/api/fact_check.py | 219 +----------------- app/config.py | 2 +- .../fact_check_models.cpython-312.pyc | Bin 0 -> 5577 bytes app/models/fact_check_models.py | 109 +++++++++ .../fact_checker_website.cpython-312.pyc | Bin 0 -> 3341 bytes app/websites/fact_checker_website.py | 99 ++++++++ 8 files changed, 221 insertions(+), 208 deletions(-) create mode 100644 app/models/__pycache__/fact_check_models.cpython-312.pyc create mode 100644 app/models/fact_check_models.py create mode 100644 app/websites/__pycache__/fact_checker_website.cpython-312.pyc create mode 100644 app/websites/fact_checker_website.py diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc index 53d89a61cbd15bc0b5b4e4eca7e37df2c7c4ddfe..267bba1df1c923dade63cc9cc26619d42bbdc2a5 100644 GIT binary patch delta 72 zcmZo?`OVCGnwOW00SGn(N2aq(&P74p2ENHx8I1tr#T7#U delta 84 zcmey(+|I&#nwOW00SK6Pg{4bPF)3E?&BKo=I9&}@9g2~>^<@MeidJ!k`K&` njEuJ#IPWsZJ!If+;e8+@H{WZf*9CQ_i!#m^7$hg3WHbT*?>`xC diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc index daa45a81346a8c905d7b637ae6176f1e957e0533..56cc8981234a04f5d9b6da8c11310f6f86677fde 100644 GIT binary patch delta 1823 zcmZuy+iz4w7@s+py#>9+4A528vs?`*U4}NpH3xqfizx}@7%=gWl z@3yy&|Jb+uZ8RD}uqv<2&V3z5=vSe1RPA;a9d4 zThSGdQ!Lf?={`?ZEx#Sm19ngkdb-aF*e z;7jeh^)3lbA|r4H89^d146=*cQ&D+iQ|~5S3&Skin_3Q$un{IP6aNoJE-06ym)bT) zj6{q$30D*&aYlJ~%cez1OWqT1HHinujvk(=HC#e@^rd6RjuuXrN!>MT)hP}0fKhUZ zYuW^4Wx_1GU=Z_7PESw1Two{U{IaI-qw6OU1EHHhPKb=)eM zHjfr4t)30fQw>T_;7f9j=05=Wfh%TgO-U?ZU@Kvk=n}I4iMf)>@)dF zO2bo{%0s4ORvougEfcD;6Urj)VO-gPh_2nKo&ZEU**(y0snSl4AhUpey9Sm@g zZUv-?4#dPp6yW|Dv@XeNeiZ@GZJqrGkv-mPcZ-KRT(xTku{6az3(oF+B;YRqa~(iL zjIvB78{j@l+yboGet~#_A+zNoq+G=nWI0B{_)PC>zM8ag5#0rr7ap zq}Trrr1qSAUVcwi(UM{WTFNOz_q9M`%h{fAvg6N4vQM+4I0crY@v!TE$VZGQOXi+F z($`^HXhD4L6U$9^h!(yCpCMKYKVseN28WOlV;|QSUCil! z9_QHA{_G@AjZaS%-FHR(^JL5%*^x*6>ryM?lXv6GA$k=qUwreND^1-C3hZj z5{6Q78;(a(jjiOv`l9n`b2R|mvAH+ zM22DoZ_>z9T=h&GaX6EqTiJI5Ba)7oG+4nJ8%XXx3B961H_%1i<<{JiC5q9sonocd zs2bc)iPNy4nEXz|wo7#J3#28f$Qum^Py|E(xMDyfQjs$%N|gJ{wW>?1E)BCQgMAtJ z?3!INt6X+mO2t^?vPFanPYu!mc6V@uJR|7Dsgh-i4ma9Q;h69Myu<5aN0??h%WDt_ zi|A3WT^%|N`y$KE$50=?qx&mpbvsJ+tQ`5Dn-!$oa1AiVJyvW+Lyq-kSpX`pDQ`ICLY2LAjAv1!`S| zA@#=od|^PnnfAf%%>fmdn?o|l+Gg&rlpw2hlZx@@1&k{TPOVB+(7DfDtee$2>O*K5 z@ldH=AA!JC%(+KF=Gz79HM2AFXeJdm7%!Go{DV-@wj0UF3X3K=DN5pT8$ye1)=QF? zE|_*3bB?J)TXZpOpd4$EB@sz_K!7)4#4LIccn)rDSmXem1yyvQa}i)w#u)#CcKw9H zcTnFQwD~R?TbCp(ts#JQAHuI<_SMifX*I%b4<8(S5##P#={>8+6ZRJV^n|sZpd|g# JEjaJW{{TIN%FzG- literal 11344 zcmbt4TW}lKb$79gH%L4QzA5l!36@CeWm}eQMKlFUlw{IFN{$8BK|t)11cg`bE=3U` zQ^r#^ zTVukRbcJ0EXG*w}?y$R1?g@LK+??rarV&BxxF8hI ze8-d!1DeCtO#;+&!OlB4M9`pGyPxeFOiw5hRV$92I`wSmd`!wH@pLL^RIN`(W$Br; zC?!;5Z(K@A{mReUtDBoW{~x6FmM3xK1KrP9$N&92wz6?t&%E&yn+7SQx@ZyL%>NC7nDz zG|(SP#bALnrgwn7AgLiMtPKPB26u@l9AsFU=9FU1Hw!uHn&!rhdX3UkKZGHom~`yC zVVZo^O~^F&W}(j^a*gYU&9=qmcuH2HshC8aP+hgg<@89JCZmdKlqWK9KxR=IiB2Sx zAg>Ctlo(O@^zgW9jYMLJs4Pb!GEO=Wn7x0XYb>3Vx+WBP3>G)l6&2-7Jdudfu90X= z=^2w^XS<@A4E*8@6Nw?JGc!eNp@(ek$F`@lFmeSH%R2ykG)r#Vs}^FH53JhTa$f(X zr!GGA+NYKrEw`P%`N{deT$ERwtxJ~HHKV3jt|hXt^h z))oPjsN*z|cu5esCjkrc!YCR<{<@$aY}mv~jm1*a1#;c2A6(eXN-f1wt7t20YmuxH zkJ}0T+Y5DUqC>0@D@CX165W@DICtGs?9D!94SLmz5T$84D9M?0N|scUC@Il+qBFY# z80d2oQb5Au0FrC~Xd|GE#pQr9m5~DJ5v}YRr`knHj?p;dJ=MlqM6kt%4MEyvG>($3 zoWjS8^bG}tq*L;Pshn0p^&qcNFP}{m5gFK62mD#rUTKXCns%+79!l$R)H3!EOLB zY4^CCPDNy>nv6!I^9qdy9dsL32q0(%pz^XpRc9m;O{LOG6vZ&GHAU&MsXvO;Pp#O- z0qi6t;Dk{M)18QCJixf64POy>;3p3Q_!7xEoG+ia<*#4tS?c)os{i@f;|u(X@cgZ+ zy5*hEtXB2U_RYyFLjSG0ZC8Y)z2Vim7iN#l?O511-?1XR@PW56XL0BJ!Cc)#x!Ol^ zzLvXIqtBM-&3jDi#OyPF@uW694!VKjklHY0q;4uW7Ht8VZ5a1aw>Vr+P zR?NlbYhUiqSsgDQ%h@U}^l9tDdK=ay00Pt6^8hxl>j9z##799q=mN$f8g+3Xf~e8C zE^M7L1kI}T*@@vqTn18B1(cZmS#J@uT@Fl2!*U$d_(L!YP!EvZpwn>o@mHATG)K3C z)+*>Az!#HP)J(Gf6KWXrS;}xu)7%^6n}#9KXrNm8{;YWnOoW47U0DleWXOTGw*hZb zMV^c+W7iBSA4|w8cTPr069_P|u2vKb3Mqn9&sGC06$E=CuJ77i^rnsZLqoP`}+(6*Te zJ&Irxp?nH)2ym0-{Q!VaY!xr}6?o<7YTdCCuN?cpSUGoY#pu5a+R44f6H_(IX>^3$ zY@j1N9ZYK%0BlW1q9JUQOp>`kH;gj?X91kGfD@2g0B0-1G5P|Wy$ok6s3eDIzHZUa zGh6{mOAxJ$;|xKYYVS!z z`XC6e)aZSql(tr#ceR)aXQXRO$hV6|CHR9&C3Rf1ZHa+EJJ>cM<_Rt!FUT zkANN7vzXg-Mtz7wfMQq18gNF=niZiY*VsJUGgq^)ec4*KA~fgfx6Ov;4lMM{KeZxk z%LUp%4q6ty7b{kTwp{D>*=H6GEcRT4vhBBO8n2A5)^yIETxeYtI<@n|A+|n09P@$a zr}Mpjer7lq9mmd30^^}vHUR&dR3keJ5OJ`>{i+3QzIZa4QdBF;vE8?`tae-vqW4oU zLv4RgW6q9%?c1ik9K-?y2jC~8$5^2ETvN;JvAMAYc|N%!wB)w6g3dU<$j{HL2(3AH z)xz0jcgw2KqU{uRuwkc)4)QOdqVBS<7hICfOwi+eMQ`(OUa5TCaq@EhC0P9b2Inr)V!X(?ri@gXk4~mkpzwSaq2P zSRwkwYQWYo6Rx({qid=zSg*Q!bsJkZ<2ti#12jGwPel^}P+)Wl?6#;9n2gE+G#NUW z^&p9w=r*z%6lEg_PQ$3AdK9n)K*8gI%aM`vL`tL`P^_A%B!fF3t7ds3nT*mYx(CqN zijZc~1XznZDC4u94Y&Z(P}V+_o}e))fXXE6z*z@+&<~Jk)`|1drbRnr>15DA4?{N; zWuz*cq7&e@(c_3gW(1pW2=k1ro`c*SJYf*@723>YHjb>;W3QU80m6X^dKnx5iN+=I znn2HBA1@&I3j{ABKpEQ~bZ-_FjQd&rk`Tw1Di1cFDu9|)v`!dlGcsn~;k=mDM3JI5 ziw;!a6u}|*$^Qu8OXQZ@zaTH|=wEdYfV9}RA`IMWYQ55PwR^Rxb4l*`sr(7TV`uSP>p&mEIU)UjRW!R4|S`PDY%2v(?0q$ zl}<+EDf$$QPd|eIXL5~aN{r%CR0F6zONPeNG_Gh|cfX>@GM#BUO5<9SG(5D9DCRVB zwr|?cGRu_h7^7}hU&2OK1Y6N-3JVb6B;~^ZKs=*ypR@T^1m7)R?Um-M+|}V#-yV?G zu@zyDZoRjI-JYwdy=xYbluSrUrV=S(mq8bm;gPk;sdPq?Q3zx!GwDQJiQ_`qyPZlH zIMNEu!^1I#wM8>XJ1p;*kRe7SMyIs;sVU~Cu@?`BzcCO4N;;KE%UXx#=txFuH!_h( zuyJ57vmS*`C}U|l0-+@^`dJNULV~CUIDhfNHyfM5|!2Y0T}wt?Znzyc!+wtw$_9Jfn?ckmgs`3{Sp zR6;TMG^+JfY{N!kvw>AYz>H4e3gp5J*ht_lkFnFknFUI-A1;=b&*BbKYM>VIWHmhi z)T7$4S4|si5Cq5unAZLpK)E2$&4dEm7aXv#W-91x&m-Gkx0aa-Xh`Tv9z-Cj`Rf%$ zp@H!llUP|W0&Jp57Zakh%oVdwxq@!h)vJq)f=AlPL`3}eJa9|{#vhpK4JhfrFa&C# zvaaJHbZ1M(Q;`H53`8Gn$>{k)o^FR#s%CKG6(A;x!Qia-GcQCtvM+vq?@Y%F-5p`f zw1MJ9gGDt%_yU+OrCM~WnvwOLy--LqSi_H<7fgf*#1A8y9)7S}xRp!zCiI3$wkxne zEtTBFm$G;RLzEa77KpLf2JVZL%qUXTcaY2j*{60AojbTLC2-FtO zs5bEQCnV=F@jeRSg z<4ca?;4UKaD^p9B8fLuqe=J6$c^=cyN153E0=~{7U=rJcIdtPTE5s?pAy|Z;jA9!s z3~x=&V$XT2a^C8kZ|^#UQ%qnt46PBP(R9~As_L$6U)pzU)z_y9@xEIv+ut(0^|>2^ zt1X9@gu2C^72)s);A&2-7;DkNtVLI|j^#8190yd*5bTYdm8Mk7Fhunu;J;}G7c=Qh zR8l5rDpp`6s80y0>zCoT_FrI0oFE*qpp{G$3LZZYf^J>F<(AGUKnP^o01*Lxs~93O z>t*ANpct`pOyfF(Yp)DWbJM8F@th&e7^jWnsQxx3owPUtk7=~tPx+o|h4g$V21cB|kP#MQa=BA|2?Ycu> z~iP04zV421@t>+1(}Ufo`Ud*7PJ(udTtm_FQ!P}B zSjs9tDk-XqM&rPH5V?=%?75t9GO}J_cG~j9Y_)FR$=&tT=AaB=W{_O^D&u;$hu5R*l7ynL| z176;FVt3C@{@tBM$iKUrhf0h@R3i(y2Zi_oP{EzA1E8-#m-MRuKzmi{hk-X;aCvIg z-@#Ka5JU{x6cb7?CTab=E@?-}Oqn#GrqZpWyuBnrMNZMZC5ZWL^nxBFH%k$empBD| zVwOY}S1fzC)B-*WzZt`{VH~g8HYL#t7h*PD{)03&93d})=QG1wNlB)bq*`ee(`4MW zsWW(;=HDzx>zCovgU`cmyv|?Zzic#;89{VR3+EtAKMgsY7hKgF3)lVis3}TPQLKuC zUidcmHSR%s0N4JF0+}&^j^VbF(@fKB<`IjudYU^b^sar8d*W*uChzY`E^z_j|@NZ5bkukyrk? z@R5LVa)a&!eFPs5yhIegJ|rjrO zBM-`nJI`*hHHl1j0W#aj^ePj}GTeZsW6^|ssCX5+232Ha&RKG0&vy=f>)@<*u?Nxg zdBFCoCM_bU-&}%^8v~aR*G|6%73kjq$kz9#%XE04;10m$F+>qnSN}j{Ff?@f4uNUA@ywKafpUE z(Q9p44#uQj{yKMw0~UQoAX7$Wz?kTB&@GFgYB9=gcvT^c$kEp!2LjsAxmz`AyI1B> z+_yeCDy7iTKa_3S`e^j>MxX)t6m-Tk=RLx;f11m+KYZi#zn=N=nV-(AK6N_JlZSSG z=ghaxT%EZ=Z-^`HJwNSUZtuO>etfz8_%EJVdOp0;{=&`n$Z~sR>9ZqCW8*j5&)#MI z{82``_@!=lx0j<)$Wjr(P5?mAL@hHB{uW+U^U%QQ!Jg0%-2J7g!t^_3UdkYr znjqi{VG7k+G%-{o#IL~7L_0T($&mTbv7yWlQ5-p0hT=sTf@lW;MEQ3ch9dukBb8 zd~m1pzNhgX@f)k=OnF0;|$hUdkuW4N8!DGAFzZkmMf77>p*|+^yzF?k*vL#mlzEElN zKofVvO;^jZt7WNm->Pf>NBM0q^4;CUQC+sc{ki&%2bePJKH_T5RaU*4o=@MdZ(98P zO8riVs{XDq@NKU~N4d_&*Lft&HNv$p*z`R2yPcg^Kfki`35fi;ZR=L6%QSl|?_Gku2aCIZ&jWM(Xx%|vup^w+lXJLlJ8SL{-dgoLhkMD_er4o4$!{gEMBaM(hw_a* zKREZ+*plZ{tB%9Jb@=|2H$$~Q%Xq8wQg?H&m3zCbHDolrW9E(=GQDGQV!k>MBBpm9 z^FsbzTlapb^lyhac=;JIA~bq>O~#*9xAk(ypFLrM(w}oi!2I0ALp~T(%_C8;oilL? z)`7+Z-~&vFa1$1fQ50F4gN}9)WqvqPooa#)15nDNJk{>I&>5s?)6j2V1wn%E(r6{V zqccd+L02J^iSbDGVMUSgR5u26G&?#%wNK~lWuKC1UnsRZf86y7!z&y`! z+n?$7orC)DSa;NM z=0&IjkM#ovu3^EuAg&Y0tXu5RYkZx+Bj4h#w*A5A^PApxc<%DVTC;fW>hYVqdX{(f z{37~m%d_`*D9d-cu$I@msn+4;U59_M=hv2J?(tA|x8q^EVb;9Uc`N2@#AI8uW9~WlQr3Hut6t`+bAl)5ZF|46 zYGG!nq8%7)%O`yE;xy&W#89(FtOi5g*UNFfrEOh#!ZQ0q_gH4F3JUMC!av@N;=2BL X?_roXy9#;dKtqTV-mPm0nN0r&zV3hw diff --git a/app/api/fact_check.py b/app/api/fact_check.py index 3e7a12d..dcd30de 100644 --- a/app/api/fact_check.py +++ b/app/api/fact_check.py @@ -1,11 +1,13 @@ from fastapi import APIRouter, HTTPException -from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict -from typing import Dict, List, Optional, Union -import requests -from enum import Enum -from datetime import datetime import json -from app.config import GOOGLE_FACT_CHECK_API_KEY, GOOGLE_FACT_CHECK_BASE_URL +from datetime import datetime +from typing import Dict + +from app.config import GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL +from app.models.fact_check_models import ( + FactCheckResponse, FactCheckRequest, Claim, ErrorResponse +) +from app.websites.fact_checker_website import fetch_fact_checks, get_all_sources fact_check_router = APIRouter() @@ -15,199 +17,6 @@ class CustomJSONEncoder(json.JSONEncoder): return obj.isoformat() return super().default(obj) -class ErrorResponse(BaseModel): - detail: str - error_code: str = Field(..., description="Unique error code for this type of error") - timestamp: str = Field(default_factory=lambda: datetime.now().isoformat()) - path: Optional[str] = Field(None, description="The endpoint path where error occurred") - - model_config = ConfigDict(json_schema_extra={ - "example": { - "detail": "Error description", - "error_code": "ERROR_CODE", - "timestamp": "2024-12-09T16:49:30.905765", - "path": "/check-facts" - } - }) - -class RequestValidationError(BaseModel): - loc: List[str] - msg: str - type: str - -class Publisher(BaseModel): - name: str - site: Optional[str] = Field(None, description="Publisher's website") - - @validator('site') - def validate_site(cls, v): - if v and not (v.startswith('http://') or v.startswith('https://')): - return f"https://{v}" - return v - -class ClaimReview(BaseModel): - publisher: Publisher - url: Optional[HttpUrl] = None - title: Optional[str] = None - reviewDate: Optional[str] = None - textualRating: Optional[str] = None - languageCode: str = Field(default="en-US") - -class Claim(BaseModel): - text: str - claimant: Optional[str] = None - claimDate: Optional[str] = None - claimReview: List[ClaimReview] - -class FactCheckResponse(BaseModel): - query: str = Field(..., description="Original query that was fact-checked") - total_claims_found: int = Field(..., ge=0) - results: List[Claim] = Field(default_factory=list) - summary: Dict[str, int] = Field(...) - - model_config = ConfigDict(json_schema_extra={ - "example": { - "query": "Example claim", - "total_claims_found": 1, - "results": [{ - "text": "Example claim text", - "claimant": "Source name", - "claimReview": [{ - "publisher": { - "name": "Fact Checker", - "site": "factchecker.com" - }, - "textualRating": "True" - }] - }], - "summary": { - "total_sources": 1, - "fact_checking_sites_queried": 10 - } - } - }) - -class SourceType(str, Enum): - FACT_CHECKER = "fact_checker" - NEWS_SITE = "news_site" - -class FactCheckSource(BaseModel): - domain: str - type: SourceType - priority: int = Field(default=1, ge=1, le=10) - - model_config = ConfigDict(json_schema_extra={ - "example": { - "domain": "factcheck.org", - "type": "fact_checker", - "priority": 1 - } - }) - -# Sources configuration with validation -SOURCES = { - "fact_checkers": [ - FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1) - for domain in [ - "factcheck.org", - "snopes.com", - "politifact.com", - "reuters.com", - "bbc.com", - "apnews.com", - "usatoday.com", - "nytimes.com", - "washingtonpost.com", - "afp.com", - "fullfact.org", - "truthorfiction.com", - "leadstories.com", - "altnews.in", - "boomlive.in", - "en.prothomalo.com" - ] - ], - "news_sites": [ - FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2) - for domain in [ - "www.thedailystar.net", - "www.thefinancialexpress.com.bd", - "www.theindependentbd.com", - "www.dhakatribune.com", - "www.newagebd.net", - "www.observerbd.com", - "www.daily-sun.com", - "www.tbsnews.net", - "www.businesspostbd.com", - "www.banglanews24.com/english", - "www.bdnews24.com/english", - "www.risingbd.com/english", - "www.dailyindustry.news", - "www.bangladeshpost.net", - "www.daily-bangladesh.com/english" - ] - ] -} - -class FactCheckRequest(BaseModel): - content: str = Field( - ..., - min_length=10, - max_length=1000, - description="The claim to be fact-checked" - ) - language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") - max_results_per_source: int = Field(default=10, ge=1, le=50) - - @validator('content') - def validate_content(cls, v): - if not v.strip(): - raise ValueError("Content cannot be empty or just whitespace") - return v.strip() - -async def fetch_fact_checks( - api_key: str, - base_url: str, - query: str, - site: FactCheckSource -) -> Dict: - """ - Fetch fact checks from a specific site using the Google Fact Check API - """ - try: - if not api_key or not base_url: - raise ValueError("API key or base URL not configured") - - params = { - "key": api_key, - "query": query, - "languageCode": "en-US", - "reviewPublisherSiteFilter": site.domain, - "pageSize": 10 - } - - response = requests.get(base_url, params=params) - response.raise_for_status() - return response.json() - except requests.RequestException as e: - raise HTTPException( - status_code=503, - detail=ErrorResponse( - detail=f"Error fetching from {site.domain}: {str(e)}", - error_code="FACT_CHECK_SERVICE_ERROR", - path="/check-facts" - ).dict() - ) - except ValueError as e: - raise HTTPException( - status_code=500, - detail=ErrorResponse( - detail=str(e), - error_code="CONFIGURATION_ERROR", - path="/check-facts" - ).dict() - ) - @fact_check_router.post( "/check-facts", response_model=FactCheckResponse, @@ -225,7 +34,7 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: all_results = [] # Validate configuration - if not GOOGLE_FACT_CHECK_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: + if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: raise HTTPException( status_code=500, detail=ErrorResponse( @@ -235,17 +44,13 @@ async def check_facts(request: FactCheckRequest) -> FactCheckResponse: ).dict() ) - # Check all sources in priority order - all_sources = ( - SOURCES["fact_checkers"] + - SOURCES["news_sites"] - ) - all_sources.sort(key=lambda x: x.priority) + # Get all sources in priority order + all_sources = get_all_sources() for source in all_sources: try: result = await fetch_fact_checks( - GOOGLE_FACT_CHECK_API_KEY, + GOOGLE_API_KEY, GOOGLE_FACT_CHECK_BASE_URL, request.content, source diff --git a/app/config.py b/app/config.py index d9de9e9..a13fd4d 100644 --- a/app/config.py +++ b/app/config.py @@ -3,7 +3,7 @@ from dotenv import load_dotenv load_dotenv() -GOOGLE_FACT_CHECK_API_KEY = os.environ["GOOGLE_FACT_CHECK_API_KEY"] +GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"] GOOGLE_FACT_CHECK_BASE_URL= os.environ["GOOGLE_FACT_CHECK_BASE_URL"] OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e2c8b7cf78c3bf747b3006eb0a7078967671dd0e GIT binary patch literal 5577 zcmaJ_+iw$B8lUlu?{*R=j&VW)PC^N@+ay3*7AR$DO=!Z!R0;HAwsi;3AsOl$XT}73 zsgxEfCA%xF`$VL&5-%$ikq4?>seRc0V5L=%y;|pJtG3#g<&7xbmp<+Hov|Iq%QBX~ z`TU*BocS)l^E>{l-|rFN`pawY&ky(o;a~WpaXISBR!|g#dx9otVoFGhaWQR++tN~8 zO3QIMZI9bUY_p{tX=mJ-cEw$3cihcwQp%I|#=TsZQ@*q>?h^%@@Rp$2?+TiOh>s-e z8F4?iI>G8{v378)8?2rdYbUpQ!Rl+Vc5$m8tQ{@Z0JnC6wX4P2&8-2jc9S0BeI(ac z5#;tBum@Xu_i}45SVJw=P+^x6eu@JXJ9EFO8stJ&BPnJ-qmz`zoYSV6yG&EeeN#>8 znrdb#^F*_mYx?|YJ!vWulkv{v^SWU&*TtNvXESO_5t%%d$*1A&fPQi<%I(T^5K$WKBE)R+~WVnoX0|a9`HNweIF@-hx}{Vg4yfvvigixopND z%%Kre)l=ie1D7-UjXa4EY>p(sWsz&3m-CohbMtys*RI=}rl!(pn-dfVhQPAp{8WFl`G3$UKTgsK@iJ(W`F#5FZ(Mi)r(;|VpFn@D3y7~GghqBUVz z#&dl1%E`H#tSPbSZmCoe1sbQiRM zlR7nWP0gpw1U@-S3(So7c1_+v;eNoXt}JOF(WzET!<_#5ZZ8mHkr&*N;D>8 z6LXSVFe*ig0luKf4);&Z&R(2NL@%D6D)t{fc=(mELx;x>9=mkt$njT>9iKcne(d0@ zzc})0(Ko^0a|~BwC^GGYRnl%011JWqMF*}M*-XNKu4y$vZkbe7e9V zQFJr}qMdOS#OFfQ*LmmMMrikHbbajIO6a|%vn8o4zqirb|K#TvD!s9#nPsCa$2R)+ zK9JWB$1D9;mQF7pC{3=6mE|ix_6$~Cfoe#p_V2Iuy;=AMrFx_x)1s~-Q| z8SA~U-;VdHqpPs>9<;Q-*9#yEV5tr}4d8NV_6E#o0AY=ajk^mr#mU@n=dY%81M-{6 zm@ctmPt!7LL>9?aLpKTSf>kgX)nqF|p!e}xU*&C)j@}lpH_-%I>X#-q)?kGGPY+wO z2gK>N_@(d%+njJ)1Z0t7Mdt$S496!XiY` z;fc!p^?igSaOuTe%~u=W(Xa=G7%ziZ5`OghO6tAT7pVuwDxtB*fo}%B9{Bcp<yH=)uw0A7uEZakyHpw2?k_5*tu3346)9lD2j1FE~cR@Tqk2G7{P8`Hp z&l_GDU~_@ZUAM_tEnxGs*m!<{&D&ygU_aPOeVTL4)re=@51XN^xp~Fe6c6)8Q>vbx zB{y{f`$VphfH5ZJVehw_x|t%(LwWmY*w2~Y1h@k{n+5R7%>QI3nX$`rkO0m)j5A+K z&CKW3c@l-aL+NOb3HLCs(c`c_D;&6)(brJ$h~eVs>nM2GPNCKor=w^?u@An++aMrL zo#C<^t_}_@MVG^+(I@WyvOHAXy?1G9`DiJ+@_Jd`TaD}kEOf2*eBm$4`>G?OOBYH< zSEFA*+vrAk@WFf~Jic_kG*Xertr+4Xo*zSe<_lxksQpF^oe-ObjmMAxF=@pIzyJrc z^LPNvftO>9MhRcBy)>N#KC zGrTmtyihV$(q(zLx_1Qjja#eI$|q%cq#Ecgef%UaT#<(@PvHbRJjHoA7`OfgqSaH7 zlaeNCwl%3q?_fV54vl-I-fCZSG+UkEDTn5)=d(oIE&1#rUgG0kbXZw!jc>{K4w&0@ z%~tQ@sn61>d0Q!<)_vcm^=QHSws}$Oy)S|EYawkHSi_ti_BChhD)cM6S>Q~ACp9SI zcyZrFs?X~{f+Mhr(E@Nn)r>5vMg+OU7^f9Pv*6p#dr-tjs2$rpVcjP1fmDLW)<|5- z<}(^a5MoYB44_8_a~k<{TBQYw5Lxt3SzIT=M?6KGFLv*+MKFho-nnd^CP@T0l%fw; z9f=~5Bed9w>#~+b$CKH#Vxto<4Mm(}@+F!lU^#{sEJk3IQPg=_odNABMp&@{BJh

`SOz?y|pM{0tk?I8Y z3U_P{b|dEsBIgQjb1q!>jw);H?^{7=*{(88j7zW|$OTD}na4_jOTe6%Z#z0bGu#=n zXz&ol6nmTdneWUS(MyTw^i=fR)GTwyrrw=P%*|YyYIo>6@MMZ2244eD9e0Il&(PBO zYS->h>zoRkcdQ$UEnZt6f!JPm>oE}OR<>9o)U-pB;3Kcuno{Hzl*pC`WM3^>qb)2g1K2znG(P7%=`O7xegp9R5eu=TleRL@e~ zY*bydWX7{}p1up+pe{bZA$+2bP>XU;n<@& z8UfSb&yFTB_7=L70PAW7uYNul=fTqdEdl5a;1Q97V8qNuuEKE~BHch+_?yC$)-#C| z$;`t6(UVqh)pZ(xcVJF9RGB1W(v$G3*z@5Pb*%WouMdARcIAz+I4b)90x=_)6Hb^w zzBA0#;5R&Lo+66S53xrGC$EEbB1dS#Lc|x7^yd-u#@icS6JF4|wn_VE;`2#+V-%2d zTdepq_^SqAERy6WP4o*%=gdL`{;Rm2H(*~{02XfK)FgmBvjdXqIp%?LSDx@A7oCLX z8n{0-_}`Gv)5H>%Ox<-qHBm z!n!NWX+%u%wU-UA69RLz6wo~W#K|eI9Zu)d_%noJ`*3^}f1*gh*T5(N*7t;~E^oD` zx7xF-8a!Ns!>t1ld~Qpy+Z~%ep|}6R==$VzB{*Z1)6B;3=wsXC8{f`WhEJ`_{j1Tk zdiabTZZ(ff43yk80dBRR zL!2ln04d@`txFb1mZ3k~YQBEaxe9&YRy%4F2TDC9ttNm{b9rG_y(Yk|*54ual?wk6 I;KrTyU$OlTbN~PV literal 0 HcmV?d00001 diff --git a/app/models/fact_check_models.py b/app/models/fact_check_models.py new file mode 100644 index 0000000..6c85771 --- /dev/null +++ b/app/models/fact_check_models.py @@ -0,0 +1,109 @@ +from pydantic import BaseModel, Field, HttpUrl, validator, ConfigDict +from typing import Dict, List, Optional +from enum import Enum +from datetime import datetime + +class ErrorResponse(BaseModel): + detail: str + error_code: str = Field(..., description="Unique error code for this type of error") + timestamp: str = Field(default_factory=lambda: datetime.now().isoformat()) + path: Optional[str] = Field(None, description="The endpoint path where error occurred") + + model_config = ConfigDict(json_schema_extra={ + "example": { + "detail": "Error description", + "error_code": "ERROR_CODE", + "timestamp": "2024-12-09T16:49:30.905765", + "path": "/check-facts" + } + }) + +class RequestValidationError(BaseModel): + loc: List[str] + msg: str + type: str + +class Publisher(BaseModel): + name: str + site: Optional[str] = Field(None, description="Publisher's website") + + @validator('site') + def validate_site(cls, v): + if v and not (v.startswith('http://') or v.startswith('https://')): + return f"https://{v}" + return v + +class ClaimReview(BaseModel): + publisher: Publisher + url: Optional[HttpUrl] = None + title: Optional[str] = None + reviewDate: Optional[str] = None + textualRating: Optional[str] = None + languageCode: str = Field(default="en-US") + +class Claim(BaseModel): + text: str + claimant: Optional[str] = None + claimDate: Optional[str] = None + claimReview: List[ClaimReview] + +class FactCheckResponse(BaseModel): + query: str = Field(..., description="Original query that was fact-checked") + total_claims_found: int = Field(..., ge=0) + results: List[Claim] = Field(default_factory=list) + summary: Dict[str, int] = Field(...) + + model_config = ConfigDict(json_schema_extra={ + "example": { + "query": "Example claim", + "total_claims_found": 1, + "results": [{ + "text": "Example claim text", + "claimant": "Source name", + "claimReview": [{ + "publisher": { + "name": "Fact Checker", + "site": "factchecker.com" + }, + "textualRating": "True" + }] + }], + "summary": { + "total_sources": 1, + "fact_checking_sites_queried": 10 + } + } + }) + +class SourceType(str, Enum): + FACT_CHECKER = "fact_checker" + NEWS_SITE = "news_site" + +class FactCheckSource(BaseModel): + domain: str + type: SourceType + priority: int = Field(default=1, ge=1, le=10) + + model_config = ConfigDict(json_schema_extra={ + "example": { + "domain": "factcheck.org", + "type": "fact_checker", + "priority": 1 + } + }) + +class FactCheckRequest(BaseModel): + content: str = Field( + ..., + min_length=10, + max_length=1000, + description="The claim to be fact-checked" + ) + language: str = Field(default="en-US", pattern="^[a-z]{2}-[A-Z]{2}$") + max_results_per_source: int = Field(default=10, ge=1, le=50) + + @validator('content') + def validate_content(cls, v): + if not v.strip(): + raise ValueError("Content cannot be empty or just whitespace") + return v.strip() \ No newline at end of file diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5fe153f4fe7eb6088bf019108f8b358e5f779ac9 GIT binary patch literal 3341 zcmbsrOKcm*b(Xs%S4&c)C|R;4#aPEi0ooSnG)dz&t`Ui*Y}cvmin7xXO^GFUNUglw z-Oemclb~9`^&vq5q(Ki>krW7!iw&!AjzQZ)om>JGC?v#)*{zBM=%wzZv?Vm3_Ru#& zQgQ^Rmkuy9Z{GjSzV~MT7>Otd#-E;jv3xg#(BIghH#h|j8!-W)Ye+}BV4{kU6Dq!( zPhe-CDdt4jMN_KybAFDK%s?fW3s&TuTf9XIx70gFkc!$Xc74k4mJ^1 z7KVG~ws$?S61*vP&Tsf$+arP0BGTor)v5bkJO>O<;z9%5boE*Fe5PgaZrz9 zp`_@st5Plsun!0KfSQs|9{^0O?khsH%!3-NJtz^s$4n)Bo^-+}x}NmBLEV{z;6+Z& z&7I3!E@Hrcj!ioXgIdYqoZ(W{G`St-L);}*citu?h#Z3C98nW% zI(2PgboD8k>2kY<)d&@AyJ8xba1ss!*h)IY1_c$(w3+ZdNHCNTy?_oV!}25-Lic2c z7@*FrC!&p^TCJ9J=drFCW}UhkNm|%#?CoMphNW3WLo@MZhhWM>O&0XV?k?T1bnIYQ zEVrO@Z@E5wUc0Ed#3)oP%&{@X4?b&U41CN&oMG$&#pDtuUD*MKAwjzSF3SMF%qlHr!PHxOl& zx}?q`pj{8RB^}dw{z;i<>bCcVE$YKw-1Bwvn&(}fCmy7fr!cK~YPgtsLCrDp7jfN_ z3mV1wDlt7jq%Ns@5@YZJ1iMvY6(v?8-4TZU=P)<^0U!Ge5}>2>p(Y}}ra(h3O8`D} z;bs@UA~aFcr~8`X0`q9c&_!K(%fBeO{vKf=(1W)3rqGmD{5QMui~gp6p>LbogKl;K z0RbJl;Lg#bXVHG-ZpE~~BHA$yAd*=OxX~V354ZzcaPZNQya}Dn9qe&61HJg)M8B5w zK3!P}^?KpLt{&I+-t%oG+>7#xphp%1KMm9bcZ&`DdipDV^>6$2DBF7U|FK&`m&F%R zP56yi6QF^eQ&bq!7@C;MT8PSXimD}IS5!@<4lWwdNmQ0PDl3_?3YD&&wC%Eq)oCWd z+ow8t?lf1`cpM-V@=vvi$}&(rKl{9D*`V3BN=CU#u-@=vYvO$N9(%+Reoq8$!Y}nbDUl#kAwnPVBpup&B1l*U zlQ>)01(G4QjKjhVb>A=nQzpm0GmI}!rl7auRMn;P5Li704})WvrbbdMe*Q62n&YHu zxWF3AN=)1uyD`?Y`dhhYyXa6wpy9@7~S{=!>#-48N+P@i;hWbB~QFQPX{ngTDBp6lNDA=d`8w`hs zzp80{WOVt7UtVgB9r<5^Z4s>hhkgrUz9OWb6s}H=Opc@5V+_7MKA1it-cC)Tsg(GB zDh!ABpFNU3Bz|zn5AX*^L|`g{mwfC5FHo`h2iNR9b>ilqEFw3!x zQCP0Q>#zX(&H>g0_|YN}AV6sfmnjod$poxl%EwHtQqZ;IjeU^&d2l7)btQSsw2PWa zj|0$QVOK&~e^^4-e{d^zBe#S$-~>tu`=NwN3}(-spG{}7UZ4{Pm<_|(#w_0YgC4z2Bf=1%D7s&tgFyiN>w z0r>ra#zNSI6ZWmh0a(0XNrM7*3|^<+I22H_0_A1W|`U(vo#5dH|^c6eY!`ImoaIPhsWeqTg=yO%Fr|K8oh>9xb@54Dfwa}PwIwUaT% zH8{9~>$$bV&wcpB$MW|dh(NnPaWLdt3bubJBm|`uiM2)8KkXlA`(cM74@cWU2FWNi z+U{eJf&$7$h(Q+wzwqtlp=En*PjV9h{KbQc4=jK2;2>zGEW&rjCyz_F{o_+&;J?kq Bfqwu1 literal 0 HcmV?d00001 diff --git a/app/websites/fact_checker_website.py b/app/websites/fact_checker_website.py new file mode 100644 index 0000000..d6fae44 --- /dev/null +++ b/app/websites/fact_checker_website.py @@ -0,0 +1,99 @@ +from typing import Dict, List +import requests +from fastapi import HTTPException +from app.models.fact_check_models import FactCheckSource, ErrorResponse, FactCheckRequest, SourceType + +# Sources configuration with validation +SOURCES = { + "fact_checkers": [ + FactCheckSource(domain=domain, type=SourceType.FACT_CHECKER, priority=1) + for domain in [ + "factcheck.org", + "snopes.com", + "politifact.com", + "reuters.com", + "bbc.com", + "apnews.com", + "usatoday.com", + "nytimes.com", + "washingtonpost.com", + "afp.com", + "fullfact.org", + "truthorfiction.com", + "leadstories.com", + "altnews.in", + "boomlive.in", + "en.prothomalo.com" + ] + ], + "news_sites": [ + FactCheckSource(domain=domain, type=SourceType.NEWS_SITE, priority=2) + for domain in [ + "www.thedailystar.net", + "www.thefinancialexpress.com.bd", + "www.theindependentbd.com", + "www.dhakatribune.com", + "www.newagebd.net", + "www.observerbd.com", + "www.daily-sun.com", + "www.tbsnews.net", + "www.businesspostbd.com", + "www.banglanews24.com/english", + "www.bdnews24.com/english", + "www.risingbd.com/english", + "www.dailyindustry.news", + "www.bangladeshpost.net", + "www.daily-bangladesh.com/english" + ] + ] +} + +async def fetch_fact_checks( + api_key: str, + base_url: str, + query: str, + site: FactCheckSource +) -> Dict: + """ + Fetch fact checks from a specific site using the Google Fact Check API + """ + try: + if not api_key or not base_url: + raise ValueError("API key or base URL not configured") + + params = { + "key": api_key, + "query": query, + "languageCode": "en-US", + "reviewPublisherSiteFilter": site.domain, + "pageSize": 10 + } + + response = requests.get(base_url, params=params) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + raise HTTPException( + status_code=503, + detail=ErrorResponse( + detail=f"Error fetching from {site.domain}: {str(e)}", + error_code="FACT_CHECK_SERVICE_ERROR", + path="/check-facts" + ).dict() + ) + except ValueError as e: + raise HTTPException( + status_code=500, + detail=ErrorResponse( + detail=str(e), + error_code="CONFIGURATION_ERROR", + path="/check-facts" + ).dict() + ) + +def get_all_sources() -> List[FactCheckSource]: + """ + Get all sources sorted by priority + """ + all_sources = SOURCES["fact_checkers"] + SOURCES["news_sites"] + return sorted(all_sources, key=lambda x: x.priority) \ No newline at end of file