From f7b2d77ce485c3b1baafc2d0e723ba62bfbd53c2 Mon Sep 17 00:00:00 2001 From: smfahim25 Date: Wed, 9 Apr 2025 17:02:56 +0600 Subject: [PATCH] added perplexity for checking fact --- app/__pycache__/__init__.cpython-312.pyc | Bin 147 -> 165 bytes app/__pycache__/config.cpython-312.pyc | Bin 646 -> 735 bytes app/api/__pycache__/__init__.cpython-312.pyc | Bin 151 -> 169 bytes .../__pycache__/fact_check.cpython-312.pyc | Bin 16843 -> 23123 bytes app/api/fact_check.py | 266 +++++++++++++++--- app/config.py | 1 + .../fact_check_models.cpython-312.pyc | Bin 5979 -> 5997 bytes .../fact_checker_website.cpython-312.pyc | Bin 4880 -> 4898 bytes requirements.txt | 2 +- 9 files changed, 225 insertions(+), 44 deletions(-) diff --git a/app/__pycache__/__init__.cpython-312.pyc b/app/__pycache__/__init__.cpython-312.pyc index ba12e2f8b81a4d298ac0fe8ffaaf83d795a34fb8..477fd12b0fb8e52ada45823f3349caa6b2ede074 100644 GIT binary patch delta 76 zcmbQtxRjCmG%qg~0}x0=Cr#uwHg(YtElw>e)-Tq}O-szk%+1IzE>6wUcgasK%}vcK cDb`O*OfJz)&PYwpPA$?+N(54QDH9Wo0As)zE&u=k delta 58 zcmZ3=IGK_AG%qg~0}x2<4x7krtZby8k)NBYUs_U}k)M)Ush^lqT#%WQlUSsmmY7`P MoRONGJ+asb0O+t1%m4rY diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc index f94cc0b8b460774ed4b789e858d80b86129ba038..538cf221080cbebe2635db881ffb711bc7850df6 100644 GIT binary patch delta 212 zcmZo;z0b;fnwOW00SK;1eoJ3Ckyn!O$3*qp6aB=wIVFLjQCyXbnp~6F7}JG?0$hUv zd|V?uLn7lH13crsT_Y#wFv>G>Pwru~a4G^BvXbF5$f#dV`k}?CMaBBXdbw$d8JW2m z`NhSldHOE-$)&lec_qdAX^F`ty2%--$=Rtzx=D#ZDlcX7dqznvL7;X}>Vk~UMM2#Lz8n04{Z*Y+GxRR j7l%!5eoARhs$G!)&;UjtF3y=8%@j0wFOwV#ACLe)-Tq}O-szk%+1IzE>6wUcgasK%}vcK cDb`O*OfJz)&PYwpPA$?+N(54QDH9XT0A~CdJ^%m! delta 58 zcmZ3@JxF9npC$UIBEit*o MIU_YWdt$K}00V3jW&i*H diff --git a/app/api/__pycache__/fact_check.cpython-312.pyc b/app/api/__pycache__/fact_check.cpython-312.pyc index 39e026b459e74501494bf259f6d8712e331802e3..c3f0bdb8815c267a1c6af7007a2ad753d4b9b210 100644 GIT binary patch delta 9458 zcmbtaYj9iFaXuGs0w72N#ET@TOMox%0ZP=%q9jVRNJ_LQijgckq9ijA?j;Et5BlCq z(}aOQjWZpl)l8%!sid)8F|Ma!M@q|7?o{ryjqG^F_QWFtfjWe*>9I4DAI+bJmYq0N z@}s*4013%*W|~67#d+@T*|X=I-4FWN3rzMk#`LrDay^01YsY^(nI5}nawr-V7hAiM zijZbZ!x2sq(vE52TN%=Y%f`yW`Z0ajFlGoF$BYbtc2%f6Ts~G#%j%FRTrpM=t{khR zbxo)$Y#uX*En^m1*M_RY)-h|?HfE!BUC17Gj5%nzEaVK=jMdPxK2)0w*NxSMU1P3r z{aAf?)7YjkJI00^#u^x+AiQe2A9cR}!+WkV`IfOp7{gpEbdNPb&B|A-_clY_#y79m zTcGYJ>2Ku>eA3Gqc|GrS0+R_9SN?6)SQ}^Jv9*kM@%0mmLQe(sw8I4K@xsVTsCRHx zv<|Jjaebs2dO9f&3-9A=Vp>8T;HKfKVN4f}>EW!v!p4`d;OzI+?pH+5IbiB$zG*s; z>eE@eoTX}Bueqkk8-uIm>n0Rj-M3Zi*6E&CcwMP_MW2_Eb(~+~rC^x%DrD2q(Seb^ zq4B=aq4C25CuQ^Kz_HQcfv*oeelqnv#T=ta8I=c8PbnL3d|!D?ao7O@WR=7}C&>&i zDxt=*?7hdjPKdl9c8R|5gnud+o{EYhAL;6k24=&2L=w9u`~j&mFvSO+=7r9)e)x)T zDOS^=Zb1&IuW7z$FNe|#@b}r6j*!EIA(w~*)%-_1AQhHI$g~=lZfA-c%Mb}{od3j0 z>Y1JUjw?7Nr@F4bR%ky?I8B1#wD3>2!ShA(vVtpnNtsY^dd|QZx$>73oQbQ56WRl{ zNd;H=k|LojBT`k$Z?9|Qs;--hVz-eO7>+y-CirXRCFP&1)g+;ctER1~XKa=xCa#Dp zuUoF6f#CB393eVVRT!y`tEcU$ALv|4^|UkfhOV|<69>0IXXC7t+9f6lsEu(w^gUy%hHa=f6+08<>r-4`nqrC@=oCx2sYLo_ zFpj0Xu;+=2;{0U&Bq?(3C*NfPib)bsoY?|KVx*l#-Q$e3YolB=6u0SNt+>|@Yll0J zz20z5ljn0$aek|cBr3s;imGgPZ!9HICGB1pSS9T#mW!$i+qt@gIbJor50+Ka(NE4) z5^@?=m#`$NKMZ$UF)j%6xR>j~XB+Tc{DiSwO_ffM}tqW}`5o#?FYW)hKwz&1- zLTwKbYNJAJd7({K=|FKEaS|_&SHvshRdFlVEDaRfrw4I8is>PAhh$69(rD9-(-%;H zE#Y6$7%xD*^w==uIN3=qD9*o1>0f$?gtrX+OxG1l8{dsYI0a{ZQ?X^5&6iRKb>$U~ zxK^rP?{T9SISYp{Q5G)~njSnz%ChMus=c+SeepY?ix?!lpXeuEa^Oy)w*NFoo>QJA zbKrvv_+WyWgH@dWGY{E9=n7SEG0@|_lcZfJ{jWOghWl^2-f(GuQE|@8wUwIxBp5$o|E3+TLHa!hs^(Syk#dglwvWUb zMtNZ-#6K65=2>_M*r)kOa|4z0|Nz#ni)79ml z3HoN%=lcA?uE3OE>Iy`|z)lKABVw%5KQj{w2K-p-niivxnD$ZLFYp5ERmvKFAi&Q^ zF(XKl;HBu4=4W`hyl+;TiVDFPog}Mal3$QD;=CyFVgDSHB;;gtmIWrPpPdo-Kv3ja zd}7$b6XRq1BL2{PjAx}O=!u3x(Yat`k`09X!7%HOa4g45ybumXcs4k}2Bj8}m4sQo z1Gt1l_=*bbj9-v~{!nP1_W0OQA^L2PLy3W82o}IT%L`mEAa&%e#PN{;&xZJC`4HQB zXz1Xf4)(}E|Impe9qjPYC%qk1Y9J`_tiXqY{<9&T6{E94fERu2U{nbEC3YU_&@0Y> z*+7oSK6d=*2s;;)rr3!fAL7KGg3RQ7LOY6#*jB$F_~+T^1bgDxun01F)EM<@#T`Wp ziOAQ_ijn~KC}_n$Hxu$lsIA#nFpNJK;yF;n4|YeZmCo=%$e>CJ(GV|d3wtK3X5sOV z*$3A*0IVJq^TzIBVS+jw<@k`S3-hArpX9|~;R%H}M2j&-5%2|&wS|cS-taNa>E!diw$ll(8Uhj6G+9kIEoQoZ$6^&5bvi)S&qiR+f?!Jy zTu(I>oSc$1VV(=lhGivqh|qz;h|mdNf)7O*kJBw;CxmF2jkyZ7KoYi*W6#dBrN?H- zs}X2)q~W4re=s7erJw|6Dw~--8w!e3ydaw}?iw%FIaw=4f-^I`By@qWdx{?R9-|jF zqhR-jF+tL-h}$pvnkx!#Kp`T!&zPsqCyWF+cB8mM=6R9 zD&gkbr*6T=!PA1^6-Br6#k?Rx45EMHO%N2tSssUm{hg{C zMpBK$21M4M7Cxz(jjBJM}RDgAjqhpWLqdy*uOhjeX zEdZv|WX#Q`Y29r>vahB@{jfcC(@)rXttEHtK_5 zeb5w+k*vZ4FRNku!y;TuaEN9E3of|~XDTDnIavp-6X@4M19o_TcpXA3AxT2rL)eQ$ zwKM*CT(GPOgo1GI$-3eO$Qmfkh9udtaj%ZAcN)N_MV=c6NAkn5@=IWqio!)S9)_D1 z97Q(MYnvYY^)`c$^v{hKM&=#eBtXl?&<(ez?kF3Jmj`TYlk=4)oL$f^fP`RSy z*1!=$6RbAS}7 z7}M5A?&x-r_{7|pbF}4Z-FH0O)9xK<=gv>HstV1ygKH*YcCKlNvtdo8 zv`#ahYIWtNwQAzrlyhxcKCyf-UAyN~wG!H_CaeBabrqe|`~;)cuUZ@{md1>wap~9{ z%jPf05L0GaqjY&N=ki=Vb>-BOWqC4r>)_>6S=aE2eR$ze&RD%-Y|0p$?igEgR@bF7 z7tf@|7Y1|g)+J}gy>+=SB#l0uv-m~iQt$6lkJbPE`y({(GGRgXFpBglqObbVG zTFZ)~J>zI!t!Z7U>B`h}t=gMb?B0ys3-)lE(#HBVBdKk^+I6LC$-8_kTl2_DO>d^A zH(RrR;c(8@v|{sSY~H2z*PdV5c_g#*NY*y8aPSjX>(%iq<4dP+c4u9^E3W>Gt3T@+ zTo}nY+gF^)u8gy5+4PoaW&erH{u5c}6AO>6*+^Z>)qPj?E$v!9o2}jTTZ`UqS~C-? zbGX=w_h;)($bi-)#eRX;1j`3SG77ivtSuz`P&crGEH7Is5Rv-}b)Y%{qoJs6VhcR^81%Z2Ljm(usdO_4lW4R$Y54>)vzm z@GsrF(mhAA?vV?J@2we-!yoSMKw}RvDDiiBk$L(l75w|ppHL8^^)At?^^W3+w0p06 zmi*V6mmFzp_Z{6GwvF<3{X{x?KdrvLq#8m9h1OvCa1R-bpY zoq409W@ImUQ^O3mY2MT-v8;EDv?||pGsB&lH$B}@{;AD6yjk(n_I@)|-rl=oq(Sq} z)&osYdCy^l8SgbRBh8xknp9Z!GQc(YUK@jzP6h{U?rR&VSKqEtLgluL!E(J0%gs3G zcB^KjM|<1HjBGL9?$Tm;2kLQq7jnAYL(6;1Mq8EdYx?Y?9%b6aKqXzThVHaSjpbG) zlmi7M3f2}S1Rnt<{C##4$R((94B%=c%p@3uv*Jvg6hricBCeQ*yY9hHLK#;I03AT6 z4u6m#6!K3(AnWlZR2!*&YKzsHR3TQusU^ft=qIi!N`M?}y^NU$ARwx5s^*pX?~N3| ztBt)nkbg(n{EL^5b|8ZE>G2+?q)(U>duR!pW`$4>3;942<4hI;jKY&y%3yskEga*e zSs_xw0%jLSNBm(LVYp`_Ps0luabrl1?;Hj_6A;9Rlm!h15xfHF^c9q~$-)c_muSrT za8;<50wz);Uaf3|`Jv!hALSuD53RB$!b_oOApc_1V1|X}{M)V0zuI)1zVP}22jLKm zNai0sH5Ab-`3Fu2;9Mj>hClH%kaJ|stW`Td(s@6yx-JJ&XBPCis@jz*cc#j{p!vYL zY2n$l&bN3H$nRDS)pv+`sRMi@MB4!g+}U@P;4>Ln4fdfCN2A00iuQ35aw3G4|THma^C8*}F>F8#dTe zb@i!V=&A?d0HWUTN$4gC00KIY2q}I-&y~mZ&qA7hURQFM^hJF+)6@O1m&|#4yllDx z_1d!D$B^@0fNIKMO%*5eUjBcVWQv=pB-6tsB|(y9LlVkG08*^Lc3RTI zN($O=)y2_q#c|@b`We{f;+=xAc^>)2>d<62|BqYA~ zT>)Qt7v1OI^j3O+BhB6J%Y5i{g6EYMzH~M9v0N}2Ko$MMd?Ee*%0(1zK1hqG9E2NC zBg&cHSR=v#Fr0vpnQ#z@S52eu2cm5nQ+rjh(L#itJk@hrJk#Ea`E8tLuW>L(2_8{W z&36YQ5XuKakSadQ$4e63*yx|aB>4cBygCKtV#9eH1#x>X1p*&lq*Xb^TSjSl8CY%^79}!j^#WQRaIQr#$3~SQ+{SO z7%f?Z|2BqIm25hI2!Jz)6`(rank65a(F><=bx7WVKT!uS>^bt8nW#;WVpX#_TYb&~ zd6_L|cS8DV(wrN-YgRinX;n3(?A48Ti7Kf!FKBa)hRX_?&9p4-U1{&lwD)G)_uq2A z-T6jm)-k$JE*l&^`RvLOU4ZYch z{h)GB##moe`L0n_2kaflzO`tHx~9vIULCqJv^cqxxOw&`;n&0OoP9s|ZZOmPq(58t z^|a0@8|re6+gBQQXBv0k?8`Rxt~B;%8vC=2gTQ}p#@JBgk0vo+X#2UrwK%bK=I713 zbFQ`p%|a}rt65W#h9{IzNn~`cPYhM(j(i4AL2k^Lx9JqJCG`u|(u1HE;PBwD=&67N z&PD(T%Y%WkJ`%-n6%a7+b0`-;^8Bb9@7AOkrtQX=y7OvH2>1%V)UO+CHj2BdFp-f+L9XA~AcNiZ6xxytRJQ6<=4#_%JDICSB9m#0Q*0`nH14UU63JH>k@Vl^peH7?x zz|uP2>8(U$j|ksS#T%QLOQ}~HI}BH`VG&6v^+BWO6s(Mb$oUglNslNEfW|3On@=Gs zVSLHCqQyL1Rz1l_cmZ)^igF74OjMABe}VD!%#8z0C(F0JjI#a)$cT_P3D#dh^0!D{ zMS=#R;YU9B_`6iP`$qDI*w}_-35eG$;C~hnt|OraqNe&0R?#5BKOmu&poXO8dx%Xa z{99^LnC3O+?SzT3@H&!zC1ei~N^FIVH;}xB_Oru*{HEyiBDgZK*~g&qZDMBT%y68;0r_bEz56||xJ{FWH< zP&0i(grOlanEFBP3?Wch zSDR{;m8xyCmRhaal~V1Gwn|o|BGOj-E6s~Ym3E0p6?RIM{@9gv)&1c-D$;bN-E+nm zNVY$Ah|ipJ?m6dv=iEEz-ppU3U!I}%FKsppf#=T;+W}pYPZ^`e zlqqUTnWN^EC2FBWB%(f9lCnmvDO=Q*vPbQj&X9DZoKa`0G+L_3e6lR%in>zns9Teb z$#b98WK&hqs#JBfI#m;`N!3PcQ+3h0lrQSjOe{%%YJGG)B^;3q!x2RE6=}NG{2~GP)6Dx3qCZZUVU+#ME9yx{Xec2aUHm} zNsYroE@W}Fm9GtavEl+(NKvemyaSx*c~ifV^OkWg;LY`Mp=Wifz5C$7?%r@L)VDv@ z6Yf{N%W@>tbu`wsFWl7=+Y>qxjvYJPtJ)9tg%5=G=l;ZvQDbhau8X&Y8OyL-QrGGV zP}MOgDKR0LjLDf%7MJ9i-|7x?JG_8H)hp6zMWvFg1Ig#}J$|ljD3g-fMiqG|BTD0K zf+&w9l1YKJ4G3|iYes8 zqU8g_l)+3Ux#0=~*}9U&BHERPqC9CT!hfIqPG96H>4(LbGX3224t>j4;Kt|}Ij!p_V-x_K zS_bkW09lJeSj7Pwxd6mgx^}wU$-NcXHOnQIiLSNdMF^t%15RA`0ry1JAm+OU+#TDA z0hY1|@+T{Zw%X1`TU^%FG@^w(Hjpjk46Z+#>WX1yItKfsA6zIIHx?}apc|us@`?3* zECZju>=S%)B0ZRteE2>4;`l-PPL4`!JUH{6!%VZ*u1s2y(n{;m@eyglDU6IH6LCRF zWYTTJawdIX!XTwvj~)3Zey#z7s(VcuXU)K?)})Xg92EwoE@&L8he@Xr(pcZh4*g$49Evh_|1jrXL%8H&6$ z*ai&lLsE|fcRjNq!R{C+(Q3|i0LP!wFG*SQC1bjTE+30lKy->?jPlgFxgeXqW#W zkKQ1EFY$cke?E7zVoTOpJ6E@5&c5}H?nQ_Dj>C7$;hT4?Um}|fjy#g{1B-RR`MS+> zrs{9Yo9~l#X3yEk-zpjwJhcnHjSH>YK0W@)@rCNzdnJ6i{cI%fwAwri{*51bKlCoT zgVU!jkL7h_eai#FtuI?5Ty@zyraVtdJ$IaeTh74ktaHyuas8RnIMV&r$&JXvpqq@EVD`E$;_F`CzHpu~^f*Q0-r+ zZJnzLJ~ZfR?Pm`>v=IO1Y5Tmd^NzRkY|o;n^xDDMSy zkMed>-AfMY=4yIw zJA1!zR{oebfa&ABn^f(lj}}~^M|o|QA09?1aeD3(6Tq~Vz1{n(-npu6m%2WuGug=W zb90`K+tytgx&?3Jw@odJwf^(|_XqA7bWLUV41Bx&0pY!nIq!xw0rHUWHpfzrmcR=B zAM+6k@gFaBujsWh{ag0Ko$ME(ihW-Cg@=X>{1;vw@->b747%(UeO1_|zfwoTM$;8v zNDso*dKxyHt~Th9Z{on}YO@tQuC~)~34e9F0r?J1-&q~0>$& zAes()fJiE&P zqc=H>dDFn}t1#Y#>Fw5=B~~K}E+ZJtTxfpA)WEvnQ(@2J*JK%G$>sjT_Mo;C$!;Js z!EHHeX#q~S2_IE=E_AqZ=2G{&bY{H$D_vV3+HD4+>W36%|SRD*AyOU@LWD$5Ht+Brzm76%&wP zqhcp=p`Mv+99gU}6M+QESOWQBBtuBBK4E#VX*NHRV3-_44DO{ui0~OYaq)w&Z?u<}VxqC0gItjsTfW!O-*20OS)u zX82>%bkkPVI3U1g8A&i5219R#{~1(`f;^s%Co-xrnHd~}dr9uQ7pqHE3+$#~Jd++s z3~I-1uJWbDW^MneejFN;PRMYdkb`SZJQiElGEeT_OP;I`ek3C+T6)^{cmM-L%m1;dBb5(yX4R Optional[str]: logger.error(f"Error extracting text from URL: {str(e)}") return None +# Assuming the enums and models like FactCheckResponse, VerdictEnum, etc., are already imported async def process_fact_check(query: str) -> Union[FactCheckResponse, UnverifiedFactCheckResponse]: - """Process a single fact check query.""" - if not GOOGLE_API_KEY or not GOOGLE_FACT_CHECK_BASE_URL: + if not PERPLEXITY_API_KEY: + logger.error("Perplexity API key not configured") return UnverifiedFactCheckResponse( claim=query, verdict=VerdictEnum.UNVERIFIED, @@ -56,51 +58,229 @@ async def process_fact_check(query: str) -> Union[FactCheckResponse, UnverifiedF additional_context="This is a temporary system configuration issue." ) - headers = {"Content-Type": "application/json"} - async with httpx.AsyncClient() as client: - fact_checker_sources = get_all_sources() + url = "https://api.perplexity.ai/chat/completions" + headers = { + "accept": "application/json", + "content-type": "application/json", + "Authorization": f"Bearer {PERPLEXITY_API_KEY}" + } - for source in fact_checker_sources: - params = { - "key": GOOGLE_API_KEY, - "query": query, - "languageCode": "en-US", - "reviewPublisherSiteFilter": source.domain, - "pageSize": 10, + payload = { + "model": "sonar", + "messages": [ + { + "role": "system", + "content": ( + "You are a precise fact checker. Analyze the following claim and determine if it's true, false, or partially true. " + "Provide a clear verdict, confidence level (HIGH, MEDIUM, LOW), and cite reliable sources. " + "Format your response as JSON with fields: verdict, confidence, sources (array of URLs), " + "evidence (key facts as a string), and explanation (detailed reasoning as a string)." + ) + }, + { + "role": "user", + "content": f"Fact check this claim: {query}" + } + ] + } + + try: + async with httpx.AsyncClient(timeout=30) as client: + response = await client.post(url, headers=headers, json=payload) + response.raise_for_status() + result = response.json() + perplexity_response = result["choices"][0]["message"]["content"] + + # Attempt to extract JSON + try: + parsed_data = json.loads(perplexity_response) + except json.JSONDecodeError: + match = re.search(r'\{.*\}', perplexity_response, re.DOTALL) + if match: + parsed_data = json.loads(match.group(0)) + else: + parsed_data = extract_fact_check_info(perplexity_response) + + verdict_mapping = { + "true": VerdictEnum.TRUE, + "false": VerdictEnum.FALSE, + "partially true": VerdictEnum.PARTIALLY_TRUE, + "partially false": VerdictEnum.PARTIALLY_TRUE, + "unverified": VerdictEnum.UNVERIFIED } - try: - response = await client.get( - GOOGLE_FACT_CHECK_BASE_URL, params=params, headers=headers + confidence_mapping = { + "high": ConfidenceEnum.HIGH, + "medium": ConfidenceEnum.MEDIUM, + "low": ConfidenceEnum.LOW + } + + raw_verdict = parsed_data.get("verdict", "").lower() + verdict = verdict_mapping.get(raw_verdict, VerdictEnum.UNVERIFIED) + + raw_confidence = parsed_data.get("confidence", "").lower() + confidence = confidence_mapping.get(raw_confidence, ConfidenceEnum.MEDIUM) + + sources = [ + Source( + url=url, + domain=extract_domain(url), + title=f"Source from {extract_domain(url)}", + publisher=extract_domain(url), + date_published=None, + snippet="Source cited by Perplexity AI" ) - response.raise_for_status() - json_response = response.json() + for url in parsed_data.get("sources", []) + ] - if json_response.get("claims"): - return await generate_fact_report(query, json_response) + # Convert evidence to string if it's not already + evidence = parsed_data.get("evidence", "") + if isinstance(evidence, dict): + # Convert dictionary evidence to string format + evidence_str = "" + for key, value in evidence.items(): + evidence_str += f"{key}: {value}\n" + evidence = evidence_str.strip() + + # Convert explanation to string if it's not already + explanation = parsed_data.get("explanation", "") + if isinstance(explanation, dict): + explanation_str = "" + for key, value in explanation.items(): + explanation_str += f"{key}: {value}\n" + explanation = explanation_str.strip() - except Exception as e: - logger.error(f"Error with source {source.domain}: {str(e)}") - continue - - try: - search_request = SearchRequest( - search_text=query, - source_types=["fact_checkers"] + return FactCheckResponse( + claim=query, + verdict=verdict, + confidence=confidence, + sources=sources, + evidence=evidence, + explanation=explanation, + additional_context=f"Fact checked using PlanPost AI on {datetime.now().strftime('%Y-%m-%d')}" ) - ai_response = await search_websites(search_request) - return await generate_fact_report(query, ai_response) + except Exception as e: + logger.error(f"Fact check error: {str(e)}") + return UnverifiedFactCheckResponse( + claim=query, + verdict=VerdictEnum.UNVERIFIED, + confidence=ConfidenceEnum.LOW, + sources=[], + evidence=str(e), + explanation="Failed to contact Perplexity AI or parse its response.", + additional_context="Possible API issue or malformed response." + ) - except Exception as e: - logger.error(f"Error in AI fact check: {str(e)}") - return await generate_fact_report(query, { - "status": "no_results", - "verification_result": { - "no_sources_found": True, - "reason": str(e) - } - }) + + +def extract_domain(url: str) -> str: + """Extract domain from URL. + + Args: + url: The URL to extract domain from + + Returns: + The domain name or "unknown" if parsing fails + """ + try: + from urllib.parse import urlparse + parsed_url = urlparse(url) + domain = parsed_url.netloc + return domain if domain else "unknown" + except Exception as e: + logger.warning(f"Failed to extract domain from URL {url}: {str(e)}") + return "unknown" + + +def extract_fact_check_info(text_response: str) -> Dict[str, Any]: + """Extract fact-checking information from a text response when JSON parsing fails. + + Args: + text_response: The text response from Perplexity AI + + Returns: + A dictionary with fact-checking information extracted from the text + """ + import re + + result = { + "verdict": "unverified", + "confidence": "medium", + "sources": [], + "evidence": "", + "explanation": "" + } + + # Try to extract verdict with more comprehensive pattern matching + verdict_patterns = [ + r'verdict[:\s]+(true|false|partially true|partially false|inconclusive|unverified)', + r'(true|false|partially true|partially false|inconclusive|unverified)[:\s]+verdict', + r'claim is (true|false|partially true|partially false|inconclusive|unverified)', + r'statement is (true|false|partially true|partially false|inconclusive|unverified)' + ] + + for pattern in verdict_patterns: + verdict_match = re.search(pattern, text_response.lower(), re.IGNORECASE) + if verdict_match: + result["verdict"] = verdict_match.group(1) + break + + # Try to extract confidence with multiple patterns + confidence_patterns = [ + r'confidence[:\s]+(high|medium|low)', + r'(high|medium|low)[:\s]+confidence', + r'confidence level[:\s]+(high|medium|low)', + r'(high|medium|low)[:\s]+confidence level' + ] + + for pattern in confidence_patterns: + confidence_match = re.search(pattern, text_response.lower(), re.IGNORECASE) + if confidence_match: + result["confidence"] = confidence_match.group(1) + break + + # Try to extract URLs as sources - more robust pattern + urls = re.findall(r'https?://[^\s"\'\]\)]+', text_response) + # Filter out any malformed URLs + valid_urls = [] + for url in urls: + if '.' in url and len(url) > 10: # Basic validation + valid_urls.append(url) + result["sources"] = valid_urls + + # Try to extract evidence and explanation with multiple patterns + evidence_patterns = [ + r'evidence[:\s]+(.*?)(?=explanation|\Z)', + r'key facts[:\s]+(.*?)(?=explanation|\Z)', + r'facts[:\s]+(.*?)(?=explanation|\Z)' + ] + + for pattern in evidence_patterns: + evidence_match = re.search(pattern, text_response, re.IGNORECASE | re.DOTALL) + if evidence_match: + result["evidence"] = evidence_match.group(1).strip() + break + + explanation_patterns = [ + r'explanation[:\s]+(.*?)(?=\Z)', + r'reasoning[:\s]+(.*?)(?=\Z)', + r'analysis[:\s]+(.*?)(?=\Z)' + ] + + for pattern in explanation_patterns: + explanation_match = re.search(pattern, text_response, re.IGNORECASE | re.DOTALL) + if explanation_match: + result["explanation"] = explanation_match.group(1).strip() + break + + # If no structured information found, use the whole response as evidence + if not result["evidence"] and not result["explanation"]: + result["evidence"] = text_response + # Generate a minimal explanation if none was found + result["explanation"] = "The fact-checking service provided information about this claim but did not structure it in the expected format. The full response has been included as evidence for you to review." + + return result async def generate_fact_report(query: str, fact_check_data: dict | AIFactCheckResponse) -> Union[FactCheckResponse, UnverifiedFactCheckResponse]: diff --git a/app/config.py b/app/config.py index 6e7437c..6a54faa 100644 --- a/app/config.py +++ b/app/config.py @@ -7,6 +7,7 @@ GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"] GOOGLE_FACT_CHECK_BASE_URL = os.environ["GOOGLE_FACT_CHECK_BASE_URL"] GOOGLE_ENGINE_ID = os.environ["GOOGLE_ENGINE_ID"] GOOGLE_SEARCH_URL = os.environ["GOOGLE_SEARCH_URL"] +PERPLEXITY_API_KEY= os.environ["PERPLEXITY_API_KEY"] OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] FRONTEND_URL = os.environ["FRONTEND_URL"] diff --git a/app/models/__pycache__/fact_check_models.cpython-312.pyc b/app/models/__pycache__/fact_check_models.cpython-312.pyc index 64a94030489fd6cd337f62bc6b5b437c1b1fca86..23d388ce022d3629b96865d1e81900018da243be 100644 GIT binary patch delta 79 zcmcbu_g0VlG%qg~0}yaaeA~$F!DJeuA6lGRRIFdDmz$QDk(ry3UtFA;r|*)VT$-Dj gS5mB>mY7_ko1BrFoSj;vo0JHo@=`XJF+CLl00iC}qW}N^ delta 61 zcmaE>cUzD9G%qg~0}w2GnY@wPgGt#*KO;XkRll^PI3qtLwNgJZrMMt7CnvE;KP@r2 P#5p51IeT*l(^C-ud{Y&~ diff --git a/app/websites/__pycache__/fact_checker_website.cpython-312.pyc b/app/websites/__pycache__/fact_checker_website.cpython-312.pyc index c943a2ce632655db9ed91fc3630d29d43816a2d0..68d90e7e90fa78455a9a2e12c2c3c30be763aa9e 100644 GIT binary patch delta 79 zcmbQBwn&ZpG%qg~0}yaaeA~#K!)F?)A6lGRRIFdDmz$QDk(ry3UtFA;r|*)VT$-Dj gS5mB>mY7_ko1BrFoSj;vo0JHo@=`X>;wxhT0N$4x;{X5v delta 61 zcmZ3aHbIU1G%qg~0}v#BPu|F#!>8=7pOK%Ns$W`CoROcBTB)CyQe2RklapAapO%