From 8c48af11c2da31e8db9e08cec53eb6b00cc9fcec Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Mon, 2 Oct 2023 14:44:11 -0700
Subject: [PATCH] fixes to get optional params

---
 litellm/__pycache__/main.cpython-311.pyc  | Bin 53167 -> 53083 bytes
 litellm/__pycache__/utils.cpython-311.pyc | Bin 139254 -> 139316 bytes
 litellm/llms/palm.py                      |  76 ++++++++++-----------
 litellm/llms/sagemaker.py                 |  79 +++++++++++-----------
 litellm/main.py                           |   1 -
 litellm/tests/test_completion.py          |   2 +-
 litellm/tests/test_streaming.py           |   2 +-
 litellm/utils.py                          |  14 ++--
 pyproject.toml                            |   2 +-
 9 files changed, 86 insertions(+), 90 deletions(-)

diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 6ebbe2ec9899d6b453a336e2d01d6e0387abca44..cdb85c0d8773c59eb261e48de2f69f133d9f77d1 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/palm.py b/litellm/llms/palm.py
index 0521f0b042..8a7203d891 100644
--- a/litellm/llms/palm.py
+++ b/litellm/llms/palm.py
@@ -56,48 +56,44 @@ def completion(
 
     ## COMPLETION CALL
     response = palm.chat(messages=prompt)
-
-    if "stream" in optional_params and optional_params["stream"] == True:
-        return response.iter_lines()
-    else:
-        ## LOGGING
-        logging_obj.post_call(
-            input=prompt,
-            api_key="",
-            original_response=response,
-            additional_args={"complete_input_dict": {}},
-        )
-        print_verbose(f"raw model_response: {response}")
-        ## RESPONSE OBJECT
-        completion_response = response.last
-
-        if "error" in completion_response:
-            raise PalmError(
-                message=completion_response["error"],
-                status_code=response.status_code,
-            )
-        else:
-            try:
-                model_response["choices"][0]["message"]["content"] = completion_response
-            except:
-                raise PalmError(message=json.dumps(completion_response), status_code=response.status_code)
-
-        ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. 
-        prompt_tokens = len(
-            encoding.encode(prompt)
-        )
-        completion_tokens = len(
-            encoding.encode(model_response["choices"][0]["message"]["content"])
-        )
-
-        model_response["created"] = time.time()
-        model_response["model"] = "palm/" + model
-        model_response["usage"] = {
-            "prompt_tokens": prompt_tokens,
-            "completion_tokens": completion_tokens,
-            "total_tokens": prompt_tokens + completion_tokens,
-        }
-        return model_response
+    ## LOGGING
+    logging_obj.post_call(
+        input=prompt,
+        api_key="",
+        original_response=response,
+        additional_args={"complete_input_dict": {}},
+    )
+    print_verbose(f"raw model_response: {response}")
+    ## RESPONSE OBJECT
+    completion_response = response.last
+
+    if "error" in completion_response:
+        raise PalmError(
+            message=completion_response["error"],
+            status_code=response.status_code,
+        )
+    else:
+        try:
+            model_response["choices"][0]["message"]["content"] = completion_response
+        except:
+            raise PalmError(message=json.dumps(completion_response), status_code=response.status_code)
+
+    ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. 
+    prompt_tokens = len(
+        encoding.encode(prompt)
+    )
+    completion_tokens = len(
+        encoding.encode(model_response["choices"][0]["message"]["content"])
+    )
+
+    model_response["created"] = time.time()
+    model_response["model"] = "palm/" + model
+    model_response["usage"] = {
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+        "total_tokens": prompt_tokens + completion_tokens,
+    }
+    return model_response
 
 def embedding():
     # logic for parsing in - calling - parsing out model embedding calls
diff --git a/litellm/llms/sagemaker.py b/litellm/llms/sagemaker.py
index 4f1ae1acc5..8bcc8d51d1 100644
--- a/litellm/llms/sagemaker.py
+++ b/litellm/llms/sagemaker.py
@@ -6,6 +6,7 @@ import time
 from typing import Callable
 from litellm.utils import ModelResponse, get_secret
 import sys
+from copy import deepcopy
 
 class SagemakerError(Exception):
     def __init__(self, status_code, message):
@@ -60,9 +61,12 @@ def completion(
             )
         else:
             prompt += f"{message['content']}"
+    # pop streaming if it's in the optional params as 'stream' raises an error with sagemaker
+    inference_params = deepcopy(optional_params)
+    inference_params.pop("stream", None)
     data = {
         "inputs": prompt,
-        "parameters": optional_params
+        "parameters": inference_params
     }
 
     ## LOGGING
@@ -79,46 +83,43 @@ def completion(
         CustomAttributes="accept_eula=true",
     )
     response = response["Body"].read().decode("utf8")
-    if "stream" in optional_params and optional_params["stream"] == True:
-        return response.iter_lines()
-    else:
-        ## LOGGING
-        logging_obj.post_call(
-            input=prompt,
-            api_key="",
-            original_response=response,
-            additional_args={"complete_input_dict": data},
-        )
-        print_verbose(f"raw model_response: {response}")
-        ## RESPONSE OBJECT
-        completion_response = json.loads(response)
-        if "error" in completion_response:
-            raise SagemakerError(
-                message=completion_response["error"],
-                status_code=response.status_code,
-            )
-        else:
-            try:
-                model_response["choices"][0]["message"]["content"] = completion_response[0]["generation"]
-            except:
-                raise SagemakerError(message=json.dumps(completion_response), status_code=response.status_code)
-
-        ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. 
-        prompt_tokens = len(
-            encoding.encode(prompt)
-        )
-        completion_tokens = len(
-            encoding.encode(model_response["choices"][0]["message"]["content"])
-        )
-
-        model_response["created"] = time.time()
-        model_response["model"] = model
-        model_response["usage"] = {
-            "prompt_tokens": prompt_tokens,
-            "completion_tokens": completion_tokens,
-            "total_tokens": prompt_tokens + completion_tokens,
-        }
-        return model_response
+    ## LOGGING
+    logging_obj.post_call(
+        input=prompt,
+        api_key="",
+        original_response=response,
+        additional_args={"complete_input_dict": data},
+    )
+    print_verbose(f"raw model_response: {response}")
+    ## RESPONSE OBJECT
+    completion_response = json.loads(response)
+    if "error" in completion_response:
+        raise SagemakerError(
+            message=completion_response["error"],
+            status_code=response.status_code,
+        )
+    else:
+        try:
+            model_response["choices"][0]["message"]["content"] = completion_response[0]["generation"]
+        except:
+            raise SagemakerError(message=json.dumps(completion_response), status_code=response.status_code)
+
+    ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. 
+    prompt_tokens = len(
+        encoding.encode(prompt)
+    )
+    completion_tokens = len(
+        encoding.encode(model_response["choices"][0]["message"]["content"])
+    )
+
+    model_response["created"] = time.time()
+    model_response["model"] = model
+    model_response["usage"] = {
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+        "total_tokens": prompt_tokens + completion_tokens,
+    }
+    return model_response
 
 def embedding():
     # logic for parsing in - calling - parsing out model embedding calls
diff --git a/litellm/main.py b/litellm/main.py
index 5b1871b282..f991889df5 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -977,7 +977,6 @@ def completion(
             encoding=encoding,
             logging_obj=logging
         )
-
         if "stream" in optional_params and optional_params["stream"]==True: ## [BETA]
             # sagemaker does not support streaming as of now so we're faking streaming:
             # https://discuss.huggingface.co/t/streaming-output-text-when-deploying-on-sagemaker/39611
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index ba9390f16b..c01b605d92 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -926,7 +926,7 @@ def test_completion_with_fallbacks():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_with_fallbacks()
+# test_completion_with_fallbacks()
 # def test_completion_with_fallbacks_multiple_keys():
 #     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
 #     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index b0f0abba9d..f016ae97db 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -709,7 +709,7 @@ def test_completion_sagemaker_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-# test_completion_sagemaker_stream()
+test_completion_sagemaker_stream()
 
 # test on openai completion call
 def test_openai_text_completion_call():
diff --git a/litellm/utils.py b/litellm/utils.py
index c030713628..e25dab254b 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -977,9 +977,9 @@ def get_optional_params( # use the openai defaults
         raise ValueError("LiteLLM.Exception: Function calling is not supported by this provider")
 
     def _check_valid_arg(supported_params):
-        print(f"checking params for {model}")
-        print(f"params passed in {passed_params}")
-        print(f"non-default params passed in {non_default_params}")
+        print_verbose(f"checking params for {model}")
+        print_verbose(f"params passed in {passed_params}")
+        print_verbose(f"non-default params passed in {non_default_params}")
         unsupported_params = [k for k in non_default_params.keys() if k not in supported_params]
         if unsupported_params:
             raise ValueError("LiteLLM.Exception: Unsupported parameters passed: {}".format(', '.join(unsupported_params)))
@@ -1225,7 +1225,6 @@ def get_optional_params( # use the openai defaults
     for k in passed_params.keys():
         if k not in default_params.keys():
             optional_params[k] = passed_params[k]
-    print(f"final params going to model: {optional_params}")
     return optional_params
 
 def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
@@ -3441,14 +3440,15 @@ def completion_with_split_tests(models={}, messages=[], use_client=False, overri
 
 def completion_with_fallbacks(**kwargs):
     print(f"kwargs inside completion_with_fallbacks: {kwargs}")
-    nested_kwargs = kwargs.pop("kwargs")
+    nested_kwargs = kwargs.pop("kwargs", {})
     response = None
     rate_limited_models = set()
     model_expiration_times = {}
     start_time = time.time()
     original_model = kwargs["model"]
-    fallbacks = [kwargs["model"]] + nested_kwargs["fallbacks"]
-    del nested_kwargs["fallbacks"] # remove fallbacks so it's not recursive
+    fallbacks = [kwargs["model"]] + nested_kwargs.get("fallbacks", [])
+    if "fallbacks" in nested_kwargs:
+        del nested_kwargs["fallbacks"] # remove fallbacks so it's not recursive
 
     while response == None and time.time() - start_time < 45:
         for model in fallbacks:
diff --git a/pyproject.toml b/pyproject.toml
index 44398321ad..3747784598 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.811"
+version = "0.1.812"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
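--
Editorial note, not part of the patch: the core fix above is to scrub provider-unsupported
keys (here, "stream" for SageMaker) from a *copy* of optional_params before building the
request body. The deepcopy matters because main.py re-reads optional_params["stream"] after
sagemaker.completion() returns in order to fake streaming; popping the key from the shared
dict would silently break that check. A minimal, self-contained sketch of the pattern
follows; scrub_params and UNSUPPORTED_PARAMS are illustrative names, not litellm APIs.

from copy import deepcopy

# Hypothetical table of params each provider rejects; illustrative only.
UNSUPPORTED_PARAMS = {
    "sagemaker": {"stream"},
}

def scrub_params(provider: str, optional_params: dict) -> dict:
    """Return a copy of optional_params that is safe to send to `provider`.

    The caller's dict is left untouched, so downstream checks such as
    optional_params["stream"] keep working after the provider call.
    """
    inference_params = deepcopy(optional_params)
    for key in UNSUPPORTED_PARAMS.get(provider, set()):
        inference_params.pop(key, None)  # default arg avoids KeyError if absent
    return inference_params

if __name__ == "__main__":
    params = {"temperature": 0.2, "stream": True}
    print(scrub_params("sagemaker", params))  # {'temperature': 0.2}
    print(params)  # unchanged: {'temperature': 0.2, 'stream': True}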