From 60873abb412bc908025359dafdfa6efba3594a10 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 26 Aug 2023 19:30:54 -0700
Subject: [PATCH] further fixes to together ai streaming

---
 litellm/__pycache__/main.cpython-311.pyc  | Bin 28867 -> 29079 bytes
 litellm/__pycache__/utils.cpython-311.pyc | Bin 66078 -> 66013 bytes
 litellm/main.py                           |   1 +
 litellm/tests/test_streaming.py           |  53 +++++++++++++++------
 litellm/utils.py                          |   4 +-
 pyproject.toml                            |   2 +-
 6 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/litellm/main.py b/litellm/main.py
index f0238a6e7..6a48c7f57 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -98,6 +98,7 @@ def completion(
         if fallbacks != []:
             return completion_with_fallbacks(**args)
         if litellm.model_alias_map and model in litellm.model_alias_map:
+            args["model_alias_map"] = litellm.model_alias_map
             model = litellm.model_alias_map[model]  # update the model to the actual value if an alias has been passed in
         model_response = ModelResponse()
         if azure:  # this flag is deprecated, remove once notebooks are also updated.
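The main.py hunk records the alias map in args before the alias is resolved to the underlying model string. For context, a minimal sketch of how litellm.model_alias_map is used from the caller's side; the alias name and the Together AI model string are illustrative, and a Together AI API key is assumed to be configured in the environment:

    import litellm
    from litellm import completion

    # Illustrative alias: requests for "starcoder" resolve to the full
    # Together AI model string before the provider-specific call is made.
    litellm.model_alias_map = {"starcoder": "together_ai/bigcode/starcoder"}

    response = completion(
        model="starcoder",  # resolved via litellm.model_alias_map
        messages=[{"role": "user", "content": "write a hello world program"}],
        stream=True,
    )
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        print(delta.get("content", ""), end="")
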
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index b6deb2098..b6e37a7e8 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -62,22 +62,22 @@ messages = [{"content": user_message, "role": "user"}]
 
 
 # test on anthropic completion call
-# try:
-#     response = completion(
-#         model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
-#     )
-#     complete_response = ""
-#     start_time = time.time()
-#     for chunk in response:
-#         chunk_time = time.time()
-#         print(f"time since initial request: {chunk_time - start_time:.5f}")
-#         print(chunk["choices"][0]["delta"])
-#         complete_response += chunk["choices"][0]["delta"]["content"]
-#     if complete_response == "":
-#         raise Exception("Empty response received")
-# except:
-#     print(f"error occurred: {traceback.format_exc()}")
-#     pass
+try:
+    response = completion(
+        model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
+    )
+    complete_response = ""
+    start_time = time.time()
+    for chunk in response:
+        chunk_time = time.time()
+        print(f"time since initial request: {chunk_time - start_time:.5f}")
+        print(chunk["choices"][0]["delta"])
+        complete_response += chunk["choices"][0]["delta"]["content"]
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
 
 
 # # test on huggingface completion call
@@ -98,7 +98,7 @@ messages = [{"content": user_message, "role": "user"}]
 #     print(f"error occurred: {traceback.format_exc()}")
 #     pass
 
-# test on together ai completion call
+# test on together ai completion call - replit-code-3b
 try:
     start_time = time.time()
     response = completion(
@@ -117,6 +117,25 @@ except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
 
+# test on together ai completion call - starcoder
+try:
+    start_time = time.time()
+    response = completion(
+        model="together_ai/bigcode/starcoder", messages=messages, logger_fn=logger_fn, stream=True
+    )
+    complete_response = ""
+    print(f"returned response object: {response}")
+    for chunk in response:
+        chunk_time = time.time()
+        complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else ""
+        if len(complete_response) > 0:
+            print(complete_response)
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
+
 
 # # test on azure completion call
 # try:
diff --git a/litellm/utils.py b/litellm/utils.py
index 77ce504cf..719ec6514 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1468,8 +1468,6 @@ class CustomStreamWrapper:
         if model in litellm.cohere_models:
             # cohere does not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
-        elif custom_llm_provider == "together_ai":
-            self.completion_stream = iter(completion_stream)
         else:
             self.completion_stream = completion_stream
 
@@ -1512,7 +1510,7 @@ class CustomStreamWrapper:
         elif self.model == "replicate":
             chunk = next(self.completion_stream)
             completion_obj["content"] = chunk
-        elif (self.model == "together_ai") or ("togethercomputer"
+        elif (self.custom_llm_provider and self.custom_llm_provider == "together_ai") or ("togethercomputer"
               in self.model):
             chunk = next(self.completion_stream)
             text_data = self.handle_together_ai_chunk(chunk)
diff --git a/pyproject.toml b/pyproject.toml
index 20675dce8..f51666c0c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.490" +version = "0.1.491" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License"