From 21cd55ab2606a775578a041df7a1e1c43916106c Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 16 Sep 2023 10:34:20 -0700
Subject: [PATCH] ensure streaming format is exactly the same as openai

---
 litellm/__pycache__/main.cpython-311.pyc  | Bin 33783 -> 33794 bytes
 litellm/__pycache__/utils.cpython-311.pyc | Bin 108632 -> 109276 bytes
 litellm/main.py                           |   2 +-
 litellm/tests/test_streaming.py           | 391 +++++++++++++---------
 litellm/utils.py                          |  49 ++-
 pyproject.toml                            |   2 +-
 6 files changed, 275 insertions(+), 169 deletions(-)

diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 48f5427417d4c6ff01911041e8654c5ee22f334d..78da5f6ab8e2926add70c5abe665c9a906315fa4 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
zb!KY8nbc87U1w6qoJp-dlREX(%%-2EHl0dsdMB&kj;kN=o*6m)oHb@r#(|VGX`_#B zKa*B*Cavb2!;)2^FYOS;V{Gvybk#o$v~xR9eEs4Z7w=#E){yS>T^(ZVw0Np{{`~m> zSPo00QvfMzsMRMYhNPHSuVm1{Vu;~pBigl z$E8usaB$8bOuM*_KDSeh&!`i2Qp6x!s_*L*x#m^4P5RMJ@m%9*@P!B{9in<+$-FV+ z-63289X#(OR*mEiKg9;47Rcw-U!yu~aqtmT@7pd)?Z>Ijd-@yOMVFY=zeJH_Y_ zYm5C%4&_#){r`pKa2y>p`=imFLGeb1d=TA%0ggbJ8{~(O_w5)<^~Gc3(23~+0(7y3 zZuroPTlpPo@VEGWwM(qFC!d12V#0*=zw8kQL;RZB5!fA|OaH83^I}CUbxQ#jR^WCp zdm4a2S7g!~J(o}OcMJ}#pcGx1j*GYDB`iArX+gjomqG>JsPp)LL5&ufuVM~S(y&sf z*bL6%7(nqG7C^LjBh^pga!Hr>oZ*XJ{F=7dz@+m6F?0+gG z{$UfV^v8Qe!(8S+UY-TFF9N^^?)!FtG&@vYANUvoD>JNgW0*p2+*7e$cdeLEBGK>} z^vcpXPLD(4Zu-G%#Yp>eRAqMmQ`d^qF^)%2y+^-wKb?@8^-uPT$qqP>8O~n>MTTB{zzNW`OHe6an_?cikv%;8CappAFOJ-z4fMfhxHVZhGNJhD6tX z1~PyH`qMXw31)z1Vfp7^PMFuYPE%?0#0p-RWx(zFhMPqp`IB8Yi)+LpEEjA3K>3!2 z?wIOo>Yr;qMmeSrYo?-#MCK}dk1arpzXzB4SR{PpZP4c*5Sz?FLM|+vL;6Dp#52^> z)>}o1-Bjr3`fUfrqSlvCrcWp+dxM6-B2-VFe~{;ffWfOkkAn`N}RE%m=&h_ z9*W*)`ubd5jCK47>DP3>E=GlaRzz=>IwA@S$lp`>Q~eL=qQ>gkgKm1M8`|R3HZE~4 zTeyLv`Q7ogxJy<=_r|Hmf#@aXusJ0=kcuIo`>1sHS4zxY121_z?Ez=Y?&!r@`Ltep znD&?x_3s}R!yKPd@1G^~8;3=n2*px+5(XG(XrM#^6UC^h7A+zTW)8}^l6iA4rM^Md zP|f;=divnkT@7YH)NWQ9s17P7shEKx`r#dQM!yP^4ewHg`i6=F@@Wjjq50_|(hf5e zb~~E#e(Xsw#Ufx|fcG4doeW2tIePpUD!-^ddzToMhu4orhPMNkklVd2)pT{(#XGIW z$L|(%>E4Py?;f#x%OBAMM+Uq#rZp(s_xC{k7|IO5F+B21=aWeN5Wp{ye}oj?B%AM< zUqtF9z+Gr|6i@{CP%l0zvO{kmT?{Az6w$cla6lOzF9Xa4)B|{P+)OFMhC33{g-kb~ z4e%=9p8y9sOa`#Ijz#J)3T7e&A1-k*PIl2@#lrWYOyk~>_`iQ=BFr7C^5?cw0#T?7fl@|8?5-xbqgktvu zzhe3Tc5t>9zrUU5h=i#Q*@gW*ZWdX1SG#A9Orp>$F=+`@_a8ha=2*p8{nz)3tJl~C z{b2QCIMDMkaz7m?N*N~Tca;~zf#n|~_tSx*$^HG`6wirqQ$^Z23&9I diff --git a/litellm/main.py b/litellm/main.py index 765c6ff83..06d938ac8 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -197,7 +197,7 @@ def completion( completion_call_id=id ) logging.update_environment_variables(model=model, user=user, optional_params=optional_params, litellm_params=litellm_params) - get_llm_provider(model=model, custom_llm_provider=custom_llm_provider) + model, custom_llm_provider = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider) if custom_llm_provider == "azure": # azure configs api_type = get_secret("AZURE_API_TYPE") or "azure" diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 6b1b2b9a1..eff0ddcb3 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -24,6 +24,170 @@ def logger_fn(model_call_object: dict): user_message = "Hello, how are you?" messages = [{"content": user_message, "role": "user"}] + +first_openai_chunk_example = { + "id": "chatcmpl-7zSKLBVXnX9dwgRuDYVqVVDsgh2yp", + "object": "chat.completion.chunk", + "created": 1694881253, + "model": "gpt-4-0613", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "" + }, + "finish_reason": None # it's null + } + ] +} + +def validate_first_format(chunk): + # write a test to make sure chunk follows the same format as first_openai_chunk_example + assert isinstance(chunk, dict), "Chunk should be a dictionary." + assert "id" in chunk, "Chunk should have an 'id'." + assert isinstance(chunk['id'], str), "'id' should be a string." + + assert "object" in chunk, "Chunk should have an 'object'." + assert isinstance(chunk['object'], str), "'object' should be a string." + + assert "created" in chunk, "Chunk should have a 'created'." + assert isinstance(chunk['created'], int), "'created' should be an integer." + + assert "model" in chunk, "Chunk should have a 'model'." 
+ assert isinstance(chunk['model'], str), "'model' should be a string." + + assert "choices" in chunk, "Chunk should have 'choices'." + assert isinstance(chunk['choices'], list), "'choices' should be a list." + + for choice in chunk['choices']: + assert isinstance(choice, dict), "Each choice should be a dictionary." + + assert "index" in choice, "Each choice should have 'index'." + assert isinstance(choice['index'], int), "'index' should be an integer." + + assert "delta" in choice, "Each choice should have 'delta'." + assert isinstance(choice['delta'], dict), "'delta' should be a dictionary." + + assert "role" in choice['delta'], "'delta' should have a 'role'." + assert isinstance(choice['delta']['role'], str), "'role' should be a string." + + assert "content" in choice['delta'], "'delta' should have 'content'." + assert isinstance(choice['delta']['content'], str), "'content' should be a string." + + assert "finish_reason" in choice, "Each choice should have 'finish_reason'." + assert (choice['finish_reason'] is None) or isinstance(choice['finish_reason'], str), "'finish_reason' should be None or a string." + +second_openai_chunk_example = { + "id": "chatcmpl-7zSKLBVXnX9dwgRuDYVqVVDsgh2yp", + "object": "chat.completion.chunk", + "created": 1694881253, + "model": "gpt-4-0613", + "choices": [ + { + "index": 0, + "delta": { + "content": "Hello" + }, + "finish_reason": None # it's null + } + ] +} + +def validate_second_format(chunk): + assert isinstance(chunk, dict), "Chunk should be a dictionary." + assert "id" in chunk, "Chunk should have an 'id'." + assert isinstance(chunk['id'], str), "'id' should be a string." + + assert "object" in chunk, "Chunk should have an 'object'." + assert isinstance(chunk['object'], str), "'object' should be a string." + + assert "created" in chunk, "Chunk should have a 'created'." + assert isinstance(chunk['created'], int), "'created' should be an integer." + + assert "model" in chunk, "Chunk should have a 'model'." + assert isinstance(chunk['model'], str), "'model' should be a string." + + assert "choices" in chunk, "Chunk should have 'choices'." + assert isinstance(chunk['choices'], list), "'choices' should be a list." + + for choice in chunk['choices']: + assert isinstance(choice, dict), "Each choice should be a dictionary." + + assert "index" in choice, "Each choice should have 'index'." + assert isinstance(choice['index'], int), "'index' should be an integer." + + assert "delta" in choice, "Each choice should have 'delta'." + assert isinstance(choice['delta'], dict), "'delta' should be a dictionary." + + assert "content" in choice['delta'], "'delta' should have 'content'." + assert isinstance(choice['delta']['content'], str), "'content' should be a string." + + assert "finish_reason" in choice, "Each choice should have 'finish_reason'." + assert (choice['finish_reason'] is None) or isinstance(choice['finish_reason'], str), "'finish_reason' should be None or a string." + +last_openai_chunk_example = { + "id": "chatcmpl-7zSKLBVXnX9dwgRuDYVqVVDsgh2yp", + "object": "chat.completion.chunk", + "created": 1694881253, + "model": "gpt-4-0613", + "choices": [ + { + "index": 0, + "delta": {}, + "finish_reason": "stop" + } + ] +} + +def validate_last_format(chunk): + assert isinstance(chunk, dict), "Chunk should be a dictionary." + assert "id" in chunk, "Chunk should have an 'id'." + assert isinstance(chunk['id'], str), "'id' should be a string." + + assert "object" in chunk, "Chunk should have an 'object'." 
+ assert isinstance(chunk['object'], str), "'object' should be a string." + + assert "created" in chunk, "Chunk should have a 'created'." + assert isinstance(chunk['created'], int), "'created' should be an integer." + + assert "model" in chunk, "Chunk should have a 'model'." + assert isinstance(chunk['model'], str), "'model' should be a string." + + assert "choices" in chunk, "Chunk should have 'choices'." + assert isinstance(chunk['choices'], list), "'choices' should be a list." + + for choice in chunk['choices']: + assert isinstance(choice, dict), "Each choice should be a dictionary." + + assert "index" in choice, "Each choice should have 'index'." + assert isinstance(choice['index'], int), "'index' should be an integer." + + assert "delta" in choice, "Each choice should have 'delta'." + assert isinstance(choice['delta'], dict), "'delta' should be a dictionary." + + assert "finish_reason" in choice, "Each choice should have 'finish_reason'." + assert isinstance(choice['finish_reason'], str), "'finish_reason' should be a string." + +def streaming_format_tests(idx, chunk): + extracted_chunk = "" + finished = False + if idx == 0: # ensure role assistant is set + validate_first_format(chunk=chunk) + role = chunk["choices"][0]["delta"]["role"] + assert role == "assistant" + elif idx == 1: # second chunk + validate_second_format(chunk=chunk) + if idx != 0: # ensure no role + if "role" in chunk["choices"][0]["delta"]: + raise Exception("role should not exist after first chunk") + if chunk["choices"][0]["finish_reason"]: # ensure finish reason is only in last chunk + validate_last_format(chunk=chunk) + finished = True + if "content" in chunk["choices"][0]["delta"]: + extracted_chunk = chunk["choices"][0]["delta"]["content"] + return extracted_chunk, finished + def test_completion_cohere_stream(): try: messages = [ @@ -38,36 +202,18 @@ def test_completion_cohere_stream(): ) complete_response = "" # Add any assertions here to check the response - for chunk in response: - print(f"chunk: {chunk}") - complete_response += chunk["choices"][0]["delta"]["content"] - if complete_response == "": + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + if complete_response.strip() == "": raise Exception("Empty response received") print(f"completion_response: {complete_response}") - except KeyError as e: - pass except Exception as e: pytest.fail(f"Error occurred: {e}") -# test on baseten completion call -# try: -# response = completion( -# model="baseten/RqgAEn0", messages=messages, logger_fn=logger_fn -# ) -# print(f"response: {response}") -# complete_response = "" -# start_time = time.time() -# for chunk in response: -# chunk_time = time.time() -# print(f"time since initial request: {chunk_time - start_time:.5f}") -# print(chunk["choices"][0]["delta"]) -# complete_response += chunk["choices"][0]["delta"]["content"] -# if complete_response == "": -# raise Exception("Empty response received") -# print(f"complete response: {complete_response}") -# except: -# print(f"error occurred: {traceback.format_exc()}") -# pass +# test_completion_cohere_stream() # test on openai completion call def test_openai_text_completion_call(): @@ -77,16 +223,17 @@ def test_openai_text_completion_call(): ) complete_response = "" start_time = time.time() - for chunk in response: - chunk_time = time.time() - print(f"chunk: {chunk}") - if "content" in chunk["choices"][0]["delta"]: - complete_response += chunk["choices"][0]["delta"]["content"] 
- if complete_response == "": + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + if complete_response.strip() == "": raise Exception("Empty response received") except: - print(f"error occurred: {traceback.format_exc()}") - pass + pytest.fail(f"error occurred: {traceback.format_exc()}") + +test_openai_text_completion_call() # # test on ai21 completion call def ai21_completion_call(): @@ -97,18 +244,18 @@ def ai21_completion_call(): print(f"response: {response}") complete_response = "" start_time = time.time() - for chunk in response: - chunk_time = time.time() - print(f"time since initial request: {chunk_time - start_time:.5f}") - print(chunk) - if "content" in chunk["choices"][0]["delta"]: - complete_response += chunk["choices"][0]["delta"]["content"] - if complete_response == "": + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + if complete_response.strip() == "": raise Exception("Empty response received") + print(f"completion_response: {complete_response}") except: - print(f"error occurred: {traceback.format_exc()}") - pass + pytest.fail(f"error occurred: {traceback.format_exc()}") +# ai21_completion_call() # test on openai completion call def test_openai_chat_completion_call(): try: @@ -117,107 +264,20 @@ def test_openai_chat_completion_call(): ) complete_response = "" start_time = time.time() - for chunk in response: - print(chunk) - if chunk["choices"][0]["finish_reason"]: + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: break - # if chunk["choices"][0]["delta"]["role"] != "assistant": - # raise Exception("invalid role") - if "content" in chunk["choices"][0]["delta"]: - complete_response += chunk["choices"][0]["delta"]["content"] + complete_response += chunk # print(f'complete_chunk: {complete_response}') if complete_response.strip() == "": raise Exception("Empty response received") + print(f"complete response: {complete_response}") except: print(f"error occurred: {traceback.format_exc()}") pass -test_openai_chat_completion_call() -async def completion_call(): - try: - response = completion( - model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn - ) - print(f"response: {response}") - complete_response = "" - start_time = time.time() - # Change for loop to async for loop - async for chunk in response: - chunk_time = time.time() - print(f"time since initial request: {chunk_time - start_time:.5f}") - print(chunk["choices"][0]["delta"]) - if "content" in chunk["choices"][0]["delta"]: - complete_response += chunk["choices"][0]["delta"]["content"] - if complete_response == "": - raise Exception("Empty response received") - except: - print(f"error occurred: {traceback.format_exc()}") - pass - -# asyncio.run(completion_call()) - -# # test on azure completion call -# try: -# response = completion( -# model="azure/chatgpt-test", messages=messages, stream=True, logger_fn=logger_fn -# ) -# response = "" -# start_time = time.time() -# for chunk in response: -# chunk_time = time.time() -# print(f"time since initial request: {chunk_time - start_time:.2f}") -# print(chunk["choices"][0]["delta"]) -# response += chunk["choices"][0]["delta"] -# if response == "": -# raise Exception("Empty response received") -# except: -# print(f"error occurred: {traceback.format_exc()}") -# pass - - -# # test on huggingface completion call 
-# try: -# start_time = time.time() -# response = completion( -# model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn -# ) -# complete_response = "" -# for chunk in response: -# chunk_time = time.time() -# print(f"time since initial request: {chunk_time - start_time:.2f}") -# print(chunk["choices"][0]["delta"]) -# complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else "" -# if complete_response == "": -# raise Exception("Empty response received") -# except: -# print(f"error occurred: {traceback.format_exc()}") -# pass - -# test on together ai completion call - replit-code-3b -def test_together_ai_completion_call_replit(): - try: - start_time = time.time() - response = completion( - model="Replit-Code-3B", messages=messages, logger_fn=logger_fn, stream=True - ) - complete_response = "" - print(f"returned response object: {response}") - for chunk in response: - chunk_time = time.time() - print(f"time since initial request: {chunk_time - start_time:.2f}") - print(chunk["choices"][0]["delta"]) - complete_response += ( - chunk["choices"][0]["delta"]["content"] - if len(chunk["choices"][0]["delta"].keys()) > 0 - else "" - ) - if complete_response == "": - raise Exception("Empty response received") - except KeyError as e: - pass - except: - print(f"error occurred: {traceback.format_exc()}") - pass +# test_openai_chat_completion_call() # # test on together ai completion call - starcoder def test_together_ai_completion_call_starcoder(): @@ -231,23 +291,18 @@ def test_together_ai_completion_call_starcoder(): ) complete_response = "" print(f"returned response object: {response}") - for chunk in response: - chunk_time = time.time() - complete_response += ( - chunk["choices"][0]["delta"]["content"] - if len(chunk["choices"][0]["delta"].keys()) > 0 - else "" - ) - if len(complete_response) > 0: - print(complete_response) + for idx, chunk in enumerate(response): + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk if complete_response == "": raise Exception("Empty response received") - except KeyError as e: - pass + print(f"complete response: {complete_response}") except: print(f"error occurred: {traceback.format_exc()}") pass - +# test_together_ai_completion_call_starcoder() # test on aleph alpha completion call - commented out as it's expensive to run this on circle ci for every build # def test_aleph_alpha_call(): # try: @@ -286,13 +341,43 @@ async def ai21_async_completion_call(): complete_response = "" start_time = time.time() # Change for loop to async for loop + idx = 0 async for chunk in response: - chunk_time = time.time() - print(f"time since initial request: {chunk_time - start_time:.5f}") - print(chunk["choices"][0]["delta"]) - complete_response += chunk["choices"][0]["delta"]["content"] - if complete_response == "": + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + idx += 1 + if complete_response.strip() == "": raise Exception("Empty response received") + print(f"complete response: {complete_response}") except: print(f"error occurred: {traceback.format_exc()}") - pass \ No newline at end of file + pass + +# asyncio.run(ai21_async_completion_call()) + +async def completion_call(): + try: + response = completion( + model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn + ) + print(f"response: {response}") + complete_response = "" + start_time = time.time() + # Change for loop to 
async for loop + idx = 0 + async for chunk in response: + chunk, finished = streaming_format_tests(idx, chunk) + if finished: + break + complete_response += chunk + idx += 1 + if complete_response.strip() == "": + raise Exception("Empty response received") + print(f"complete response: {complete_response}") + except: + print(f"error occurred: {traceback.format_exc()}") + pass + +# asyncio.run(completion_call()) diff --git a/litellm/utils.py b/litellm/utils.py index dcf41cbce..b52136035 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -80,6 +80,8 @@ last_fetched_at_keys = None # 'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41} # } +def _generate_id(): # private helper function + return 'chatcmpl-' + str(uuid.uuid4()) class Message(OpenAIObject): def __init__(self, content="default", role="assistant", logprobs=None, **params): @@ -89,9 +91,9 @@ class Message(OpenAIObject): self.logprobs = logprobs class Delta(OpenAIObject): - def __init__(self, content="", logprobs=None, role=None, **params): + def __init__(self, content=None, logprobs=None, role=None, **params): super(Delta, self).__init__(**params) - if content != "": + if content is not None: self.content = content if role: self.role = role @@ -105,20 +107,35 @@ class Choices(OpenAIObject): self.message = message class StreamingChoices(OpenAIObject): - def __init__(self, finish_reason=None, index=0, delta=Delta(), **params): + def __init__(self, finish_reason=None, index=0, delta: Optional[Delta]=None, **params): super(StreamingChoices, self).__init__(**params) self.finish_reason = finish_reason self.index = index - self.delta = delta + if delta: + print(f"delta passed in: {delta}") + self.delta = delta + else: + self.delta = Delta() class ModelResponse(OpenAIObject): - def __init__(self, choices=None, created=None, model=None, usage=None, stream=False, **params): - super(ModelResponse, self).__init__(**params) + def __init__(self, id=None, choices=None, created=None, model=None, usage=None, stream=False, **params): if stream: - self.choices = self.choices = choices if choices else [StreamingChoices()] + self.object = "chat.completion.chunk" + self.choices = [StreamingChoices()] else: + if model in litellm.open_ai_embedding_models: + self.object = "embedding" + else: + self.object = "chat.completion" self.choices = self.choices = choices if choices else [Choices()] - self.created = created + if id is None: + self.id = _generate_id() + else: + self.id = id + if created is None: + self.created = int(time.time()) + else: + self.created = created self.model = model self.usage = ( usage @@ -129,6 +146,7 @@ class ModelResponse(OpenAIObject): "total_tokens": None, } ) + super(ModelResponse, self).__init__(**params) def to_dict_recursive(self): d = super().to_dict_recursive() @@ -1041,8 +1059,10 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None): # check if model in known model provider list ## openai - chatcompletion + text completion - if model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_text_completion_models: + if model in litellm.open_ai_chat_completion_models: custom_llm_provider = "openai" + elif model in litellm.open_ai_text_completion_models: + custom_llm_provider = "text-completion-openai" ## anthropic elif model in litellm.anthropic_models: custom_llm_provider = "anthropic" @@ -2359,6 +2379,7 @@ class CustomStreamWrapper: self.custom_llm_provider = custom_llm_provider self.logging_obj = logging_obj self.completion_stream = completion_stream + 
self.sent_first_chunk = False if self.logging_obj: # Log the type of the received item self.logging_obj.post_call(str(type(completion_stream))) @@ -2413,7 +2434,6 @@ class CustomStreamWrapper: chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - print(f"data json: {data_json}") return data_json["generated_text"] except: raise ValueError(f"Unable to parse response. Original response: {chunk}") @@ -2430,7 +2450,6 @@ class CustomStreamWrapper: chunk = chunk.decode("utf-8") data_json = json.loads(chunk) try: - print(f"data json: {data_json}") return data_json["text"] except: raise ValueError(f"Unable to parse response. Original response: {chunk}") @@ -2485,8 +2504,12 @@ class CustomStreamWrapper: return "" def __next__(self): + model_response = ModelResponse(stream=True, model=self.model) try: # return this for all models + if self.sent_first_chunk == False: + model_response.choices[0].delta.role = "assistant" + self.sent_first_chunk = True completion_obj = {"content": ""} # default to role being assistant if self.model in litellm.anthropic_models: chunk = next(self.completion_stream) @@ -2544,7 +2567,7 @@ class CustomStreamWrapper: model_response.choices[0].delta = completion_obj model_response.model = self.model - if model_response.choices[0].delta['content'] == "": + if model_response.choices[0].delta.content == "": model_response.choices[0].delta = { "content": completion_obj["content"], } @@ -2552,8 +2575,6 @@ class CustomStreamWrapper: except StopIteration: raise StopIteration except Exception as e: - print(e) - model_response = ModelResponse(stream=True) model_response.choices[0].finish_reason = "stop" return model_response diff --git a/pyproject.toml b/pyproject.toml index fb0e5ae29..768235679 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.675" +version = "0.1.676" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License"
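
-- 
Reviewer note (illustrative; not part of the patch, placed after the signature
cutoff so it is ignored by git am): a minimal consumer sketch of the chunk
contract the new tests enforce. It assumes an OpenAI API key is configured in
the environment; the model name and prompt are placeholders. Per the OpenAI
streaming format, "role" appears only in the first chunk's delta, "content"
accumulates across chunks, and a truthy "finish_reason" marks the final chunk.

import litellm

def collect_stream():
    # placeholder model/prompt; any model routed through completion() should work
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        stream=True,
    )
    pieces = []
    for idx, chunk in enumerate(response):
        choice = chunk["choices"][0]
        if choice["finish_reason"]:  # e.g. "stop" on the final chunk
            break
        delta = choice["delta"]
        if idx == 0:
            # the role is only sent in the first chunk's delta
            assert delta.get("role") == "assistant"
        if "content" in delta:
            pieces.append(delta["content"])
    return "".join(pieces)

print(collect_stream())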