From 1e06ee3162e3d2480b9a718f08f54d41b3bb8b36 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 28 Dec 2024 18:38:54 -0800
Subject: [PATCH] (Refactor) - Re use litellm.completion/litellm.embedding etc
 for health checks (#7455)

* add mode: realtime

* add _realtime_health_check

* test_realtime_health_check

* azure _realtime_health_check

* _realtime_health_check

* Realtime Models

* fix code quality

* delete OAI / Azure custom health check code

* simplest version of ahealth check

* update tests

* working health check post refactor

* working aspeech health check

* fix realtime health checks

* test_audio_transcription_health_check

* use get_audio_file_for_health_check

* test_text_completion_health_check

* ahealth_check

* simplify health check code

* update ahealth_check

* fix import

* fix unused imports

* fix ahealth_check

* fix local testing

* test_async_realtime_health_check
---
 .circleci/config.yml                          |   1 +
 .../audio_utils/audio_health_check.wav        | Bin 0 -> 29184 bytes
 .../litellm_core_utils/audio_utils/utils.py   |  13 +
 .../litellm_core_utils/health_check_utils.py  |  28 +++
 litellm/llms/azure/azure.py                   | 129 ----------
 litellm/llms/openai/openai.py                 | 100 --------
 litellm/main.py                               | 226 +++++++-----------
 litellm/realtime_api/main.py                  |   8 +-
 tests/local_testing/test_health_check.py      |  56 ++++-
 9 files changed, 188 insertions(+), 373 deletions(-)
 create mode 100644 litellm/litellm_core_utils/audio_utils/audio_health_check.wav
 create mode 100644 litellm/litellm_core_utils/health_check_utils.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5d132dd51d..032f697c78 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -69,6 +69,7 @@ jobs:
             pip install "Pillow==10.3.0"
             pip install "jsonschema==4.22.0"
             pip install "pytest-xdist==3.6.1"
+            pip install "websockets==10.4"
       - save_cache:
          paths:
            - ./venv
diff --git a/litellm/litellm_core_utils/audio_utils/audio_health_check.wav b/litellm/litellm_core_utils/audio_utils/audio_health_check.wav
new file mode 100644
index 0000000000000000000000000000000000000000..f766d23e940732bd14c21c64fddb32e75e680c65
GIT binary patch
literal 29184
[29184 bytes of base85-encoded binary WAV data omitted]
zV(x6X;QBjz(Aq`&$SuS$)ZGNkXMB&=>Fw&GydXA3-xiwr*Ewgz6Ly7qf}vn(Xl2+L zndKIQjZ&7{tuMfAQ-?$1fT$u2;ea<|2A`Y2X_p3uGnih3KoW<+X`3zc`ky>8dNFcls_%}}v8y%PNO6{zo($_}Xe zCiS6Oq}z-ntPI~`xj{^jrPOm&H(5b|5jK5m)ap&DUCESNzzQyk9I(TSVi1f~JDx$bQEH{2dDP>j)r4E;C#jvj?|Yes*geB%cr zVq6DjUx6;`c70Ggrtwn894$B)QO#D>X!8dS181}+Ltq!CO#wKeeQ=IyI*YQWdBuytFPqP*>!FKHZm>&`(Q|K7- zpjFTpA^pC7Mc)ffxmcZ%v!u@G2Eoif&viz6BQ;zaUn4X|r$k)NRYtWnMjy7%+)cDl em*^^MzU`2WV4q|?Y$dGHvvD?uIsRYAQOEx+|6kGo literal 0 HcmV?d00001 diff --git a/litellm/litellm_core_utils/audio_utils/utils.py b/litellm/litellm_core_utils/audio_utils/utils.py index ab19dac9cc..8018fe1153 100644 --- a/litellm/litellm_core_utils/audio_utils/utils.py +++ b/litellm/litellm_core_utils/audio_utils/utils.py @@ -2,6 +2,8 @@ Utils used for litellm.transcription() and litellm.atranscription() """ +import os + from litellm.types.utils import FileTypes @@ -21,3 +23,14 @@ def get_audio_file_name(file_obj: FileTypes) -> str: return str(file_obj) else: return repr(file_obj) + + +def get_audio_file_for_health_check() -> FileTypes: + """ + Get an audio file for health check + + Returns the content of `audio_health_check.wav` in the same directory as this file + """ + pwd = os.path.dirname(os.path.realpath(__file__)) + file_path = os.path.join(pwd, "audio_health_check.wav") + return open(file_path, "rb") diff --git a/litellm/litellm_core_utils/health_check_utils.py b/litellm/litellm_core_utils/health_check_utils.py new file mode 100644 index 0000000000..ff252855f0 --- /dev/null +++ b/litellm/litellm_core_utils/health_check_utils.py @@ -0,0 +1,28 @@ +""" +Utils used for litellm.ahealth_check() +""" + + +def _filter_model_params(model_params: dict) -> dict: + """Remove 'messages' param from model params.""" + return {k: v for k, v in model_params.items() if k != "messages"} + + +def _create_health_check_response(response_headers: dict) -> dict: + response = {} + + if ( + response_headers.get("x-ratelimit-remaining-requests", None) is not None + ): # not provided for dall-e requests + response["x-ratelimit-remaining-requests"] = response_headers[ + "x-ratelimit-remaining-requests" + ] + + if response_headers.get("x-ratelimit-remaining-tokens", None) is not None: + response["x-ratelimit-remaining-tokens"] = response_headers[ + "x-ratelimit-remaining-tokens" + ] + + if response_headers.get("x-ms-region", None) is not None: + response["x-ms-region"] = response_headers["x-ms-region"] + return response diff --git a/litellm/llms/azure/azure.py b/litellm/llms/azure/azure.py index 72dcd59abf..f771532133 100644 --- a/litellm/llms/azure/azure.py +++ b/litellm/llms/azure/azure.py @@ -1491,132 +1491,3 @@ class AzureChatCompletion(BaseLLM): response["x-ms-region"] = completion.headers["x-ms-region"] return response - - async def ahealth_check( - self, - model: Optional[str], - api_key: Optional[str], - api_base: str, - api_version: Optional[str], - timeout: float, - mode: str, - messages: Optional[list] = None, - input: Optional[list] = None, - prompt: Optional[str] = None, - ) -> dict: - client_session = ( - litellm.aclient_session - or get_async_httpx_client(llm_provider=LlmProviders.AZURE).client - ) # handle dall-e-2 calls - - if "gateway.ai.cloudflare.com" in api_base: - ## build base url - assume api base includes resource name - if not api_base.endswith("/"): - api_base += "/" - api_base += f"{model}" - client = AsyncAzureOpenAI( - base_url=api_base, - api_version=api_version, - api_key=api_key, - timeout=timeout, - http_client=client_session, - ) - model = None - # cloudflare ai gateway, needs 
model=None - else: - client = AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=api_base, - api_key=api_key, - timeout=timeout, - http_client=client_session, - ) - - # only run this check if it's not cloudflare ai gateway - if model is None and mode != "image_generation": - raise Exception("model is not set") - - completion = None - - if mode == "completion": - completion = await client.completions.with_raw_response.create( - model=model, # type: ignore - prompt=prompt, # type: ignore - ) - elif mode == "chat": - if messages is None: - raise Exception("messages is not set") - completion = await client.chat.completions.with_raw_response.create( - model=model, # type: ignore - messages=messages, # type: ignore - ) - elif mode == "embedding": - if input is None: - raise Exception("input is not set") - completion = await client.embeddings.with_raw_response.create( - model=model, # type: ignore - input=input, # type: ignore - ) - elif mode == "image_generation": - if prompt is None: - raise Exception("prompt is not set") - completion = await client.images.with_raw_response.generate( - model=model, # type: ignore - prompt=prompt, # type: ignore - ) - elif mode == "audio_transcription": - # Get the current directory of the file being run - pwd = os.path.dirname(os.path.realpath(__file__)) - file_path = os.path.join( - pwd, "../../../tests/gettysburg.wav" - ) # proxy address - audio_file = open(file_path, "rb") - completion = await client.audio.transcriptions.with_raw_response.create( - file=audio_file, - model=model, # type: ignore - prompt=prompt, # type: ignore - ) - elif mode == "audio_speech": - # Get the current directory of the file being run - completion = await client.audio.speech.with_raw_response.create( - model=model, # type: ignore - input=prompt, # type: ignore - voice="alloy", - ) - elif mode == "batch": - completion = await client.batches.with_raw_response.list(limit=1) # type: ignore - elif mode == "realtime": - from litellm.realtime_api.main import _realtime_health_check - - # create a websocket connection - await _realtime_health_check( - model=model or "", - api_key=api_key, - api_base=api_base, - api_version=api_version, - custom_llm_provider="azure", - ) - return {} - else: - raise Exception("mode not set") - response = {} - - if completion is None or not hasattr(completion, "headers"): - raise Exception("invalid completion response") - - if ( - completion.headers.get("x-ratelimit-remaining-requests", None) is not None - ): # not provided for dall-e requests - response["x-ratelimit-remaining-requests"] = completion.headers[ - "x-ratelimit-remaining-requests" - ] - - if completion.headers.get("x-ratelimit-remaining-tokens", None) is not None: - response["x-ratelimit-remaining-tokens"] = completion.headers[ - "x-ratelimit-remaining-tokens" - ] - - if completion.headers.get("x-ms-region", None) is not None: - response["x-ms-region"] = completion.headers["x-ms-region"] - - return response diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index f0045d9aa4..0ee8e3dadd 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -1,5 +1,4 @@ import hashlib -import os import types from typing import ( Any, @@ -1306,105 +1305,6 @@ class OpenAIChatCompletion(BaseLLM): return HttpxBinaryResponseContent(response=response.response) - async def ahealth_check( - self, - model: Optional[str], - api_key: Optional[str], - timeout: float, - mode: str, - messages: Optional[list] = None, - input: Optional[list] = None, - prompt: 
Optional[str] = None, - organization: Optional[str] = None, - api_base: Optional[str] = None, - ): - client = AsyncOpenAI( - api_key=api_key, - timeout=timeout, - organization=organization, - base_url=api_base, - ) - if model is None and mode != "image_generation": - raise Exception("model is not set") - - completion = None - - if mode == "completion": - completion = await client.completions.with_raw_response.create( - model=model, # type: ignore - prompt=prompt, # type: ignore - ) - elif mode == "chat": - if messages is None: - raise Exception("messages is not set") - completion = await client.chat.completions.with_raw_response.create( - model=model, # type: ignore - messages=messages, # type: ignore - ) - elif mode == "embedding": - if input is None: - raise Exception("input is not set") - completion = await client.embeddings.with_raw_response.create( - model=model, # type: ignore - input=input, # type: ignore - ) - elif mode == "image_generation": - if prompt is None: - raise Exception("prompt is not set") - completion = await client.images.with_raw_response.generate( - model=model, # type: ignore - prompt=prompt, # type: ignore - ) - elif mode == "audio_transcription": - # Get the current directory of the file being run - pwd = os.path.dirname(os.path.realpath(__file__)) - file_path = os.path.join( - pwd, "../../../tests/gettysburg.wav" - ) # proxy address - audio_file = open(file_path, "rb") - completion = await client.audio.transcriptions.with_raw_response.create( - file=audio_file, - model=model, # type: ignore - prompt=prompt, # type: ignore - ) - elif mode == "audio_speech": - # Get the current directory of the file being run - completion = await client.audio.speech.with_raw_response.create( - model=model, # type: ignore - input=prompt, # type: ignore - voice="alloy", - ) - elif mode == "realtime": - from litellm.realtime_api.main import _realtime_health_check - - # create a websocket connection - await _realtime_health_check( - model=model or "", - api_key=api_key, - api_base=api_base or "https://api.openai.com/", - custom_llm_provider="openai", - ) - return {} - else: - raise ValueError("mode not set, passed in mode: " + mode) - response = {} - - if completion is None or not hasattr(completion, "headers"): - raise Exception("invalid completion response") - - if ( - completion.headers.get("x-ratelimit-remaining-requests", None) is not None - ): # not provided for dall-e requests - response["x-ratelimit-remaining-requests"] = completion.headers[ - "x-ratelimit-remaining-requests" - ] - - if completion.headers.get("x-ratelimit-remaining-tokens", None) is not None: - response["x-ratelimit-remaining-tokens"] = completion.headers[ - "x-ratelimit-remaining-tokens" - ] - return response - class OpenAIFilesAPI(BaseLLM): """ diff --git a/litellm/main.py b/litellm/main.py index 6b0f7c026f..52a886b69c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -51,6 +51,11 @@ from litellm import ( # type: ignore get_optional_params, ) from litellm.integrations.custom_logger import CustomLogger +from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_for_health_check +from litellm.litellm_core_utils.health_check_utils import ( + _create_health_check_response, + _filter_model_params, +) from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.litellm_core_utils.mock_functions import ( mock_embedding, @@ -60,6 +65,7 @@ from litellm.litellm_core_utils.prompt_templates.common_utils import ( get_content_from_model_response, ) from 
litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.realtime_api.main import _realtime_health_check from litellm.secret_managers.main import get_secret_str from litellm.utils import ( CustomStreamWrapper, @@ -5117,65 +5123,60 @@ def speech( ##### Health Endpoints ####################### -async def ahealth_check_chat_models( +async def ahealth_check_wildcard_models( model: str, custom_llm_provider: str, model_params: dict ) -> dict: - if "*" in model: - from litellm.litellm_core_utils.llm_request_utils import ( - pick_cheapest_chat_model_from_llm_provider, - ) - - # this is a wildcard model, we need to pick a random model from the provider - cheapest_model = pick_cheapest_chat_model_from_llm_provider( - custom_llm_provider=custom_llm_provider - ) - fallback_models: Optional[List] = None - if custom_llm_provider in litellm.models_by_provider: - models = litellm.models_by_provider[custom_llm_provider] - random.shuffle(models) # Shuffle the models list in place - fallback_models = models[ - :2 - ] # Pick the first 2 models from the shuffled list - model_params["model"] = cheapest_model - model_params["fallbacks"] = fallback_models - model_params["max_tokens"] = 1 - await acompletion(**model_params) - response: dict = {} # args like remaining ratelimit etc. - else: # default to completion calls - model_params["max_tokens"] = 1 - await acompletion(**model_params) - response = {} # args like remaining ratelimit etc. + from litellm.litellm_core_utils.llm_request_utils import ( + pick_cheapest_chat_model_from_llm_provider, + ) + # this is a wildcard model, we need to pick a random model from the provider + cheapest_model = pick_cheapest_chat_model_from_llm_provider( + custom_llm_provider=custom_llm_provider + ) + fallback_models: Optional[List] = None + if custom_llm_provider in litellm.models_by_provider: + models = litellm.models_by_provider[custom_llm_provider] + random.shuffle(models) # Shuffle the models list in place + fallback_models = models[:2] # Pick the first 2 models from the shuffled list + model_params["model"] = cheapest_model + model_params["fallbacks"] = fallback_models + model_params["max_tokens"] = 1 + await acompletion(**model_params) + response: dict = {} # args like remaining ratelimit etc. return response -async def ahealth_check( # noqa: PLR0915 +async def ahealth_check( model_params: dict, mode: Optional[ Literal[ + "chat", "completion", "embedding", + "audio_speech", + "audio_transcription", "image_generation", - "chat", "batch", "rerank", "realtime", ] - ] = None, + ] = "chat", prompt: Optional[str] = None, input: Optional[List] = None, - default_timeout: float = 6000, ): """ Support health checks for different providers. Return remaining rate limit, etc. 
- For azure/openai -> completion.with_raw_response - For rest -> litellm.acompletion() + Returns: + { + "x-ratelimit-remaining-requests": int, + "x-ratelimit-remaining-tokens": int, + "x-ms-region": str, + } """ - passed_in_mode: Optional[str] = None try: model: Optional[str] = model_params.get("model", None) - if model is None: raise Exception("model not set") @@ -5183,122 +5184,73 @@ async def ahealth_check( # noqa: PLR0915 mode = litellm.model_cost[model].get("mode") model, custom_llm_provider, _, _ = get_llm_provider(model=model) - if model in litellm.model_cost and mode is None: mode = litellm.model_cost[model].get("mode") - mode = mode - passed_in_mode = mode - if mode is None: - mode = "chat" # default to chat completion calls - - if custom_llm_provider == "azure": - api_key = ( - model_params.get("api_key") - or get_secret_str("AZURE_API_KEY") - or get_secret_str("AZURE_OPENAI_API_KEY") - ) - - api_base: Optional[str] = ( - model_params.get("api_base") - or get_secret_str("AZURE_API_BASE") - or get_secret_str("AZURE_OPENAI_API_BASE") - ) - - if api_base is None: - raise ValueError( - "Azure API Base cannot be None. Set via 'AZURE_API_BASE' in env var or `.completion(..., api_base=..)`" - ) - - api_version = ( - model_params.get("api_version") - or get_secret_str("AZURE_API_VERSION") - or get_secret_str("AZURE_OPENAI_API_VERSION") - ) - - timeout = ( - model_params.get("timeout") - or litellm.request_timeout - or default_timeout - ) - - response = await azure_chat_completions.ahealth_check( + model_params["cache"] = { + "no-cache": True + } # don't used cached responses for making health check calls + if "*" in model: + return await ahealth_check_wildcard_models( model=model, - messages=model_params.get( - "messages", None - ), # Replace with your actual messages list - api_key=api_key, - api_base=api_base, - api_version=api_version, - timeout=timeout, - mode=mode, + custom_llm_provider=custom_llm_provider, + model_params=model_params, + ) + # Map modes to their corresponding health check calls + mode_handlers = { + "chat": lambda: litellm.acompletion(**model_params), + "completion": lambda: litellm.atext_completion( + **_filter_model_params(model_params), + prompt=prompt or "test", + ), + "embedding": lambda: litellm.aembedding( + **_filter_model_params(model_params), + input=input or ["test"], + ), + "audio_speech": lambda: litellm.aspeech( + **_filter_model_params(model_params), + input=prompt or "test", + voice="alloy", + ), + "audio_transcription": lambda: litellm.atranscription( + **_filter_model_params(model_params), + file=get_audio_file_for_health_check(), + ), + "image_generation": lambda: litellm.aimage_generation( + **_filter_model_params(model_params), prompt=prompt, - input=input, - ) - elif ( - custom_llm_provider == "openai" - or custom_llm_provider == "text-completion-openai" - ): - api_key = model_params.get("api_key") or get_secret_str("OPENAI_API_KEY") - organization = model_params.get("organization") - - timeout = ( - model_params.get("timeout") - or litellm.request_timeout - or default_timeout - ) - - api_base = model_params.get("api_base") or get_secret_str("OPENAI_API_BASE") - - if custom_llm_provider == "text-completion-openai": - mode = "completion" - - response = await openai_chat_completions.ahealth_check( + ), + "rerank": lambda: litellm.arerank( + **_filter_model_params(model_params), + query=prompt or "", + documents=["my sample text"], + ), + "realtime": lambda: _realtime_health_check( model=model, - messages=model_params.get( - "messages", None - ), 
# Replace with your actual messages list - api_key=api_key, - api_base=api_base, - timeout=timeout, - mode=mode, - prompt=prompt, - input=input, - organization=organization, + custom_llm_provider=custom_llm_provider, + api_base=model_params.get("api_base", None), + api_key=model_params.get("api_key", None), + api_version=model_params.get("api_version", None), + ), + } + + if mode in mode_handlers: + _response = await mode_handlers[mode]() + # Only process headers for chat mode + _response_headers: dict = ( + getattr(_response, "_hidden_params", {}).get("headers", {}) or {} ) + return _create_health_check_response(_response_headers) else: - model_params["cache"] = { - "no-cache": True - } # don't used cached responses for making health check calls - if mode == "embedding": - model_params.pop("messages", None) - model_params["input"] = input - await litellm.aembedding(**model_params) - response = {} - elif mode == "image_generation": - model_params.pop("messages", None) - model_params["prompt"] = prompt - await litellm.aimage_generation(**model_params) - response = {} - elif mode == "rerank": - model_params.pop("messages", None) - model_params["query"] = prompt - model_params["documents"] = ["my sample text"] - await litellm.arerank(**model_params) - response = {} - else: - response = await ahealth_check_chat_models( - model=model, - custom_llm_provider=custom_llm_provider, - model_params=model_params, - ) - return response + raise Exception( + f"Mode {mode} not supported. See modes here: https://docs.litellm.ai/docs/proxy/health" + ) except Exception as e: stack_trace = traceback.format_exc() if isinstance(stack_trace, str): stack_trace = stack_trace[:1000] - if passed_in_mode is None: + if mode is None: return { "error": f"error:{str(e)}. Missing `mode`. 
Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models \nstacktrace: {stack_trace}" } diff --git a/litellm/realtime_api/main.py b/litellm/realtime_api/main.py index 7afd526c45..ac39a68c60 100644 --- a/litellm/realtime_api/main.py +++ b/litellm/realtime_api/main.py @@ -118,9 +118,9 @@ async def _arealtime( async def _realtime_health_check( model: str, - api_base: str, custom_llm_provider: str, api_key: Optional[str], + api_base: Optional[str] = None, api_version: Optional[str] = None, ): """ @@ -143,12 +143,14 @@ async def _realtime_health_check( url: Optional[str] = None if custom_llm_provider == "azure": url = azure_realtime._construct_url( - api_base=api_base, + api_base=api_base or "", model=model, api_version=api_version or "2024-10-01-preview", ) elif custom_llm_provider == "openai": - url = openai_realtime._construct_url(api_base=api_base, model=model) + url = openai_realtime._construct_url( + api_base=api_base or "https://api.openai.com/", model=model + ) async with websockets.connect( # type: ignore url, extra_headers={ diff --git a/tests/local_testing/test_health_check.py b/tests/local_testing/test_health_check.py index 3535a4fe94..0d43c4cc05 100644 --- a/tests/local_testing/test_health_check.py +++ b/tests/local_testing/test_health_check.py @@ -6,6 +6,7 @@ import sys import traceback import pytest +from unittest.mock import AsyncMock, patch sys.path.insert( 0, os.path.abspath("../..") @@ -35,6 +36,19 @@ async def test_azure_health_check(): # asyncio.run(test_azure_health_check()) +@pytest.mark.asyncio +async def test_text_completion_health_check(): + response = await litellm.ahealth_check( + model_params={"model": "gpt-3.5-turbo-instruct"}, + mode="completion", + prompt="What's the weather in SF?", + ) + print(f"response: {response}") + + assert "x-ratelimit-remaining-tokens" in response + return response + + @pytest.mark.asyncio async def test_azure_embedding_health_check(): response = await litellm.ahealth_check( @@ -128,7 +142,6 @@ async def test_groq_health_check(): mode=None, prompt="What's 1 + 1?", input=["test from litellm"], - default_timeout=6000, ) print(f"response: {response}") assert response == {} @@ -141,8 +154,6 @@ async def test_cohere_rerank_health_check(): response = await litellm.ahealth_check( model_params={ "model": "cohere/rerank-english-v3.0", - "query": "Hey, how's it going", - "documents": ["my sample text"], "api_key": os.getenv("COHERE_API_KEY"), }, mode="rerank", @@ -154,15 +165,52 @@ async def test_cohere_rerank_health_check(): print(response) +@pytest.mark.asyncio +async def test_audio_speech_health_check(): + response = await litellm.ahealth_check( + model_params={ + "model": "openai/tts-1", + "api_key": os.getenv("OPENAI_API_KEY"), + }, + mode="audio_speech", + prompt="Hey", + ) + + assert "error" not in response + + print(response) + + +@pytest.mark.asyncio +async def test_audio_transcription_health_check(): + litellm.set_verbose = True + response = await litellm.ahealth_check( + model_params={ + "model": "openai/whisper-1", + "api_key": os.getenv("OPENAI_API_KEY"), + }, + mode="audio_transcription", + ) + + assert "error" not in response + + print(response) + + @pytest.mark.asyncio @pytest.mark.parametrize( "model", ["azure/gpt-4o-realtime-preview", "openai/gpt-4o-realtime-preview"] ) -async def test_realtime_health_check(model): +async def test_async_realtime_health_check(model, mocker): """ Test Health Check with Valid models passes """ + mock_websocket = AsyncMock() + mock_connect = 
AsyncMock().__aenter__.return_value = mock_websocket + mocker.patch("websockets.connect", return_value=mock_connect) + + litellm.set_verbose = True model_params = { "model": model, }
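
Usage note: a minimal sketch of how the refactored litellm.ahealth_check() can
be called after this change. The model names and the reliance on the
OPENAI_API_KEY environment variable below are illustrative assumptions, not
taken from the patch itself.

    import asyncio
    import os

    import litellm


    async def main():
        # mode="chat" reuses litellm.acompletion(); "messages" must be supplied
        # inside model_params, since only the non-chat modes filter it out.
        chat_health = await litellm.ahealth_check(
            model_params={
                "model": "gpt-4o-mini",  # illustrative model name
                "messages": [{"role": "user", "content": "ping"}],
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
            mode="chat",
        )
        # On success: any rate limit headers the provider returned, e.g.
        # {"x-ratelimit-remaining-requests": ..., "x-ratelimit-remaining-tokens": ...}
        # On failure: a dict containing an "error" key (see the tests above).
        print(chat_health)

        # mode="embedding" reuses litellm.aembedding(); "messages" is filtered
        # out of model_params and `input` defaults to ["test"].
        embedding_health = await litellm.ahealth_check(
            model_params={"model": "text-embedding-3-small"},
            mode="embedding",
        )
        print(embedding_health)


    asyncio.run(main())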