From bc767cc42a9c1d5205751bf6c43fef01155eee66 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 12 Aug 2023 16:34:32 -0700
Subject: [PATCH] adding anthropic llm class - handles sync + stream

---
 .DS_Store                                    | Bin 8196 -> 6148 bytes
 litellm/__init__.py                          |   2 +-
 litellm/__pycache__/__init__.cpython-311.pyc | Bin 3847 -> 3888 bytes
 litellm/__pycache__/main.cpython-311.pyc     | Bin 22254 -> 21031 bytes
 litellm/__pycache__/timeout.cpython-311.pyc  | Bin 5193 -> 5193 bytes
 litellm/__pycache__/utils.cpython-311.pyc    | Bin 38107 -> 39112 bytes
 litellm/llms/__init__.py                     |   1 +
 litellm/llms/anthropic.py                    |  97 +++++++++++
 litellm/main.py                              | 104 ++++-------
 litellm/tests/test_logging.py                |  39 +++--
 litellm/tests/test_streaming.py              |  28 +++
 litellm/utils.py                             |  47 ++++-
 old-docs/advanced.md                         |  29 ----
 old-docs/berrispend_integration.md           |  34 ----
 old-docs/client_integrations.md              |  12 --
 old-docs/contact.md                          |   6 -
 old-docs/contributing.md                     |  34 ----
 old-docs/helicone_integration.md             |  55 ------
 old-docs/index.md                            |  43 -----
 old-docs/input.md                            | 172 -------------------
 old-docs/output.md                           |  12 --
 old-docs/secret.md                           |  33 ----
 old-docs/stream.md                           |  33 ----
 old-docs/supported.md                        |  72 --------
 old-docs/supported_embedding.md              |   5 -
 old-docs/token_usage.md                      |  45 -----
 old-docs/troubleshoot.md                     |   9 -
 27 files changed, 219 insertions(+), 693 deletions(-)
 create mode 100644 litellm/llms/__init__.py
 create mode 100644 litellm/llms/anthropic.py
 create mode 100644 litellm/tests/test_streaming.py
 delete mode 100644 old-docs/advanced.md
 delete mode 100644 old-docs/berrispend_integration.md
 delete mode 100644 old-docs/client_integrations.md
 delete mode 100644 old-docs/contact.md
 delete mode 100644 old-docs/contributing.md
 delete mode 100644 old-docs/helicone_integration.md
 delete mode 100644 old-docs/index.md
 delete mode 100644 old-docs/input.md
 delete mode 100644 old-docs/output.md
 delete mode 100644 old-docs/secret.md
 delete mode 100644 old-docs/stream.md
 delete mode 100644 old-docs/supported.md
 delete mode 100644 old-docs/supported_embedding.md
 delete mode 100644 old-docs/token_usage.md
 delete mode 100644 old-docs/troubleshoot.md

diff --git a/.DS_Store b/.DS_Store
index 5819da711ed4e7e5053d6fb809bdcddfa3f56a49..d1cbcc87c8e898f83b116798f666d20bbac51e8e 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 93d74ab91..4d118bd9a 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -113,7 +113,7 @@ open_ai_embedding_models = [
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost, load_test_model
+from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost, load_test_model, get_litellm_params
 from .main import * # Import all the symbols from main.py
 from .integrations import *
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
\ No newline at end of file
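For quick reference (not part of the patch itself), here is a minimal sketch of how the newly exported `get_litellm_params` helper is used; it mirrors the definition this diff adds to `litellm/utils.py`, and the keyword values below are illustrative only:

```python
from litellm import get_litellm_params

# Gather the liteLLM-specific call options into a single dict, used purely for logging.
litellm_params = get_litellm_params(
    api_key=None,                     # per-call provider key, if one was passed
    force_timeout=600,                # request timeout in seconds
    custom_llm_provider="anthropic",  # which provider branch completion() will take
    custom_api_base=None,             # optional base-URL override
)
# Expected shape, per the utils.py hunk later in this diff:
# {"return_async": False, "api_key": None, "force_timeout": 600, "logger_fn": None,
#  "verbose": False, "custom_llm_provider": "anthropic", "custom_api_base": None}
```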
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index cd671c282785c606b05934d159f31165a41dc887..84bb43b8c2b03a60ef8cc3046cc1ec803c8ed0cb 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 1884f5ce9e4485f93a3161c20d641cef69ed81f8..c0ed4c412b3ab41afdd4de74d27497ccf3fdaf60 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/timeout.cpython-311.pyc b/litellm/__pycache__/timeout.cpython-311.pyc
index 14a356abc300ab281fe54fc47cfb5e804879a83a..09f976993936f4b5c3eb904c22c7dadb0be4e9a9 100644
Binary files a/litellm/__pycache__/timeout.cpython-311.pyc and b/litellm/__pycache__/timeout.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/__init__.py b/litellm/llms/__init__.py
new file mode 100644
index 000000000..b9742821a
--- /dev/null
+++ b/litellm/llms/__init__.py
@@ -0,0 +1 @@
+from . import *
\ No newline at end of file
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
new file mode 100644
index 000000000..0e59c6de9
--- /dev/null
+++ b/litellm/llms/anthropic.py
@@ -0,0 +1,97 @@
+import os, json, sseclient
+from enum import Enum
+import requests
+from litellm import logging
+import time
+from typing import Callable
+class AnthropicConstants(Enum):
+    HUMAN_PROMPT = "\n\nHuman:"
+    AI_PROMPT = "\n\nAssistant:"
+
+class AnthropicLLM:
+
+    def __init__(self, default_max_tokens_to_sample, api_key=None):
+        self.default_max_tokens_to_sample = default_max_tokens_to_sample
+        self.completion_url = "https://api.anthropic.com/v1/complete"
+        self.validate_environment(api_key=api_key)
+
+    def validate_environment(self, api_key): # set up the environment required to run the model
+        # set the api key
+        try:
+            self.api_key = os.getenv("ANTHROPIC_API_KEY") if "ANTHROPIC_API_KEY" in os.environ else api_key
+            if self.api_key == None:
+                raise Exception
+
+            self.headers = {
+                "accept": "application/json",
+                "anthropic-version": "2023-06-01",
+                "content-type": "application/json",
+                "x-api-key": self.api_key
+            }
+
+        except:
+            raise ValueError("Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params")
+        pass
+
+    def _stream(self): # logic for handling streaming with the LLM API
+        pass
+
+    def completion(self, model: str, messages: list, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
+        model = model
+        prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
+        for message in messages:
+            if "role" in message:
+                if message["role"] == "user":
+                    prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
+                else:
+                    prompt += f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
+            else:
+                prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
+        prompt += f"{AnthropicConstants.AI_PROMPT.value}"
+        if "max_tokens" in optional_params and optional_params["max_tokens"] != float('inf'):
+            max_tokens = optional_params["max_tokens"]
+        else:
+            max_tokens = self.default_max_tokens_to_sample
+        data = {
+            "model": model,
+            "prompt": prompt,
+            "max_tokens_to_sample": max_tokens,
+            **optional_params
+        }
+
+        ## LOGGING
+        logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params}, logger_fn=logger_fn)
+        ## COMPLETION CALL
+        response = requests.post(self.completion_url, headers=self.headers, data=json.dumps(data))
+        if "stream" in optional_params and optional_params["stream"] == True:
+            return response.iter_lines()
+        else:
+            ## LOGGING
+            logging(model=model, input=prompt, additional_args={"litellm_params": litellm_params, "optional_params": optional_params, "original_response": response.text}, logger_fn=logger_fn)
+            print_verbose(f"raw model_response: {response.text}")
+            ## RESPONSE OBJECT
+            completion_response = response.json()
+            print(f"completion_response: {completion_response}")
+            if "error" in completion_response:
+                raise Exception(completion_response["error"])
+            else:
+                model_response["choices"][0]["message"]["content"] = completion_response["completion"]
+            ## CALCULATING USAGE
+            prompt_tokens = 0
+            completion_tokens = 0
+
+
+            model_response["created"] = time.time()
+            model_response["model"] = model
+            model_response["usage"] = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
+            }
+            return model_response
+
+    def embedding(): # logic for parsing in - calling - parsing out model embedding calls
+        pass
+
+    def stream(): # logic for how to parse in-out model completion streams
+        pass
\ No newline at end of file
diff --git a/litellm/main.py b/litellm/main.py
index 4c3d75bf5..c1103e40b 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -4,11 +4,12 @@ from functools import partial
 import dotenv, traceback, random, asyncio, time
 from copy import deepcopy
 import litellm
-from litellm import client, logging, exception_type, timeout, get_optional_params
+from litellm import client, logging, exception_type, timeout, get_optional_params, get_litellm_params
+from litellm.utils import get_secret, install_and_import, CustomStreamWrapper, read_config_args
+from .llms.anthropic import AnthropicLLM
 import tiktoken
 from concurrent.futures import ThreadPoolExecutor
 encoding = tiktoken.get_encoding("cl100k_base")
-from litellm.utils import get_secret, install_and_import, CustomStreamWrapper, read_config_args
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
 new_response = {
@@ -38,14 +39,13 @@ async def acompletion(*args, **kwargs):
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
 @timeout(600) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
 def completion(
-    messages, model="gpt-3.5-turbo",# required params
+    model, messages,# required params
     # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
     functions=[], function_call="", # optional params
     temperature=1, top_p=1, n=1, stream=False, stop=None, max_tokens=float('inf'),
     presence_penalty=0, frequency_penalty=0, logit_bias={}, user="", deployment_id=None,
     # Optional liteLLM function params
-    *, return_async=False, api_key=None, force_timeout=600, azure=False, logger_fn=None, verbose=False,
-    hugging_face = False, replicate=False,together_ai = False, custom_llm_provider=None, custom_api_base=None
+    *, return_async=False, api_key=None, force_timeout=600, logger_fn=None, verbose=False, custom_llm_provider=None, custom_api_base=None
   ):
   try:
     global new_response
@@ -57,9 +57,15 @@ def completion(
       temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens,
       presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, deployment_id=deployment_id,
       # params to identify the model
-      model=model, replicate=replicate, hugging_face=hugging_face, together_ai=together_ai
+      model=model, custom_llm_provider=custom_llm_provider
     )
+    # For logging - save the values of the litellm-specific params passed in
+    litellm_params = get_litellm_params(
+      return_async=return_async, api_key=api_key, force_timeout=force_timeout,
+      logger_fn=logger_fn, verbose=verbose, custom_llm_provider=custom_llm_provider,
+      custom_api_base=custom_api_base)
+
-    if azure == True or custom_llm_provider == "azure": # [TODO]: remove azure=True flag, move to 'custom_llm_provider' approach
+    if custom_llm_provider == "azure":
       # azure configs
       openai.api_type = "azure"
       openai.api_base = litellm.api_base if litellm.api_base is not None else get_secret("AZURE_API_BASE")
@@ -72,7 +78,7 @@ def completion(
       else:
openai.api_key = get_secret("AZURE_API_KEY") ## LOGGING - logging(model=model, input=messages, additional_args=optional_params, azure=azure, logger_fn=logger_fn) + logging(model=model, input=messages, additional_args=optional_params, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn) ## COMPLETION CALL if litellm.headers: response = openai.ChatCompletion.create( @@ -102,7 +108,7 @@ def completion( else: openai.api_key = get_secret("OPENAI_API_KEY") ## LOGGING - logging(model=model, input=messages, additional_args=args, azure=azure, logger_fn=logger_fn) + logging(model=model, input=messages, additional_args=args, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn) ## COMPLETION CALL if litellm.headers: response = openai.ChatCompletion.create( @@ -131,7 +137,7 @@ def completion( openai.organization = litellm.organization prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging(model=model, input=prompt, additional_args=optional_params, azure=azure, logger_fn=logger_fn) + logging(model=model, input=prompt, additional_args=optional_params, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn) ## COMPLETION CALL if litellm.headers: response = openai.Completion.create( @@ -146,14 +152,14 @@ def completion( ) completion_response = response["choices"]["text"] ## LOGGING - logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) + logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) ## RESPONSE OBJECT model_response["choices"][0]["message"]["content"] = completion_response model_response["created"] = response["created"] model_response["model"] = model model_response["usage"] = response["usage"] response = model_response - elif "replicate" in model or replicate == True or custom_llm_provider == "replicate": + elif "replicate" in model or custom_llm_provider == "replicate": # import replicate/if it fails then pip install replicate install_and_import("replicate") import replicate @@ -168,11 +174,11 @@ def completion( os.environ["REPLICATE_API_TOKEN"] = litellm.replicate_key prompt = " ".join([message["content"] for message in messages]) input = {"prompt": prompt} - if max_tokens != float('inf'): + if "max_tokens" in optional_params: input["max_length"] = max_tokens # for t5 models input["max_new_tokens"] = max_tokens # for llama2 models ## LOGGING - logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) + logging(model=model, input=input, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) ## COMPLETION CALL output = replicate.run( model, @@ -187,7 +193,7 @@ def completion( response += item completion_response = response ## LOGGING - logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) + logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) prompt_tokens = len(encoding.encode(prompt)) completion_tokens = len(encoding.encode(completion_response)) ## RESPONSE OBJECT @@ -201,59 +207,13 @@ def completion( } response = model_response elif model in litellm.anthropic_models: - # import 
anthropic/if it fails then pip install anthropic - install_and_import("anthropic") - from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT - - #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY") - if api_key: - os.environ["ANTHROPIC_API_KEY"] = api_key - elif litellm.anthropic_key: - os.environ["ANTHROPIC_API_KEY"] = litellm.anthropic_key - prompt = f"{HUMAN_PROMPT}" - for message in messages: - if "role" in message: - if message["role"] == "user": - prompt += f"{HUMAN_PROMPT}{message['content']}" - else: - prompt += f"{AI_PROMPT}{message['content']}" - else: - prompt += f"{HUMAN_PROMPT}{message['content']}" - prompt += f"{AI_PROMPT}" - anthropic = Anthropic() - if max_tokens != float('inf'): - max_tokens_to_sample = max_tokens - else: - max_tokens_to_sample = litellm.max_tokens # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries - ## LOGGING - logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) - ## COMPLETION CALL - completion = anthropic.completions.create( - model=model, - prompt=prompt, - max_tokens_to_sample=max_tokens_to_sample, - **optional_params - ) + anthropic_key = api_key if api_key is not None else litellm.anthropic_key + anthropic_client = AnthropicLLM(default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key) + model_response = anthropic_client.completion(model=model, messages=messages, model_response=model_response, print_verbose=print_verbose, optional_params=optional_params, litellm_params=litellm_params, logger_fn=logger_fn) if 'stream' in optional_params and optional_params['stream'] == True: # don't try to access stream object, - response = CustomStreamWrapper(completion, model) + response = CustomStreamWrapper(model_response, model) return response - - completion_response = completion.completion - ## LOGGING - logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) - prompt_tokens = anthropic.count_tokens(prompt) - completion_tokens = anthropic.count_tokens(completion_response) - ## RESPONSE OBJECT - print_verbose(f"raw model_response: {model_response}") - model_response["choices"][0]["message"]["content"] = completion_response - model_response["created"] = time.time() - model_response["model"] = model - model_response["usage"] = { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": prompt_tokens + completion_tokens - } response = model_response elif model in litellm.openrouter_models or custom_llm_provider == "openrouter": @@ -270,7 +230,7 @@ def completion( else: openai.api_key = get_secret("OPENROUTER_API_KEY") ## LOGGING - logging(model=model, input=messages, additional_args=optional_params, azure=azure, logger_fn=logger_fn) + logging(model=model, input=messages, additional_args=optional_params, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn) ## COMPLETION CALL if litellm.headers: response = openai.ChatCompletion.create( @@ -311,7 +271,7 @@ def completion( co = cohere.Client(cohere_key) prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn) ## COMPLETION CALL response = co.generate( model=model, @@ -364,7 +324,7 @@ def completion( "total_tokens": prompt_tokens + completion_tokens } response = 
model_response - elif together_ai == True or custom_llm_provider == "together_ai": + elif custom_llm_provider == "together_ai": import requests TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN") headers = {"Authorization": f"Bearer {TOGETHER_AI_TOKEN}"} @@ -410,7 +370,7 @@ def completion( prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn) chat_model = ChatModel.from_pretrained(model) @@ -419,7 +379,7 @@ def completion( completion_response = chat.send_message(prompt, **optional_params) ## LOGGING - logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) + logging(model=model, input=prompt, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens, "original_response": completion_response}, logger_fn=logger_fn) ## RESPONSE OBJECT model_response["choices"][0]["message"]["content"] = completion_response @@ -428,13 +388,13 @@ def completion( response = model_response else: ## LOGGING - logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) + logging(model=model, input=messages, custom_llm_provider=custom_llm_provider, logger_fn=logger_fn) args = locals() raise ValueError(f"Invalid completion model args passed in. Check your input - {args}") return response except Exception as e: ## LOGGING - logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e) + logging(model=model, input=messages, custom_llm_provider=custom_llm_provider, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e) ## Map to OpenAI Exception raise exception_type(model=model, original_exception=e) diff --git a/litellm/tests/test_logging.py b/litellm/tests/test_logging.py index d7b0d828d..3174083ef 100644 --- a/litellm/tests/test_logging.py +++ b/litellm/tests/test_logging.py @@ -28,31 +28,32 @@ except: # test on non-openai completion call try: response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) + print(f"claude response: {response}") score +=1 except: print(f"error occurred: {traceback.format_exc()}") pass -# test on openai embedding call -try: - response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn) - score +=1 -except: - traceback.print_exc() +# # test on openai embedding call +# try: +# response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn) +# score +=1 +# except: +# traceback.print_exc() -# test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model -try: - response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn) -except: - score +=1 # expect this to fail - traceback.print_exc() +# # test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model +# try: +# response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn) +# except: +# score +=1 # expect this to fail +# traceback.print_exc() -# test on good azure openai embedding call -try: - response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn) - score +=1 -except: - traceback.print_exc() +# # test on good azure openai embedding call +# try: +# response = 
embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn) +# score +=1 +# except: +# traceback.print_exc() -print(f"Score: {score}, Overall score: {score/5}") \ No newline at end of file +# print(f"Score: {score}, Overall score: {score/5}") \ No newline at end of file diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py new file mode 100644 index 000000000..b7332772f --- /dev/null +++ b/litellm/tests/test_streaming.py @@ -0,0 +1,28 @@ +#### What this tests #### +# This tests streaming for the completion endpoint + +import sys, os +import traceback +sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path +import litellm +from litellm import completion + +litellm.set_verbose = False + +score = 0 + +def logger_fn(model_call_object: dict): + print(f"model call details: {model_call_object}") + +user_message = "Hello, how are you?" +messages = [{ "content": user_message,"role": "user"}] + +# test on anthropic completion call +try: + response = completion(model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn) + for chunk in response: + print(chunk['choices'][0]['delta']) + score +=1 +except: + print(f"error occurred: {traceback.format_exc()}") + pass \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index 65cd96a8e..fc7600cb7 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -64,13 +64,15 @@ def install_and_import(package: str): ####### LOGGING ################### #Logging function -> log the exact model details + what's being sent | Non-Blocking -def logging(model=None, input=None, azure=False, additional_args={}, logger_fn=None, exception=None): +def logging(model=None, input=None, custom_llm_provider=None, azure=False, additional_args={}, logger_fn=None, exception=None): try: model_call_details = {} if model: model_call_details["model"] = model if azure: model_call_details["azure"] = azure + if custom_llm_provider: + model_call_details["custom_llm_provider"] = custom_llm_provider if exception: model_call_details["exception"] = exception @@ -206,6 +208,32 @@ def completion_cost(model="gpt-3.5-turbo", prompt="", completion=""): return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar ####### HELPER FUNCTIONS ################ +def get_litellm_params( + return_async=False, + api_key=None, + force_timeout=600, + azure=False, + logger_fn=None, + verbose=False, + hugging_face=False, + replicate=False, + together_ai=False, + custom_llm_provider=None, + custom_api_base=None +): + litellm_params = { + "return_async": return_async, + "api_key": api_key, + "force_timeout": force_timeout, + "logger_fn": logger_fn, + "verbose": verbose, + "custom_llm_provider": custom_llm_provider, + "custom_api_base": custom_api_base + } + + return litellm_params + + def get_optional_params( # 12 optional params functions = [], @@ -222,9 +250,7 @@ def get_optional_params( user = "", deployment_id = None, model = None, - replicate = False, - hugging_face = False, - together_ai = False, + custom_llm_provider = "" ): optional_params = {} if model in litellm.anthropic_models: @@ -247,13 +273,13 @@ def get_optional_params( if max_tokens != float('inf'): optional_params["max_tokens"] = max_tokens return optional_params - elif replicate == True: + elif custom_llm_provider == "replicate": # any replicate models # TODO: handle translating remaining replicate params if stream: optional_params["stream"] = stream return optional_params - elif together_ai == True: + 
elif custom_llm_provider == "together_ai": if stream: optional_params["stream_tokens"] = stream if temperature != 1: @@ -698,6 +724,13 @@ class CustomStreamWrapper: def __iter__(self): return self + def handle_anthropic_chunk(self, chunk): + str_line = chunk.decode('utf-8') # Convert bytes to string + if str_line.startswith('data:'): + data_json = json.loads(str_line[5:]) + return data_json.get("completion", "") + return "" + def handle_together_ai_chunk(self, chunk): chunk = chunk.decode("utf-8") text_index = chunk.find('"text":"') # this checks if text: exists @@ -713,7 +746,7 @@ class CustomStreamWrapper: completion_obj ={ "role": "assistant", "content": ""} if self.model in litellm.anthropic_models: chunk = next(self.completion_stream) - completion_obj["content"] = chunk.completion + completion_obj["content"] = self.handle_anthropic_chunk(chunk) elif self.model == "replicate": chunk = next(self.completion_stream) completion_obj["content"] = chunk diff --git a/old-docs/advanced.md b/old-docs/advanced.md deleted file mode 100644 index aa3b22389..000000000 --- a/old-docs/advanced.md +++ /dev/null @@ -1,29 +0,0 @@ -# Advanced - Callbacks - -## Use Callbacks to send Output Data to Posthog, Sentry etc -liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses. - -liteLLM supports: - -- [Helicone](https://docs.helicone.ai/introduction) -- [Sentry](https://docs.sentry.io/platforms/python/) -- [PostHog](https://posthog.com/docs/libraries/python) -- [Slack](https://slack.dev/bolt-python/concepts) - -### Quick Start -```python -from litellm import completion - -# set callbacks -litellm.success_callback=["posthog", "helicone"] -litellm.failure_callback=["sentry"] - -## set env variables -os.environ['SENTRY_API_URL'], os.environ['SENTRY_API_TRACE_RATE']= "" -os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url" -os.environ["HELICONE_API_KEY"] = "" - -response = completion(model="gpt-3.5-turbo", messages=messages) -``` - - diff --git a/old-docs/berrispend_integration.md b/old-docs/berrispend_integration.md deleted file mode 100644 index cecd90eba..000000000 --- a/old-docs/berrispend_integration.md +++ /dev/null @@ -1,34 +0,0 @@ -# BerriSpend Tutorial -BerriSpend is a free dashboard to monitor your cost and logs across llm providers. - -## Use BerriSpend to see total spend across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM) -liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses. - -In this case, we want to log requests to BerriSpend when a request succeeds. 
- -### Use Callbacks -Use just 2 lines of code, to instantly see costs and log your responses **across all providers** with BerriSpend: - -``` -litellm.success_callback=["berrispend"] -litellm.failure_callback=["berrispend"] -``` - -Complete code -```python -from litellm import completion - -## set env variables -os.environ["BERRISPEND_ACCOUNT_ID"] = "your-email-id" -os.environ["OPENAI_API_KEY"] = "" - -# set callbacks -litellm.success_callback=["berrispend"] -litellm.failure_callback=["berrispend"] - -#openai call -response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) - -#bad call -response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad call to test error logging"}]) -``` \ No newline at end of file diff --git a/old-docs/client_integrations.md b/old-docs/client_integrations.md deleted file mode 100644 index a7ebc6969..000000000 --- a/old-docs/client_integrations.md +++ /dev/null @@ -1,12 +0,0 @@ -# Data Logging Integrations - -| Integration | Required OS Variables | How to Use with callbacks | -|-----------------|--------------------------------------------|-------------------------------------------| -| Sentry | `SENTRY_API_URL` | `litellm.success_callback=["sentry"], litellm.failure_callback=["sentry"]` | -| Posthog | `POSTHOG_API_KEY`,
`POSTHOG_API_URL` | `litellm.success_callback=["posthog"], litellm.failure_callback=["posthog"]` | -| Slack | `SLACK_API_TOKEN`,
`SLACK_API_SECRET`,
`SLACK_API_CHANNEL` | `litellm.success_callback=["slack"], litellm.failure_callback=["slack"]` | -| Helicone | `HELICONE_API_TOKEN` | `litellm.success_callback=["helicone"]` | - - - - diff --git a/old-docs/contact.md b/old-docs/contact.md deleted file mode 100644 index d5309cd73..000000000 --- a/old-docs/contact.md +++ /dev/null @@ -1,6 +0,0 @@ -# Contact Us - -[![](https://dcbadge.vercel.app/api/server/wuPM9dRgDw)](https://discord.gg/wuPM9dRgDw) - -* [Meet with us 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) -* Contact us at ishaan@berri.ai / krrish@berri.ai diff --git a/old-docs/contributing.md b/old-docs/contributing.md deleted file mode 100644 index 1c831e204..000000000 --- a/old-docs/contributing.md +++ /dev/null @@ -1,34 +0,0 @@ -## Contributing to Documentation -Clone litellm -``` -git clone https://github.com/BerriAI/litellm.git -``` - -### Local setup for locally running docs - -#### Installation -``` -pip install mkdocs -``` - -#### Locally Serving Docs -``` -mkdocs serve -``` -If you see `command not found: mkdocs` try running the following -``` -python3 -m mkdocs serve -``` - -This command builds your Markdown files into HTML and starts a development server to browse your documentation. Open up [http://127.0.0.1:8000/](http://127.0.0.1:8000/) in your web browser to see your documentation. You can make changes to your Markdown files and your docs will automatically rebuild. - -[Full tutorial here](https://docs.readthedocs.io/en/stable/intro/getting-started-with-mkdocs.html) - -### Making changes to Docs -- All the docs are placed under the `docs` directory -- If you are adding a new `.md` file or editing the hierarchy edit `mkdocs.yml` in the root of the project -- After testing your changes, make a change to the `main` branch of [github.com/BerriAI/litellm](https://github.com/BerriAI/litellm) - - - - diff --git a/old-docs/helicone_integration.md b/old-docs/helicone_integration.md deleted file mode 100644 index 273d22d4f..000000000 --- a/old-docs/helicone_integration.md +++ /dev/null @@ -1,55 +0,0 @@ -# Helicone Tutorial -[Helicone](https://helicone.ai/) is an open source observability platform that proxies your OpenAI traffic and provides you key insights into your spend, latency and usage. - -## Use Helicone to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM) -liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses. - -In this case, we want to log requests to Helicone when a request succeeds. - -### Approach 1: Use Callbacks -Use just 1 line of code, to instantly log your responses **across all providers** with helicone: -``` -litellm.success_callback=["helicone"] -``` - -Complete code -```python -from litellm import completion - -## set env variables -os.environ["HELICONE_API_KEY"] = "your-helicone-key" -os.environ["OPENAI_API_KEY"], os.environ["COHERE_API_KEY"] = "", "" - -# set callbacks -litellm.success_callback=["helicone"] - -#openai call -response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) - -#cohere call -response = completion(model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]) -``` - -### Approach 2: [OpenAI + Azure only] Use Helicone as a proxy -Helicone provides advanced functionality like caching, etc. Helicone currently supports this for Azure and OpenAI. 
- -If you want to use Helicone to proxy your OpenAI/Azure requests, then you can - - -- Set helicone as your base url via: `litellm.api_url` -- Pass in helicone request headers via: `litellm.headers` - -Complete Code -``` -import litellm -from litellm import completion - -litellm.api_base = "https://oai.hconeai.com/v1" -litellm.headers = {"Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}"} - -response = litellm.completion( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "how does a court case get to the Supreme Court?"}] -) - -print(response) -``` diff --git a/old-docs/index.md b/old-docs/index.md deleted file mode 100644 index 9e3f1dde4..000000000 --- a/old-docs/index.md +++ /dev/null @@ -1,43 +0,0 @@ -# *🚅 litellm* -a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic APIs - -###### litellm manages: -* Calling all LLM APIs using the OpenAI format - `completion(model, messages)` -* Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']` -* Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250) - -###### observability: -* Logging - see exactly what the raw model request/response is by plugging in your own function `completion(.., logger_fn=your_logging_fn)` and/or print statements from the package `litellm.set_verbose=True` -* Callbacks - automatically send your data to Helicone, Sentry, Posthog, Slack - `litellm.success_callbacks`, `litellm.failure_callbacks` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/) - -## Quick Start -Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing) -### Installation -``` -pip install litellm -``` - -### Usage -```python -from litellm import completion - -## set ENV variables -os.environ["OPENAI_API_KEY"] = "openai key" -os.environ["COHERE_API_KEY"] = "cohere key" - -messages = [{ "content": "Hello, how are you?","role": "user"}] - -# openai call -response = completion(model="gpt-3.5-turbo", messages=messages) - -# cohere call -response = completion("command-nightly", messages) -``` -Need Help / Support : [see troubleshooting](https://litellm.readthedocs.io/en/latest/troubleshoot) - -## Why did we build liteLLM -- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, Cohere - -## Support -* [Meet with us 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) -* Contact us at ishaan@berri.ai / krrish@berri.ai diff --git a/old-docs/input.md b/old-docs/input.md deleted file mode 100644 index 5d3ba77a5..000000000 --- a/old-docs/input.md +++ /dev/null @@ -1,172 +0,0 @@ -# Completion Function - completion() -The Input params are **exactly the same** as the -OpenAI Create chat completion, and let you call **Azure OpenAI, Anthropic, Cohere, Replicate, OpenRouter** models in the same format. - -In addition, liteLLM allows you to pass in the following **Optional** liteLLM args:
-`forceTimeout`, `azure`, `logger_fn`, `verbose` - - -## Input - Request Body - -**`model`** -string Required
-ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API. - ---- - -**`messages`** -array Required
- -A list of messages comprising the conversation so far. Example Python Code - -```python -from litellm import completion - -messages= - [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Knock knock."}, - {"role": "assistant", "content": "Who's there?"}, - {"role": "user", "content": "Orange."}, - ] - -# openai call -response = completion(model="gpt-3.5-turbo", messages=messages, temperature=0) - -# cohere call -response = completion(model="command-nightly", messages=messages, temperature=0) -``` - - ---- ->> **`role`** ->> string Required
->> The role of the messages author. One of system, user, assistant, or function. ->>
->> ->> --- - ->> **`content`** ->> string Required
->> The contents of the message. content is required for all messages, and may be null for assistant messages with function calls. ->>
->> ->> --- - ->> **`name`** ->> string Optional
->> The name of the author of this message. name is required if role is function, and it should be the name of the function whose response is in the content. May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters. ->>
->> ->> --- - ->> **`function_call`** ->> object Optional
->> The name and arguments of a function that should be called, as generated by the model. ->>
->> ->> --- - -**`functions`** -array Optional
-A list of functions the model may generate JSON inputs for. -
- ---- ->> **`name`** ->> string Required
->> The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. ->>
->> ->> --- - ->> **`description`** ->> string Optional
->> A description of what the function does, used by the model to choose when and how to call the function. ->>
->> ->> --- - ->> **`parameters`** ->> object Required
->> The parameters the functions accept, described as a JSON Schema object. ->> To describe a function that accepts no parameters, provide the value `{"type": "object", "properties": {}}`. ->>
->> ->> --- - - -**`function_call`** -string or object Optional
-Controls how the model responds to function calls. "none" means the model does not call a function, and responds to the end-user. "auto" means the model can pick between an end-user or calling a function. Specifying a particular function via `{"name": "my_function"}` forces the model to call that function. "none" is the default when no functions are present. "auto" is the default if functions are present. -
- ---- - -**`temperature`** -number Optional, Defaults to 1
-What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. -
- ---- - -**`top_p`** -number Optional, Defaults to 1
-An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or 1temperature` but not both. -
- ---- - -**`n`** -integer Optional, Defaults to 1
-How many chat completion choices to generate for each input message. -
- ---- - -**`stream`** -boolean Optional, Defaults to false
-If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message. -
- ---- - -**`stop`** -string or array Optional, Defaults to null
-Up to 4 sequences where the API will stop generating further tokens. -
- ---- - -**`max_tokens`** -integer Optional, Defaults to inf
-The maximum number of tokens to generate in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length -
- ---- - -**`presence_penalty`** -number Optional, Defaults to 0
-Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. -
- ---- - -**`frequency_penalty`** -number Optional, Defaults to 0
-Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. -
- ---- - -**`logit_bias`** -map Optional, Defaults to null
-Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase the likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. -
- ---- - -**`user`** -string Optional
-A unique identifier representing your end-user, which can help liteLLM to monitor and detect abuse. - diff --git a/old-docs/output.md b/old-docs/output.md deleted file mode 100644 index 9b67c44b3..000000000 --- a/old-docs/output.md +++ /dev/null @@ -1,12 +0,0 @@ -# Completion Function - completion() -Here's the exact json output you can expect from a `litellm` completion call: - -```python -{'choices': [{'finish_reason': 'stop', - 'index': 0, - 'message': {'role': 'assistant', - 'content': " I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."}}], - 'created': 1691429984.3852863, - 'model': 'claude-instant-1', - 'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41}} -``` \ No newline at end of file diff --git a/old-docs/secret.md b/old-docs/secret.md deleted file mode 100644 index e6dd96ad1..000000000 --- a/old-docs/secret.md +++ /dev/null @@ -1,33 +0,0 @@ -# Supported Secret Managers -liteLLM reads secrets from your secret manager or .env file. - -- [Infisical Secret Manager](#infisical-secret-manager) -- [.env Files](#env-files) - -For the expected format of secrets, see [supported LLM models](https://litellm.readthedocs.io/en/latest/supported). - -## Infisical Secret Manager -Integrates with [Infisical's Secret Manager](https://infisical.com/) for secure storage and retrieval of API keys and sensitive data. - -### Usage -liteLLM manages reading in your LLM API secrets/env variables from Infisical for you. - -```python -import litellm -from infisical import InfisicalClient - -litellm.secret_manager = InfisicalClient(token="your-token") - -messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What's the weather like today?"}, -] - -response = litellm.completion(model="gpt-3.5-turbo", messages=messages) - -print(response) -``` - - -## .env Files -If no secret manager client is specified, liteLLM automatically uses the `.env` file to manage sensitive data. diff --git a/old-docs/stream.md b/old-docs/stream.md deleted file mode 100644 index 5e8cc32ca..000000000 --- a/old-docs/stream.md +++ /dev/null @@ -1,33 +0,0 @@ -# Streaming Responses & Async Completion - -- [Streaming Responses](#streaming-responses) -- [Async Completion](#async-completion) - -## Streaming Responses -LiteLLM supports streaming the model response back by passing `stream=True` as an argument to the completion function. -### Usage -```python -response = completion(model="gpt-3.5-turbo", messages=messages, stream=True) -for chunk in response: - print(chunk['choices'][0]['delta']) - -``` - -## Async Completion -Asynchronous Completion with LiteLLM -LiteLLM provides an asynchronous version of the completion function, called `acompletion`. -### Usage -```python -from litellm import acompletion -import asyncio - -async def test_get_response(): - user_message = "Hello, how are you?"
- messages = [{"content": user_message, "role": "user"}] - response = await acompletion(model="gpt-3.5-turbo", messages=messages) - return response - -response = asyncio.run(test_get_response()) -print(response) - -``` \ No newline at end of file diff --git a/old-docs/supported.md b/old-docs/supported.md deleted file mode 100644 index 8b2aeca22..000000000 --- a/old-docs/supported.md +++ /dev/null @@ -1,72 +0,0 @@ -## Generation/Completion/Chat Completion Models - -### OpenAI Chat Completion Models - -| Model Name | Function Call | Required OS Variables | -|------------------|----------------------------------------|--------------------------------------| -| gpt-3.5-turbo | `completion('gpt-3.5-turbo', messages)` | `os.environ['OPENAI_API_KEY']` | -| gpt-3.5-turbo-16k | `completion('gpt-3.5-turbo-16k', messages)` | `os.environ['OPENAI_API_KEY']` | -| gpt-3.5-turbo-16k-0613 | `completion('gpt-3.5-turbo-16k-0613', messages)` | `os.environ['OPENAI_API_KEY']` | -| gpt-4 | `completion('gpt-4', messages)` | `os.environ['OPENAI_API_KEY']` | - -## Azure OpenAI Chat Completion Models - -| Model Name | Function Call | Required OS Variables | -|------------------|-----------------------------------------|-------------------------------------------| -| gpt-3.5-turbo | `completion('gpt-3.5-turbo', messages, azure=True)` | `os.environ['AZURE_API_KEY']`,
`os.environ['AZURE_API_BASE']`,
`os.environ['AZURE_API_VERSION']` | -| gpt-4 | `completion('gpt-4', messages, azure=True)` | `os.environ['AZURE_API_KEY']`,
`os.environ['AZURE_API_BASE']`,
`os.environ['AZURE_API_VERSION']` | - -### OpenAI Text Completion Models - -| Model Name | Function Call | Required OS Variables | -|------------------|--------------------------------------------|--------------------------------------| -| text-davinci-003 | `completion('text-davinci-003', messages)` | `os.environ['OPENAI_API_KEY']` | - -### Cohere Models - -| Model Name | Function Call | Required OS Variables | -|------------------|--------------------------------------------|--------------------------------------| -| command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` | - - -### Anthropic Models - -| Model Name | Function Call | Required OS Variables | -|------------------|--------------------------------------------|--------------------------------------| -| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` | -| claude-2 | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']` | - -### Hugging Face Inference API - -All [`text2text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text2text-generation&sort=downloads) and [`text-generation`](https://huggingface.co/models?library=transformers&pipeline_tag=text-generation&sort=downloads) models are supported by liteLLM. You can use any text model from Hugging Face with the following steps: - -* Copy the `model repo` URL from Hugging Face and set it as the `model` parameter in the completion call. -* Set `hugging_face` parameter to `True`. -* Make sure to set the hugging face API key - -Here are some examples of supported models: -**Note that the models mentioned in the table are examples, and you can use any text model available on Hugging Face by following the steps above.** - -| Model Name | Function Call | Required OS Variables | -|------------------|-------------------------------------------------------------------------------------|--------------------------------------| -| [stabilityai/stablecode-completion-alpha-3b-4k](https://huggingface.co/stabilityai/stablecode-completion-alpha-3b-4k) | `completion(model="stabilityai/stablecode-completion-alpha-3b-4k", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | -| [bigcode/starcoder](https://huggingface.co/bigcode/starcoder) | `completion(model="bigcode/starcoder", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | -| [google/flan-t5-xxl](https://huggingface.co/google/flan-t5-xxl) | `completion(model="google/flan-t5-xxl", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | -| [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) | `completion(model="google/flan-t5-large", messages=messages, hugging_face=True)` | `os.environ['HF_TOKEN']` | - -### OpenRouter Completion Models - -All the text models from [OpenRouter](https://openrouter.ai/docs) are supported by liteLLM. - -| Model Name | Function Call | Required OS Variables | -|------------------|--------------------------------------------|--------------------------------------| -| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | -| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | -| openai/gpt-4 | `completion('openai/gpt-4', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | -| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | -| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | -| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | -| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | -| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | -| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | -| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OR_SITE_URL']`,
`os.environ['OR_APP_NAME']`,
`os.environ['OR_API_KEY']` | \ No newline at end of file diff --git a/old-docs/supported_embedding.md b/old-docs/supported_embedding.md deleted file mode 100644 index d509adc58..000000000 --- a/old-docs/supported_embedding.md +++ /dev/null @@ -1,5 +0,0 @@ -## Embedding Models - -| Model Name | Function Call | Required OS Variables | -|----------------------|---------------------------------------------|--------------------------------------| -| text-embedding-ada-002 | `embedding('text-embedding-ada-002', input)` | `os.environ['OPENAI_API_KEY']` | \ No newline at end of file diff --git a/old-docs/token_usage.md b/old-docs/token_usage.md deleted file mode 100644 index 5bf2fbd3d..000000000 --- a/old-docs/token_usage.md +++ /dev/null @@ -1,45 +0,0 @@ -# Token Usage -By default LiteLLM returns token usage in all completion requests ([See here](https://litellm.readthedocs.io/en/latest/output/)) - -However, we also expose 3 public helper functions to calculate token usage across providers: - -- `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available. - -- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json). - -- `completion_cost`: This returns the overall cost (in USD) for a given LLM API call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both the cost of input and output). - -## Example Usage - -1. `token_counter` - -```python -from litellm import token_counter - -messages = [{"role": "user", "content": "Hey, how's it going"}] -print(token_counter(model="gpt-3.5-turbo", messages=messages)) -``` - -2. `cost_per_token` - -```python -from litellm import cost_per_token - -prompt_tokens = 5 -completion_tokens = 10 -prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model="gpt-3.5-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) - -print(prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar) -``` - -3. `completion_cost` - -```python -from litellm import completion_cost - -prompt = "Hey, how's it going" -completion = "Hi, I'm gpt - I am doing well" -cost_of_query = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion) - -print(cost_of_query) -``` diff --git a/old-docs/troubleshoot.md b/old-docs/troubleshoot.md deleted file mode 100644 index 3dc4a2662..000000000 --- a/old-docs/troubleshoot.md +++ /dev/null @@ -1,9 +0,0 @@ -## Stable Version - -If you're running into problems with installation / usage, -use the stable version of litellm: - -``` -pip install litellm==0.1.1 -``` -
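Putting the supported-model table and the streaming docs above together, a hedged sketch of streaming a Claude response through the same interface might look like this (assumes `ANTHROPIC_API_KEY` is set and that streamed chunks follow the `choices[0]['delta']` shape shown in the streaming docs above):

```python
from litellm import completion

messages = [{"role": "user", "content": "Write a haiku about the ocean."}]

# Same call shape as the OpenAI examples; only the model name changes.
response = completion(model="claude-instant-1", messages=messages, stream=True)
for chunk in response:
    print(chunk["choices"][0]["delta"])
```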