From 211e1edfcb943fea3015ffce9ecaf75cc6500c68 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 11 Aug 2023 17:59:51 -0700
Subject: [PATCH] add model load testing functionality

---
 litellm/__init__.py                          |  2 +-
 litellm/__pycache__/__init__.cpython-311.pyc | Bin 3787 -> 3847 bytes
 litellm/__pycache__/main.cpython-311.pyc     | Bin 21560 -> 22249 bytes
 litellm/__pycache__/utils.cpython-311.pyc    | Bin 35918 -> 37981 bytes
 litellm/main.py                              | 12 ++++++------
 litellm/tests/test_load_test_model.py        |  8 ++++++++
 litellm/utils.py                             | 19 +++++++++++++++++++
 pyproject.toml                               |  2 +-
 8 files changed, 35 insertions(+), 8 deletions(-)
 create mode 100644 litellm/tests/test_load_test_model.py

diff --git a/litellm/__init__.py b/litellm/__init__.py
index ca6830293..93d74ab91 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -113,7 +113,7 @@ open_ai_embedding_models = [
 ]
 
 from .timeout import timeout
-from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost
+from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost, load_test_model
 from .main import * # Import all the symbols from main.py
 from .integrations import *
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
\ No newline at end of file
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 55f1f9c754599041caa844a00ae1a6ebe43445cb..cd671c282785c606b05934d159f31165a41dc887 100644
GIT binary patch
(binary delta data omitted)
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index e9b3e7f1c89dd265680095af8df8a7a917a635be..3e2e932e1612bedebaf5ad4183cb304e5d7c0a4c 100644
GIT binary patch
(binary delta data omitted)
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
GIT binary patch
(index hashes and binary delta data omitted)
diff --git a/litellm/main.py b/litellm/main.py
index 3a93f9c24..6e54d658e 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -59,7 +59,7 @@ def completion(
       # params to identify the model
       model=model, replicate=replicate, hugging_face=hugging_face, together_ai=together_ai
     )
-    if azure == True:
+    if azure == True or custom_llm_provider == "azure": # [TODO]: remove azure=True flag, move to 'custom_llm_provider' approach
       # azure configs
       openai.api_type = "azure"
       openai.api_base = litellm.api_base if litellm.api_base is not None else get_secret("AZURE_API_BASE")
@@ -153,7 +153,7 @@ def completion(
       model_response["model"] = model
       model_response["usage"] = response["usage"]
       response = model_response
-    elif "replicate" in model or replicate == True:
+    elif "replicate" in model or replicate == True or custom_llm_provider == "replicate":
       # import replicate/if it fails then pip install replicate
       install_and_import("replicate")
       import replicate
@@ -256,7 +256,7 @@ def completion(
       }
       response = model_response
-    elif model in litellm.openrouter_models:
+    elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
       openai.api_type = "openai"
      # not sure if this will work after someone first uses another API
      openai.api_base = litellm.api_base if litellm.api_base is not None else "https://openrouter.ai/api/v1"
@@ -338,7 +338,7 @@ def completion(
         "total_tokens": prompt_tokens + completion_tokens
       }
       response = model_response
-    elif hugging_face == True:
+    elif hugging_face == True or custom_llm_provider == "huggingface":
       import requests
       API_URL = f"https://api-inference.huggingface.co/models/{model}"
       HF_TOKEN = get_secret("HF_TOKEN")
@@ -364,7 +364,7 @@ def completion(
         "total_tokens": prompt_tokens + completion_tokens
       }
       response = model_response
-    elif together_ai == True:
+    elif together_ai == True or custom_llm_provider == "together_ai":
       import requests
       TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN")
       headers = {"Authorization": f"Bearer {TOGETHER_AI_TOKEN}"}
@@ -430,7 +430,7 @@ def completion(
       ## LOGGING
       logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
       args = locals()
-      raise ValueError(f"No valid completion model args passed in - {args}")
+      raise ValueError(f"Invalid completion model args passed in. Check your input - {args}")
     return response
   except Exception as e:
     ## LOGGING
diff --git a/litellm/tests/test_load_test_model.py b/litellm/tests/test_load_test_model.py
new file mode 100644
index 000000000..1ff74d580
--- /dev/null
+++ b/litellm/tests/test_load_test_model.py
@@ -0,0 +1,8 @@
+import sys, os
+import traceback
+sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+import litellm
+from litellm import load_test_model
+
+result = load_test_model(model="gpt-3.5-turbo", num_calls=5)
+print(result)
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 4fa2751a6..ca0c6d83e 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -302,6 +302,25 @@ def get_optional_params(
       return optional_params
   return optional_params
 
+def load_test_model(model: str, custom_llm_provider: str = None, prompt: str = None, num_calls: int = None):
+  test_prompt = "Hey, how's it going"
+  test_calls = 100
+  if prompt:
+    test_prompt = prompt
+  if num_calls:
+    test_calls = num_calls
+  messages = [[{"role": "user", "content": test_prompt}] for _ in range(test_calls)]
+  start_time = time.time()
+  try:
+    litellm.batch_completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
+    end_time = time.time()
+    response_time = end_time - start_time
+    return {"total_response_time": response_time, "calls_made": test_calls, "status": "success", "exception": None}
+  except Exception as e:
+    end_time = time.time()
+    response_time = end_time - start_time
+    return {"total_response_time": response_time, "calls_made": test_calls, "status": "failed", "exception": e}
+
 def set_callbacks(callback_list):
   global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient
   try:
diff --git a/pyproject.toml b/pyproject.toml
index 9fe3e3433..78f9d4712 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.381"
+version = "0.1.382"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
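
A note on the custom_llm_provider branches in main.py above: they let a caller force a provider route that the model name alone would not select. A minimal sketch of such a call, assuming completion() accepts custom_llm_provider as a keyword argument (the model name and token value here are illustrative, not from this patch):

import os
from litellm import completion

# Hypothetical Hugging Face-hosted model; the "huggingface" branch added
# above reads HF_TOKEN via get_secret, so it must be set in the environment.
os.environ["HF_TOKEN"] = "hf_..."  # placeholder, not a real token

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# Forces the Hugging Face Inference API route even though the model name
# alone would not identify the provider.
response = completion(model="bigcode/starcoder", messages=messages,
                      custom_llm_provider="huggingface")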
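
And a usage sketch for the new load_test_model helper, based only on the signature and return shape added in litellm/utils.py above (the model, prompt, and call count are illustrative; the relevant provider key, e.g. OPENAI_API_KEY, is assumed to be set):

from litellm import load_test_model

# Send 10 identical requests and time the whole batch.
result = load_test_model(model="gpt-3.5-turbo",
                         prompt="What is the capital of France?",
                         num_calls=10)

if result["status"] == "success":
    avg = result["total_response_time"] / result["calls_made"]
    print(f"{result['calls_made']} calls in {result['total_response_time']:.2f}s (~{avg:.2f}s/call)")
else:
    print(f"load test failed: {result['exception']}")

Since the helper times the batch as a whole, the per-call figure is an average, not a latency distribution.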