From e7a6808eb2ff0c1106c502ab854f4ec31a72b14d Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 1 Aug 2023 11:01:47 -0700 Subject: [PATCH] exception mapping --- litellm/__init__.py | 2 +- litellm/__pycache__/__init__.cpython-311.pyc | Bin 690 -> 721 bytes litellm/__pycache__/main.cpython-311.pyc | Bin 11380 -> 11665 bytes litellm/__pycache__/utils.cpython-311.pyc | Bin 12674 -> 15050 bytes litellm/main.py | 13 +- litellm/tests/test_exceptions.py | 129 +++++++++++++++++++ litellm/utils.py | 45 ++++++- 7 files changed, 182 insertions(+), 7 deletions(-) create mode 100644 litellm/tests/test_exceptions.py diff --git a/litellm/__init__.py b/litellm/__init__.py index 38697c7519..7ed52d7cd3 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -25,6 +25,6 @@ open_ai_embedding_models = [ 'text-embedding-ada-002' ] -from .utils import client, logging # Import all the symbols from main.py +from .utils import client, logging, exception_type # Import all the symbols from main.py from .main import * # Import all the symbols from main.py diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc index 73352d569b34079e00484ef045a889fb1748fcec..3e9ac33f0fa3155ad41786391a11d4482cb3d8ac 100644 GIT binary patch delta 163 zcmdnQdXbfPIWI340}wpqKbaCbkyny2V4}JsPcC;9HzPv|M+#RC&%}ZnE#_Nn$vK&+ zc_p{lbMn*EGxO4K@ugNIrxuiC=I6ziR2HOaGTvg;^3&v<%*MEvsfcy*0mcX}5umw@ dKwR86S&1px*!Tj25iWFtLHGhHDiQ+f1OQ?3FMR+2 delta 134 zcmcb}x`~x{IWI340}u$tpGa|?$ScX{Gf~}Xd1poEETFbH2jMMZ)@ FJph*hBwGLg diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc index 11eb85d841f7d983653af7afa7b2b1cf73f994e8..4959766b6752a40ea8ffe15adba15f6ed7b5d2b4 100644 GIT binary patch delta 3748 zcmb_fU2Gf25#Hq;e-$m9LU1OC zk%&Y`A+Qjd2`vaSf-%O0@J!f-B>#Mbu6e_2e-GlewSRZwDDA)ljBTemJcwiTIzHuW z7lW^&nGX6fzR<<#j$9!x$*S%w6zAsh@|^CIR&&y#nlH+;>JJvB&1Y<{V8I8@=-wBX z>2gC6WkVHdsX6lkm~4C8xlp~bO| z5xN7v+NHW^F;FKjI~(PuCPK4rlUD_kLr6m!*8Eyv!?g~+-m(0}NVW9A_a@U~zS-o& zVP-43Z`rigpW+|mQt-OnA!jtC#^{4UB>qFuvokEqunFA0>J0HBYV@Dp?GBXdb7A@L zrTXlx^O=`7naieg2<@Nun}cOU-g?3)qPs>R`fdkU9kHMOqN(RtLr|eT3Q_l}`4-c+ z+s*WMfyj7hBd`uIx?@0z$)F6p%!w@;0`DL04#3+E`dm{_OgXcs7<^0zcVdYQ{V(mY z2JMod3A^iytr>9B%0vhN;~J6{*8E~r zbLTN&IJRzwRKTxx0fzfb9xxm-dBAOWqrD+*5wU|7n$dR!-G}RJAN64){n4kr4JZ!EpEMw}zL~xGgzi74nGJkN&Fttd%}gF^0O(#{ ztCIlns6U}TWfl|q#~{7g8ZS>in(0C0Kq>jfH=2;wk{bus9ROzd)h_5E-!ORtUdfH2 zojJdTMN~>&Nv_}(w8~AR6`USX6z^somPY2#|irF&@g_PmP5U+E|^W*$S{2; zw9@f4rgwyq2_Opl5F`>L!6uH;6GFF8X6$iBm>oI6NPw;iiD|Y6(sp93G{UCY^144O zig|YY$rfg_WKJRdObru5!#E&0b1xP1IY}XH%!sY1yR(XtSJbSmf@6p21EDKN&~?oHDiFP?P|S+6Vo`-{K@8xtjbi|bFo$Z3 z8Xe1MNC}wxmn*YS6qgFpG4dJ=p-Uho1&yW3Vfbf-(v|I-UfBP$j|8m{t&!lZ-x1~2l=wf_P!w%wTk delta 3758 zcmcgvTWlN06`kSoDe)nRTt4+6^`J~W-Kb7vN3z_&Ng6Z`>?Cd}83jXW8JY^c6lPa) z>?|$ANWYr4wx=HfY!^;iCGcC$&U4+7VU!Z<4tf{|kMk!&0*^ z6bm_3uQ`gPe7>OOEAy7~RQAB8HP5Td>VjSAJek`qU}ZX34`e2h6spb(8aYv z*c*y#n?Yt2l$LFLGa?Gncj0sE|J`Tz7N4zl^?mf2cejQ1z574$FtsZW70-1a6qiiB zyyzR4-k#&YLvn1U08-V~NY(XLcu#bwo@gh+P7y# zJF=5BA8qwT+1R5*`pdE*f3=X(J_Y8RG4MkS`ol`V2oz{JXt?yghI1e!^D%d9HHZp^ z%LpnVr1b3p3qp^F4Of9eYed#;5CpjE7qrGegNCq~de9ERThXR~g_Ky?Z-fq$?>ow2 z!*7JoLPwPSZ#h?$PNhrfHvCGD62EFu5?7@>1!E&dL`2!%)JTEGmJs>B=|hbtLavZ< zz@V_f(Q;JjHKMToxB*ne6jA!_4=dRSn3^tFzCu8-pC9!^+DA<3O|C3YZBf`NFGr5S z$~GP0S3n=>d@6U{ zk7d`Q>2z~c^?jQTDuYJk3b|(4UZIqT$XnEorpU({nh@E`ddQqkkb84nin%V+Jv{2$ zFEtu5g^ehi^&y7s{YK<#zR}eOn#(MSZqpCa?1(Uus;tS#t#kIm?xv7;^XS>tpk3J2 z7;4J!!wwg7q(BtS!=;|(PCQp`xtkivum05>v+wNA!LRAx*Bi6^HDB_-Gi0B9pbpfD zT}E*Rx-I7UR--^+whi;YcXXwn+T`>T)>TeC_`V(bE^&b>q?|aHI89HJ*QHr(V4aR42Af#+e>^F{VG@|Cq zDoO!A>axX5mgO}zh+>op3GvzISU6cKEaWs6LM7a>=E`bXLDRFU4u(zgv%%g3o&)ST zgieG>fWioTOr@myjx+prkUi4;*59FW-F$u~H1%kOe{50sGPHM(2u`Y(#gYFQK+Tz5 zDrEFCOSzh*q}3cbb)~>cDt|BhGkX^b9pc}K9QG`+f~sd$a%{e&<@k>xL;QB6+Y2u; z^CMFDWIvu?B6K2BY!5zmi+xy5TPmqqj?M5R(Fh&q`Dk36ln^gfKgmiAl(_D9-&+wz;1-+F)Ttv)+T~4X;T>KoJ1L+OYfInGVFAlu@`iYH0M#&|CI*tdniQCvHQ@FK!X2rnZXNBAxR zPPv-v=~MGLC{{Jk(#V`grljeN&GJ7dyN~6Oc@iMyXex3ha)yP?77-YL<^YJ-FVR0W zSI&I@8Cd&*R9+bT38e}C$(}FiEdM06-zPGL`-;uuJUR4gNVI2X-2vf`g6oI)&xgin ziT`0JxjKss{ALh-F#Iooxe>|$wbo)Ot7Mdt4u#1?LB?@S)CJlhwyL*0ZY6B}+&Ois zq%0S62iXsRN8#3bpffilO6fXDRk!}}>%&fdaX1XOo8K7LgMC%<6@Pqk5oz>c2(M_?f$5$l?oDVRp}b}M>JBTtXfs7v_G=lfBU2AIWu;G zv#iw7ozLgqbM86k-gD>NUZlcAZq$+{VFQk)4-hG!y^5uS5#3O6m?e|K}Sp73%M-UfIZ;cbGqd5V)a5Pou_ z%1y?I`yFm_6Y;1V;R%1mL%i?slbeYEqg#j%u!Z;mTS)*gPJ)05!e8K$p?{-0<4Ld8 z(4RN1Dw?6DWkbzsLzHHz)iA2vkTZiSIj>yLE4tx~hR@4}GN#U`mHvhc3caDGl}nm@ zLsm2L<&5G8hR);^t$)<+{N}%J^WF#;i<#ebWzGqeZNML--SB?V3ix9#z^UAAp}-Xc z!XJU*0uPkSrd&X|ZOTi%1uyhG&N%U!``o%)AcFbWy*VHdAC4P>`SYg6P7w1uT^Or8 z)4nPN+yHX5c@iv`InN2PIv^J~gEQ;v8qMpTUen{1K!3peo#%{ss4>!3omX%J6}I(n zuIl7C5-AAgKJQUpFm-PSFPT5_zTH_rZh2?2M9ElQRmEM2MM_z5{&3e9zaj(?_E?)PT^4&bN`HimTOZ^LIw26^{2FUhF-@gpDlL zRT6eDobo^8x9%J2(0W3bVh~4 zPS3#Lp!uKreqLJWi{^!fi!fk?MrU$a>YQ#GG5@&nWT*|s>1zn>W+c{H#CX#Qgh>P$ z;Vl3wpl9UtwUnI8Sz$S&P(x3p!Ea5;B)zm3CH!eQler8+6pcCaajNbGu%l!9NDm_y z;R-VUWCvzB8eies+=mxFjGYcjDPUL}#l8E|btqw(f%YEZ9Q5dfdA$9@vvd~B%N)$K%_`cjvM>8o*fx5%%L)K+*Cu3We|7a_a`AIpmx%`#YMyc^%0t z3VE(6v%0+rmM<;m3_Qx7d|oA%7qM?G+;lt0Cneh-dI}XdkweNr$Rkkmw;fj^crfWm z6@IX=ecMw(aD>dycl8!;BFogZhrdvynpvL}8Isdo_O z5x$S$tPrX1AlR$4y$;s;I#6{MrGA9)Lxg(>_YvL&&@unpp?oQxgUCgAzVOekU-88> z#;y8b?Cl(Q%g%uYUu9taGhUK>2=wFCK6MrNyL^GW$J3t~sH5@}_=lk3@v4^FF2h&l zfky5Fj`;Y3>jUnmZu4(jwiLmJe~rJ+Mw1a*w8q^! zd&gb18vz#GM6B3ptns%Z|0*v@xT1f}uOdgj-aw>cAZomPNqvpHcA>x#sWRg)}X4O?WXhcll)V_*^{Em8f2s ziRulfB_L73T?RZmAI!D9QYvN4<`m#qa0SSDqRff9Asczf(r_^!N~rhYo0)6s*Nm$) zn^V(6_Jv%8>}Uegaw12wH&mjK-E+}_>{W%2d0qCZOT!!4;<+Qgc7l%mOUGHNR`)uzClk(ZN$4*N z{Y>az3N*0BZ6ArH!2V)6uz!Vf`?f5_T3BmODb`!Yc4$64A6|}ao)1^_46wR^C&vJw zm4v~vFvx_##X$YNODwPn+L9lP53_9t%i9K#XdW*4N6P*Y<{w!MwUk52M;Bg%dRVAu zNqUX7e}f$vFCTdWh$6ftX`(Dmu;9d^7%z*RkMtMf9wzQtCyBG(yv!y^Nm9y^!h*_r z@m3bUzz&R-51d0HexW2?EK3(z@ZyS_3-4VHHNW4%LUCw|q0UFMEOZcB(Y7-3@hEE_ zfmV`EmZg&{cydwP{C?qu*vZ7sT8ZaF&o`|qUKHCO1(?_iZAk+2C!Wj#Kr2ZDWodu~ z2S5*tcfSz3nb^G~ZD%`9eG&kGR+7fb(pZrN$I!i=<&eZ0J6?pq#g4^L>w_ci7 zvcBPR-#9DA&$9U0HL6di>~GF0hB8^1q#P48gVV7Bqia>DUB1{wOE2mT=XLl4 zu=+p1DTVNswJUwQ>~-qP5gZ#tcz`gDfGe`UurK{b;O|O4qtqRv4`C1zAE!^7V?8%_ oE^;^7|JTev_k1hHE={a(aXzpfhRuDw$6JnU9(E6vywK_c340^i4}^+dBQ5yU-l}a!gnc~+XmSgM>d~6MEQ+fE^hl7{Q2y@s%LjBd(^%`e_+ewlgBq@?K zNgIhZ&+Lrgh4wNlbco0*E9?|nnWE=3p|A2nr!dIYB~0QIB1-HQF=CJ4=UF~}A3uD# zwaQnxfwa2S4y|(B*h~YHNrJQ?@`4q56CDU0RSA*&O{8;Oi^wkI-SKnKQ%uR1P z(`Ak$a6%1b_VEL%kl9P&%b5XwR(+Crke`Ng1~@=)(cjB`Q(^jMsZw^To;*p$DfMOh zF`jJw-QF-ePLsh;o?5Ecq&F>PP0EB??Kzp;MRxfluv`7wv%B+osONz5KmoX@621Ks zL!|mK6mQlpElO+{##|)L#jSf>M)@4Pz-()_x0tXlkm@rTp=l#B*|(v7+&gN%grIXO zI&gQ{k2`OcoVr)8QJZWb%FyiXHsw2IP{8ZhMyuEAQpegcU>|I8r`lL@r0qGr=6X_= z;Xx66t8~ToiuIc7opxN;rmL_3-xc`uMdzA})2e54-J@as;p`{mE%_Go6F}GxXiGp9 zSX5qat__z)PHo{<>wa#{G{Yd(8XjA&!xhx?8Z^{FUIirJ0u4ewEpay41cI}TLJJnY z2kjl;2CxEbpHD91GdNO^b>hFd9^Xn9QMZ7b!27@lz$(E-ZG-0htAj7V3#_$nkKEzQ z`Ci`?HP5NO?&}L~tr}kDLx{40Ah#g%;7Z|b%P7H5R%*5=hD)UbGiom%U$o#X!3l;C zpKiGHT(@6sSeL{U*5FMBT@CM7U<>km5soB)er&`J(@SquoFh`vCns@7)P;j@s7D81 Y?OJEMc>MpEQq{?$X_n>Dbs*U5A4-cycK`qY diff --git a/litellm/main.py b/litellm/main.py index b5a6b1c8af..48d3dbed95 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -6,7 +6,7 @@ import traceback import dotenv import traceback import litellm -from litellm import client, logging +from litellm import client, logging, exception_type from litellm import success_callback, failure_callback import random ####### ENVIRONMENT VARIABLES ################### @@ -76,6 +76,7 @@ def completion( temperature=temperature, top_p=top_p, n=n, stream=stream, stop=stop, max_tokens=max_tokens, presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user ) + print_verbose(f"os environment variables: {os.environ}") if azure == True: # azure configs openai.api_type = "azure" @@ -120,7 +121,7 @@ def completion( elif "replicate" in model: # replicate defaults to os.environ.get("REPLICATE_API_TOKEN") # checking in case user set it to REPLICATE_API_KEY instead - if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"): + if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"): replicate_api_token = os.environ.get("REPLICATE_API_KEY") os.environ["REPLICATE_API_TOKEN"] = replicate_api_token prompt = " ".join([message["content"] for message in messages]) @@ -207,7 +208,7 @@ def completion( "finish_reason": "stop", "index": 0, "message": { - "content": response[0], + "content": response[0].text, "role": "assistant" } } @@ -246,8 +247,10 @@ def completion( raise ValueError(f"No valid completion model args passed in - {args}") return response except Exception as e: - logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) - raise e + # log the original exception + logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e) + ## Map to OpenAI Exception + raise exception_type(model=model, original_exception=e) ### EMBEDDING ENDPOINTS #################### diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py new file mode 100644 index 0000000000..38be0e2c15 --- /dev/null +++ b/litellm/tests/test_exceptions.py @@ -0,0 +1,129 @@ +from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError +import os +import sys +import traceback +sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path +import litellm +from litellm import embedding, completion +from concurrent.futures import ThreadPoolExecutor +#### What this tests #### +# This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type + + +# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate + +# 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.) + +# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered + +models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly", "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"] + +# Test 1: Rate Limit Errors +def test_model(model): + try: + sample_text = "how does a court case get to the Supreme Court?" * 50000 + messages = [{ "content": sample_text,"role": "user"}] + azure = False + if model == "chatgpt-test": + azure = True + print(f"model: {model}") + response = completion(model=model, messages=messages, azure=azure) + except RateLimitError: + return True + except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server + return True + except Exception as e: + print(f"Uncaught Exception {model}: {type(e).__name__} - {e}") + pass + return False + +# Repeat each model 500 times +extended_models = [model for model in models for _ in range(250)] + +def worker(model): + return test_model(model) + +# Create a dictionary to store the results +counts = {True: 0, False: 0} + +# Use Thread Pool Executor +with ThreadPoolExecutor(max_workers=500) as executor: + # Use map to start the operation in thread pool + results = executor.map(worker, extended_models) + + # Iterate over results and count True/False + for result in results: + counts[result] += 1 + +accuracy_score = counts[True]/(counts[True] + counts[False]) +print(f"accuracy_score: {accuracy_score}") + +# Test 2: Context Window Errors +print("Testing Context Window Errors") +def test_model(model): # pass extremely long input + sample_text = "how does a court case get to the Supreme Court?" * 100000 + messages = [{ "content": sample_text,"role": "user"}] + try: + azure = False + if model == "chatgpt-test": + azure = True + print(f"model: {model}") + response = completion(model=model, messages=messages, azure=azure) + except InvalidRequestError: + return True + except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server + return True + except Exception as e: + print(f"Error Type: {type(e).__name__}") + print(f"Uncaught Exception - {e}") + pass + return False + +## TEST SCORE +true_val = 0 +for model in models: + if test_model(model=model) == True: + true_val += 1 +accuracy_score = true_val/len(models) +print(f"CTX WINDOW accuracy_score: {accuracy_score}") + +# Test 3: InvalidAuth Errors +def logger_fn(model_call_object: dict): + print(f"model call details: {model_call_object}") + + +def test_model(model): # set the model key to an invalid key, depending on the model + messages = [{ "content": "Hello, how are you?","role": "user"}] + try: + azure = False + if model == "gpt-3.5-turbo": + os.environ["OPENAI_API_KEY"] = "bad-key" + elif model == "chatgpt-test": + os.environ["AZURE_API_KEY"] = "bad-key" + azure = True + elif model == "claude-instant-1": + os.environ["ANTHROPIC_API_KEY"] = "bad-key" + elif model == "command-nightly": + os.environ["COHERE_API_KEY"] = "bad-key" + elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": + os.environ["REPLICATE_API_KEY"] = "bad-key" + os.environ["REPLICATE_API_TOKEN"] = "bad-key" + print(f"model: {model}") + response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn) + print(f"response: {response}") + except AuthenticationError as e: + return True + except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server + return True + except Exception as e: + print(f"Uncaught Exception - {e}") + pass + return False + +## TEST SCORE +true_val = 0 +for model in models: + if test_model(model=model) == True: + true_val += 1 +accuracy_score = true_val/len(models) +print(f"INVALID AUTH accuracy_score: {accuracy_score}") \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index 593b754f03..c7eaa96d2b 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -9,6 +9,7 @@ import litellm import os import openai import random +from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError ####### ENVIRONMENT VARIABLES ################### dotenv.load_dotenv() # Loading env variables using dotenv sentry_sdk_instance = None @@ -29,12 +30,15 @@ def print_verbose(print_statement): ####### LOGGING ################### #Logging function -> log the exact model details + what's being sent | Non-Blocking -def logging(model, input, azure=False, additional_args={}, logger_fn=None): +def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None): try: model_call_details = {} model_call_details["model"] = model model_call_details["input"] = input model_call_details["azure"] = azure + # log exception details + if exception: + model_call_details["original_exception"] = exception # log additional call details -> api key, etc. if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models: model_call_details["api_type"] = openai.api_type @@ -222,3 +226,42 @@ def handle_success(*args, **kwargs): success_handler(args, kwargs) pass + +def exception_type(model, original_exception): + if isinstance(original_exception, OpenAIError): + # Handle the OpenAIError + raise original_exception + elif model: + error_str = str(original_exception) + if isinstance(original_exception, BaseException): + exception_type = type(original_exception).__name__ + else: + exception_type = "" + if "claude" in model: #one of the anthropics + print_verbose(f"status_code: {original_exception.status_code}") + if original_exception.status_code == 401: + raise AuthenticationError(f"AnthropicException - {original_exception.message}") + elif original_exception.status_code == 400: + raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}") + elif original_exception.status_code == 429: + raise RateLimitError(f"AnthropicException - {original_exception.message}") + elif "replicate" in model: + if "Incorrect authentication token" in error_str: + raise AuthenticationError(f"ReplicateException - {error_str}") + elif exception_type == "ModelError": + raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}") + elif "Request was throttled" in error_str: + raise RateLimitError(f"ReplicateException - {error_str}") + elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side + raise ServiceUnavailableError(f"ReplicateException - {error_str}") + elif model == "command-nightly": #Cohere + if "invalid api token" in error_str or "No API key provided." in error_str: + raise AuthenticationError(f"CohereException - {error_str}") + elif "too many tokens" in error_str: + raise InvalidRequestError(f"CohereException - {error_str}", f"{model}") + elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min) + raise RateLimitError(f"CohereException - {original_exception.message}") + raise original_exception # base case - return the original exception + else: + raise original_exception + \ No newline at end of file