From 9434237cbfa971f896a1b25a4f4e6be2fea58abb Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Thu, 21 Sep 2023 17:01:56 -0700
Subject: [PATCH] add docs on using completion with configs

---
 docs/my-website/docs/completion/config.md | 81 ++++++++++++++++++++++
 docs/my-website/sidebars.js               |  1 +
 litellm/__pycache__/main.cpython-311.pyc  | Bin 47622 -> 47622 bytes
 litellm/__pycache__/utils.cpython-311.pyc | Bin 121235 -> 121104 bytes
 litellm/tests/test_config.py              |  4 +-
 litellm/utils.py                          | 25 +++----
 6 files changed, 92 insertions(+), 19 deletions(-)
 create mode 100644 docs/my-website/docs/completion/config.md

diff --git a/docs/my-website/docs/completion/config.md b/docs/my-website/docs/completion/config.md
new file mode 100644
index 0000000000..72ccf8e726
--- /dev/null
+++ b/docs/my-website/docs/completion/config.md
@@ -0,0 +1,81 @@
# Model Config

Model-specific changes can complicate our code and make errors harder to debug. Use model configs to simplify this.

### Usage

E.g., say we want to implement:
* A moderations check for Anthropic models (to avoid violating their safety policy)
* Model fallbacks - both model-specific and general

```python
from litellm import completion_with_config
import os

config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "model": {
        "claude-instant-1": {
            "needs_moderation": True
        },
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    }
}

# set env var
os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key


sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)

### Select a model based on prompt size

You can also use model configs to automatically select a model based on prompt size: it counts the tokens in the prompt, checks each model's max tokens, and picks a model whose max tokens is greater than the prompt's token count.

```python
from litellm import completion_with_config
import os

config = {
    "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "adapt_to_prompt_size": True, # 👈 key change
}

# set env var
os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key


sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```

### Complete Config Structure

```python
config = {
    "function": "completion",
    "default_fallback_models": [],  # [Optional] List of model names to try if a call fails
    "available_models": [],  # [Optional] List of all possible models you could call
    "adapt_to_prompt_size": False,  # [Optional] True/False - select a model based on prompt size (will pick from available_models)
    "model": {
        "model-name": {
            "needs_moderation": False,  # [Optional] True/False - call the OpenAI moderations endpoint before making the completion call. Will raise an exception if flagged.
            "error_handling": {
                "error-type": {  # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
                    "fallback_model": ""  # str, name of the model to try instead when that error occurs
                }
            }
        }
    }
}
```
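To tie the pieces together, here is a rough sketch of a single config that combines prompt-size-based model selection, a moderation check, and an error-handling fallback. The model names are only examples; swap in the models you actually call.

```python
from litellm import completion_with_config
import os

# assumes OPENAI_API_KEY / ANTHROPIC_API_KEY are already set in the environment

config = {
    "function": "completion",
    # try these if the chosen model's call fails outright
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1"],
    # pool of models to pick from when adapting to prompt size
    "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "adapt_to_prompt_size": True,
    "model": {
        "claude-instant-1": {
            "needs_moderation": True  # run the moderation check before calling this model
        },
        "gpt-3.5-turbo": {
            "error_handling": {
                # if the prompt overflows the context window, retry with the 16k model
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}

messages = [{"content": "how does a court case get to the Supreme Court?", "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```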
\ No newline at end of file

diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 11062fc670..51e1b8e8c2 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -35,6 +35,7 @@ const sidebars = {
         "completion/message_trimming",
         "completion/model_alias",
         "completion/reliable_completions",
+        "completion/config",
         "completion/batching",
         "completion/mock_requests",
       ],

diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 420ed270f759e742e8f30cd89701486f2eaa29b9..f5b05bfc910310a3310450e4d88fcd61cc9e5e72 100644
GIT binary patch

diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index f6017896791181165ee42c32250de66c5c9741cd..dc14ce03a06d6bd76e9742446c8e16e6c000bc12 100644
GIT binary patch

diff --git a/litellm/utils.py b/litellm/utils.py
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ ... @@
 ########## experimental completion variants ############################

-def completion_with_config(*, config: Union[dict, str], **kwargs):
+def completion_with_config(config: Union[dict, str], **kwargs):
     if config is not None:
         if isinstance(config, str):
             config = read_config_args(config)
@@ -2791,23 +2791,17 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
     else:
         raise Exception("Config path not passed in.")

-    ## load the completion config
-    completion_config = None
-
-    if config["function"] == "completion":
-        completion_config = config
-
-    if completion_config is None:
+    if config is None:
         raise Exception("No completion config in the config file")

-    models_with_config = completion_config["model"].keys()
+    models_with_config = config["model"].keys()
     model = kwargs["model"]
     messages = kwargs["messages"]

     ## completion config
-    fallback_models = completion_config.get("default_fallback_models", None)
-    available_models = completion_config.get("available_models", None)
-    adapt_to_prompt_size = completion_config.get("adapt_to_prompt_size", False)
+    fallback_models = config.get("default_fallback_models", None)
+    available_models = config.get("available_models", None)
+    adapt_to_prompt_size = config.get("adapt_to_prompt_size", False)
     start_time = time.time()
     if adapt_to_prompt_size:
         ## Pick model based on token window
@@ -2829,7 +2823,7 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
     try:
         if model in models_with_config:
             ## Moderation check
-            if completion_config["model"][model].get("needs_moderation"):
+            if config["model"][model].get("needs_moderation"):
                 input = " ".join(message["content"] for message in messages)
                 response = litellm.moderation(input=input)
                 flagged = response["results"][0]["flagged"]
@@ -2838,8 +2832,8 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):

             ## Model-specific Error Handling
             error_handling = None
-            if completion_config["model"][model].get("error_handling"):
-                error_handling = completion_config["model"][model]["error_handling"]
+            if config["model"][model].get("error_handling"):
+                error_handling = config["model"][model]["error_handling"]

             try:
                 response = litellm.completion(**kwargs)
@@ -2968,7 +2962,6 @@ def completion_with_fallbacks(**kwargs):
                 return response

             except Exception as e:
-                print(f"got exception {e} for model {model}")
                 rate_limited_models.add(model)
                 model_expiration_times[model] = (
                     time.time() + 60