From fc757dc1b47d2eb9d0ea47d6ad224955b705059d Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 14 Oct 2023 10:51:54 -0700
Subject: [PATCH] docs(proxy_server.md): add logs, save keys, model fallbacks, config file template to proxy server docs

---
 docs/my-website/docs/proxy_server.md          | 225 +++++++++++++++---
 .../docs/tutorials/model_config_proxy.md      |   1 +
 litellm/proxy/proxy_server.py                 |  19 +-
 3 files changed, 194 insertions(+), 51 deletions(-)

diff --git a/docs/my-website/docs/proxy_server.md b/docs/my-website/docs/proxy_server.md
index ff9a6eee2..2520b78f7 100644
--- a/docs/my-website/docs/proxy_server.md
+++ b/docs/my-website/docs/proxy_server.md
@@ -300,36 +300,91 @@ print(result)
 :::
 
 ## Advanced
-### Configure Model
-
-To save API Keys, change model prompt, etc. you'll need to create a local instance of it:
-```shell
-$ litellm --create_proxy
+### Save API Keys
+```shell
+$ litellm --api_key OPENAI_API_KEY=sk-...
 ```
-This will create a local project called `litellm-proxy` in your current directory, that has:
-* **proxy_cli.py**: Runs the proxy
-* **proxy_server.py**: Contains the API calling logic
-    - `/chat/completions`: receives `openai.ChatCompletion.create` call.
-    - `/completions`: receives `openai.Completion.create` call.
-    - `/models`: receives `openai.Model.list()` call
-* **secrets.toml**: Stores your api keys, model configs, etc.
+LiteLLM will save this to a locally stored config file and persist it across sessions.
+
+LiteLLM Proxy supports all LiteLLM-supported API keys. To add keys for a specific provider, check this list:
+
+**Huggingface**
 
-Run it by doing:
 ```shell
-$ cd litellm-proxy
-```
-```shell
-$ python proxy_cli.py --model ollama/llama # replace with your model name
+$ litellm --add_key HUGGINGFACE_API_KEY=my-api-key #[OPTIONAL]
 ```
-To set api base, temperature, and max tokens, add it to your cli command
+
+**Anthropic**
+
 ```shell
-litellm --model ollama/llama2 \
-  --api_base http://localhost:11434 \
-  --max_tokens 250 \
-  --temperature 0.5
+$ litellm --add_key ANTHROPIC_API_KEY=my-api-key
 ```
+
+**TogetherAI**
+
+```shell
+$ litellm --add_key TOGETHERAI_API_KEY=my-api-key
+```
+
+**Replicate**
+
+```shell
+$ litellm --add_key REPLICATE_API_KEY=my-api-key
+```
+
+**Bedrock / Sagemaker**
+
+```shell
+$ litellm --add_key AWS_ACCESS_KEY_ID=my-key-id
+$ litellm --add_key AWS_SECRET_ACCESS_KEY=my-secret-access-key
+```
+
+**PaLM**
+
+```shell
+$ litellm --add_key PALM_API_KEY=my-palm-key
+```
+
+**Azure OpenAI**
+
+```shell
+$ litellm --add_key AZURE_API_KEY=my-api-key
+$ litellm --add_key AZURE_API_BASE=my-api-base
+```
+
+**AI21**
+
+```shell
+$ litellm --add_key AI21_API_KEY=my-api-key
+```
+
+**Cohere**
+
+```shell
+$ litellm --add_key COHERE_API_KEY=my-api-key
+```
+
 ### Create a proxy for multiple LLMs
 ```shell
 $ litellm
@@ -355,6 +410,121 @@ response = openai.ChatCompletion.create(model="ollama/llama2", messages=[{"role"
 print(response)
 ```
 
+### Logs
+
+```shell
+$ litellm --logs
+```
+
+This will return the most recent log (the call that was sent to the LLM API and the response that was received).
+
+LiteLLM Proxy will also save your logs to a file called `api_logs.json` in the current directory.
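+
+If you want to inspect that file directly, here's a minimal sketch — it only assumes `api_logs.json` is valid JSON; the exact log schema isn't documented here and may change between versions:
+
+```python
+import json
+
+# load the log file the proxy writes to the current working directory
+with open("api_logs.json") as f:
+    logs = json.load(f)
+
+# print the most recent entry, whether the file is a dict keyed by timestamp or a list
+if isinstance(logs, dict):
+    latest = sorted(logs.keys())[-1]
+    print(latest, logs[latest])
+else:
+    print(logs[-1])
+```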
+
+### Configure Proxy
+
+If you need to:
+* save API keys
+* set litellm params (e.g. drop unmapped params, set fallback models, etc.)
+* set model-specific params (max tokens, temperature, api base, prompt template)
+
+You can set these just for that session (via the CLI), or persist them across restarts (via the config file).
+
+E.g.: Set api base, max tokens, and temperature.
+
+**For that session**:
+```shell
+litellm --model ollama/llama2 \
+  --api_base http://localhost:11434 \
+  --max_tokens 250 \
+  --temperature 0.5
+
+# OpenAI-compatible server running on http://0.0.0.0:8000
+```
+
+**Across restarts**:
+Create a file called `litellm_config.toml` and paste this into it:
+
+```toml
+[model."ollama/llama2"] # run via `litellm --model ollama/llama2`
+max_tokens = 250 # set max tokens for the model
+temperature = 0.5 # set temperature for the model
+api_base = "http://localhost:11434" # set a custom api base for the model
+```
+
+Save it to the proxy with:
+```shell
+$ litellm --config -f ./litellm_config.toml
+```
+LiteLLM will save a copy of this file in its package, so it can persist these settings across restarts.
+
+**Complete Config File**
+
+```toml
+### API KEYS ###
+[keys]
+# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
+# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
+# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
+# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
+# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
+# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key
+# AWS_ACCESS_KEY_ID = "" # Uncomment to save your Bedrock/Sagemaker access keys
+# AWS_SECRET_ACCESS_KEY = "" # Uncomment to save your Bedrock/Sagemaker access keys
+
+### LITELLM PARAMS ###
+[general]
+# add_function_to_prompt = True # e.g. Ollama doesn't support functions, so add them to the prompt instead
+# drop_params = True # drop any params not supported by the provider (e.g. Ollama)
+# default_model = "gpt-4" # route all requests to this model
+# fallbacks = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"] # models to fall back to if the completion call fails (remember: add the relevant keys)
+
+### MODEL PARAMS ###
+[model."ollama/llama2"] # run via `litellm --model ollama/llama2`
+# max_tokens = "" # set max tokens for the model
+# temperature = "" # set temperature for the model
+# api_base = "" # set a custom api base for the model
+
+[model."ollama/llama2".prompt_template] # [OPTIONAL] LiteLLM can automatically format the prompt - docs: https://docs.litellm.ai/docs/completion/prompt_formatting
+# MODEL_SYSTEM_MESSAGE_START_TOKEN = "[INST] <<SYS>>\n" # This does not need to be a token, can be any string
+# MODEL_SYSTEM_MESSAGE_END_TOKEN = "\n<</SYS>>\n [/INST]\n" # This does not need to be a token, can be any string
+
+# MODEL_USER_MESSAGE_START_TOKEN = "[INST] " # This does not need to be a token, can be any string
+# MODEL_USER_MESSAGE_END_TOKEN = " [/INST]\n" # Applies only to user messages. Can be any string.
+
+# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "" # Applies only to assistant messages. Can be any string.
+# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "\n" # Applies only to assistant messages. Can be any string.
+
+# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
+# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
+```
+[**🔥 [Tutorial] modify a model prompt on the proxy**](./tutorials/model_config_proxy.md)
+
+### Clone Proxy
+To create a local instance of the proxy, run:
+```shell
+$ litellm --create_proxy
+```
+This will create a local project called `litellm-proxy` in your current directory that has:
+* **proxy_cli.py**: Runs the proxy
+* **proxy_server.py**: Contains the API calling logic
+  - `/chat/completions`: receives `openai.ChatCompletion.create` calls (see the example request after this list)
+  - `/completions`: receives `openai.Completion.create` calls
+  - `/models`: receives `openai.Model.list()` calls
+* **secrets.toml**: Stores your api keys, model configs, etc.
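+
+For reference, a raw request to the `/chat/completions` route looks roughly like the sketch below once the cloned proxy is running (the run commands follow). It assumes the default `http://0.0.0.0:8000` address and an `ollama/llama2` model:
+
+```shell
+curl http://0.0.0.0:8000/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ollama/llama2",
+    "messages": [{"role": "user", "content": "Hey, how are you?"}]
+  }'
+```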
+
+Run it by doing:
+```shell
+$ cd litellm-proxy
+```
+```shell
+$ python proxy_cli.py --model ollama/llama # replace with your model name
+```
+
 ### Tracking costs
 By default litellm proxy writes cost logs to litellm/proxy/costs.json
 
 You can view costs on the cli using
 
 ```shell
 litellm --cost
 ```
 
-### Ollama Logs
-Ollama calls can sometimes fail (out-of-memory errors, etc.).
-
-To see your logs just call
-
-```shell
-$ curl 'http://0.0.0.0:8000/ollama_logs'
-```
-
-This will return your logs from `~/.ollama/logs/server.log`.
-
 ### Deploy Proxy
 
diff --git a/docs/my-website/docs/tutorials/model_config_proxy.md b/docs/my-website/docs/tutorials/model_config_proxy.md
index eaa81d213..b3ca0be97 100644
--- a/docs/my-website/docs/tutorials/model_config_proxy.md
+++ b/docs/my-website/docs/tutorials/model_config_proxy.md
@@ -3,6 +3,7 @@ import Image from '@theme/IdealImage';
 # Customize Prompt Templates on OpenAI-Compatible server
 
 **You will learn:** How to set a custom prompt template on our OpenAI compatible server.
+**How?** We will modify the prompt template for CodeLlama.
 
 ## Step 1: Start OpenAI Compatible server
 Let's spin up a local OpenAI-compatible server, to call a deployed `codellama/CodeLlama-34b-Instruct-hf` model using Huggingface's [Text-Generation-Inference (TGI)](https://github.com/huggingface/text-generation-inference) format.
 
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 7c3a47e23..5bae9548c 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -137,22 +137,7 @@ def load_config():
     ## load keys
     if "keys" in user_config:
         for key in user_config["keys"]:
-            if key == "HUGGINGFACE_API_KEY":
-                litellm.huggingface_key = user_config["keys"][key]
-            elif key == "OPENAI_API_KEY":
-                litellm.openai_key = user_config["keys"][key]
-            elif key == "TOGETHERAI_API_KEY":
-                litellm.togetherai_api_key = user_config["keys"][key]
-            elif key == "NLP_CLOUD_API_KEY":
-                litellm.nlp_cloud_key = user_config["keys"][key]
-            elif key == "ANTHROPIC_API_KEY":
-                litellm.anthropic_key = user_config["keys"][key]
-            elif key == "REPLICATE_API_KEY":
-                litellm.replicate_key = user_config["keys"][key]
-            elif key == "AWS_ACCESS_KEY_ID":
-                os.environ["AWS_ACCESS_KEY_ID"] = user_config["keys"][key]
-            elif key == "AWS_SECRET_ACCESS_KEY":
-                os.environ["AWS_SECRET_ACCESS_KEY"] = user_config["keys"][key]
+            os.environ[key] = user_config["keys"][key] # litellm can read keys from the environment
 
     ## settings
     if "general" in user_config:
@@ -309,14 +294,12 @@ def track_cost_callback(
                 completion=output_text
             )
             model = kwargs['model']
-            print("streaming response_cost", response_cost)
     # for non streaming responses
     else:
         # we pass the completion_response obj
         if kwargs["stream"] != True:
             response_cost = litellm.completion_cost(completion_response=completion_response)
-            print("regular response_cost", response_cost)
             model = completion_response["model"]
 
     # read/write from json for storing daily model costs
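
For illustration, the key-loading change in the `load_config()` hunk above boils down to exporting every entry under `[keys]` in `litellm_config.toml` as an environment variable, which litellm then reads. A standalone sketch of that behaviour (assuming Python 3.11+ for `tomllib`; the proxy itself may parse the TOML differently):

```python
import os
import tomllib  # stdlib TOML parser (Python 3.11+)

with open("litellm_config.toml", "rb") as f:
    user_config = tomllib.load(f)

# every key under [keys] becomes an environment variable, e.g. OPENAI_API_KEY
for key, value in user_config.get("keys", {}).items():
    os.environ[key] = value
```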