From 356332d0da7f2100c456141f26cf7902c3a3fca1 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 20 Nov 2023 15:43:15 -0800
Subject: [PATCH] (cookbook) load test router

---
 .../test_questions/question1.txt | 43 ++++++++++++
 .../test_questions/question2.txt | 65 +++++++++++++++++++
 .../test_questions/question3.txt | 50 ++++++++++++++
 3 files changed, 158 insertions(+)
 create mode 100644 cookbook/litellm_router/test_questions/question1.txt
 create mode 100644 cookbook/litellm_router/test_questions/question2.txt
 create mode 100644 cookbook/litellm_router/test_questions/question3.txt

diff --git a/cookbook/litellm_router/test_questions/question1.txt b/cookbook/litellm_router/test_questions/question1.txt
new file mode 100644
index 0000000000..d633a8ea22
--- /dev/null
+++ b/cookbook/litellm_router/test_questions/question1.txt
@@ -0,0 +1,43 @@
+Given this context, what is litellm? LiteLLM about: About
+Call all LLM APIs using the OpenAI format. Use Bedrock, Azure, OpenAI, Cohere, Anthropic, Ollama, Sagemaker, HuggingFace, Replicate (100+ LLMs). LiteLLM manages
+
+Translating inputs to the provider's completion and embedding endpoints
+Guarantees consistent output; text responses will always be available at ['choices'][0]['message']['content']
+Exception mapping - common exceptions across providers are mapped to the OpenAI exception types.
+10/05/2023: LiteLLM is adopting Semantic Versioning for all commits. Learn more
+10/16/2023: Self-hosted OpenAI-proxy server Learn more
+
+Usage (Docs)
+Important
+LiteLLM v1.0.0 is being launched to require openai>=1.0.0. Track this here
+
+Open In Colab
+pip install litellm
+from litellm import completion
+import os
+
+## set ENV variables
+os.environ["OPENAI_API_KEY"] = "your-openai-key"
+os.environ["COHERE_API_KEY"] = "your-cohere-key"
+
+messages = [{"content": "Hello, how are you?", "role": "user"}]
+
+# openai call
+response = completion(model="gpt-3.5-turbo", messages=messages)
+
+# cohere call
+response = completion(model="command-nightly", messages=messages)
+print(response)
+Streaming (Docs)
+LiteLLM supports streaming the model response back; pass stream=True to get a streaming iterator in the response.
+Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure, OpenAI, etc.)
+
+from litellm import completion
+response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
+for chunk in response:
+    print(chunk['choices'][0]['delta'])
+
+# claude 2
+result = completion('claude-2', messages, stream=True)
+for chunk in result:
+    print(chunk['choices'][0]['delta'])
\ No newline at end of file
diff --git a/cookbook/litellm_router/test_questions/question2.txt b/cookbook/litellm_router/test_questions/question2.txt
new file mode 100644
index 0000000000..78188d0666
--- /dev/null
+++ b/cookbook/litellm_router/test_questions/question2.txt
@@ -0,0 +1,65 @@
+Does litellm support oobabooga LLMs? How can I call oobabooga LLMs? Call all LLM APIs using the OpenAI format. Use Bedrock, Azure, OpenAI, Cohere, Anthropic, Ollama, Sagemaker, HuggingFace, Replicate (100+ LLMs). LiteLLM manages
+
+Translating inputs to the provider's completion and embedding endpoints
+Guarantees consistent output; text responses will always be available at ['choices'][0]['message']['content']
+Exception mapping - common exceptions across providers are mapped to the OpenAI exception types.
+10/05/2023: LiteLLM is adopting Semantic Versioning for all commits. Learn more
+10/16/2023: Self-hosted OpenAI-proxy server Learn more
+
+Usage (Docs)
+Important
+LiteLLM v1.0.0 is being launched to require openai>=1.0.0. Track this here
+
+Open In Colab
+pip install litellm
+from litellm import completion
+import os
+
+## set ENV variables
+os.environ["OPENAI_API_KEY"] = "your-openai-key"
+os.environ["COHERE_API_KEY"] = "your-cohere-key"
+
+messages = [{"content": "Hello, how are you?", "role": "user"}]
+
+# openai call
+response = completion(model="gpt-3.5-turbo", messages=messages)
+
+# cohere call
+response = completion(model="command-nightly", messages=messages)
+print(response)
+Streaming (Docs)
+LiteLLM supports streaming the model response back; pass stream=True to get a streaming iterator in the response.
+Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure, OpenAI, etc.)
+
+from litellm import completion
+response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
+for chunk in response:
+    print(chunk['choices'][0]['delta'])
+
+# claude 2
+result = completion('claude-2', messages, stream=True)
+for chunk in result:
+    print(chunk['choices'][0]['delta']) Supported LiteLLM providers Supported Provider (Docs)
+Provider Completion Streaming Async Completion Async Streaming
+openai ✅ ✅ ✅ ✅
+azure ✅ ✅ ✅ ✅
+aws - sagemaker ✅ ✅ ✅ ✅
+aws - bedrock ✅ ✅ ✅ ✅
+cohere ✅ ✅ ✅ ✅
+anthropic ✅ ✅ ✅ ✅
+huggingface ✅ ✅ ✅ ✅
+replicate ✅ ✅ ✅ ✅
+together_ai ✅ ✅ ✅ ✅
+openrouter ✅ ✅ ✅ ✅
+google - vertex_ai ✅ ✅ ✅ ✅
+google - palm ✅ ✅ ✅ ✅
+ai21 ✅ ✅ ✅ ✅
+baseten ✅ ✅ ✅ ✅
+vllm ✅ ✅ ✅ ✅
+nlp_cloud ✅ ✅ ✅ ✅
+aleph alpha ✅ ✅ ✅ ✅
+petals ✅ ✅ ✅ ✅
+ollama ✅ ✅ ✅ ✅
+deepinfra ✅ ✅ ✅ ✅
+perplexity-ai ✅ ✅ ✅ ✅
+anyscale ✅ ✅ ✅ ✅
\ No newline at end of file
diff --git a/cookbook/litellm_router/test_questions/question3.txt b/cookbook/litellm_router/test_questions/question3.txt
new file mode 100644
index 0000000000..a122787504
--- /dev/null
+++ b/cookbook/litellm_router/test_questions/question3.txt
@@ -0,0 +1,50 @@
+What endpoints does the litellm proxy have? 💥 OpenAI Proxy Server
+LiteLLM Server manages:
+
+Calling 100+ LLMs (Huggingface/Bedrock/TogetherAI/etc.) in the OpenAI ChatCompletions & Completions format
+Set custom prompt templates + model-specific configs (temperature, max_tokens, etc.)
+Quick Start
+View all the supported args for the Proxy CLI here
+
+$ litellm --model huggingface/bigcode/starcoder
+
+#INFO: Proxy running on http://0.0.0.0:8000
+
+Test
+In a new shell, run the following; this will make an openai.ChatCompletion request
+
+litellm --test
+
+This will now automatically route any requests for gpt-3.5-turbo to bigcode/starcoder, hosted on Huggingface Inference Endpoints.
+
+Replace openai base
+import openai
+
+openai.base_url = "http://0.0.0.0:8000"
+
+print(openai.chat.completions.create(model="test", messages=[{"role":"user", "content":"Hey!"}]))
+
+Supported LLMs
+Bedrock
+Huggingface (TGI)
+Anthropic
+VLLM
+OpenAI Compatible Server
+TogetherAI
+Replicate
+Petals
+Palm
+Azure OpenAI
+AI21
+Cohere
+$ export AWS_ACCESS_KEY_ID=""
+$ export AWS_REGION_NAME="" # e.g. us-west-2
+$ export AWS_SECRET_ACCESS_KEY=""
+
+$ litellm --model bedrock/anthropic.claude-v2
+
+Server Endpoints
+POST /chat/completions - chat completions endpoint to call 100+ LLMs
+POST /completions - completions endpoint
+POST /embeddings - embeddings endpoint for Azure, OpenAI, and Huggingface models
+GET /models - available models on the server
\ No newline at end of file
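The patch title says these files are meant for load testing the litellm Router, so below is a minimal, hypothetical sketch of a driver that reads the three question files and fires them concurrently through a Router. It is not part of this patch: the model_list entry, the OPENAI_API_KEY environment variable, the file path, and the concurrency pattern are all illustrative assumptions; only Router and its acompletion method come from litellm itself.

import asyncio
import os
from pathlib import Path

from litellm import Router

# Assumed deployment list -- swap in your own providers and keys.
model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # alias the router load-balances across
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "api_key": os.environ["OPENAI_API_KEY"],
        },
    },
]

router = Router(model_list=model_list)

async def ask(question: str):
    # Each question*.txt file already bundles the user query with its context.
    return await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": question}],
    )

async def main():
    question_dir = Path("cookbook/litellm_router/test_questions")
    questions = [p.read_text() for p in sorted(question_dir.glob("question*.txt"))]
    # Fire every question at once to put concurrent load on the router.
    responses = await asyncio.gather(*(ask(q) for q in questions))
    for resp in responses:
        print(resp["choices"][0]["message"]["content"][:80])

if __name__ == "__main__":
    asyncio.run(main())

Using router.acompletion with asyncio.gather keeps all requests in flight at the same time, which is the behaviour a router load test exercises; a fuller test would repeat these questions many times and record latencies and failures per deployment.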