From 23f03f7e6f563e802b12eaa61014e7c6a44fad27 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 28 Oct 2023 16:14:44 -0700
Subject: [PATCH] docs(simple_proxy.md): add llama index tutorial to docs

---
 docs/my-website/docs/simple_proxy.md | 199 ++++++++++++++++++++++++++-
 1 file changed, 195 insertions(+), 4 deletions(-)

diff --git a/docs/my-website/docs/simple_proxy.md b/docs/my-website/docs/simple_proxy.md
index 328eb56af..329278c6c 100644
--- a/docs/my-website/docs/simple_proxy.md
+++ b/docs/my-website/docs/simple_proxy.md
@@ -6,6 +6,12 @@ import TabItem from '@theme/TabItem';
 
 A simple, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs in the OpenAI Input/Output format
 
+LiteLLM Server supports:
+
+* LLM API Calls in the OpenAI ChatCompletions format
+* Caching + Logging capabilities (Redis and Langfuse, respectively)
+* Setting API keys in the request headers or in the .env
+
 [**See Code**](https://github.com/BerriAI/litellm/tree/main/litellm_server)
 
 :::info
@@ -16,14 +22,14 @@ join our [discord](https://discord.gg/wuPM9dRgDw)
 ## Usage
 
 ```shell
-docker run -e PORT=8000 -e OPENAI_API_KEY=<your-openai-key> -p 8000:8000 ghcr.io/berriai/litellm:latest
-
-# UVICORN: OpenAI Proxy running on http://0.0.0.0:8000
+docker run -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
 ```
+OpenAI Proxy running on http://0.0.0.0:8000
 
 ```shell
 curl http://0.0.0.0:8000/v1/chat/completions \
   -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $YOUR_API_KEY" \
   -d '{
     "model": "gpt-3.5-turbo",
     "messages": [{"role": "user", "content": "Say this is a test!"}],
     "temperature": 0.7
   }'
 ```
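+
+Because the server mirrors the OpenAI API, you can also call it with the OpenAI Python SDK by overriding the API base. A minimal sketch (assuming the pre-1.0 `openai` package, where `api_base` and `ChatCompletion` live at module level):
+
+```python
+import openai
+
+openai.api_base = "http://0.0.0.0:8000"  # point the SDK at the proxy instead of api.openai.com
+openai.api_key = "my-api-key"  # sent as the Authorization header; used if no key is set in the .env
+
+response = openai.ChatCompletion.create(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "Say this is a test!"}],
+    temperature=0.7,
+)
+print(response.choices[0].message.content)
+```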
"Content-Type: application/json" \ + -H "Authorization: Bearer $REPLICATE_API_KEY" + -d '{ + "model": "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf", + "messages": [{"role": "user", "content": "Say this is a test!"}], + "temperature": 0.7 + }' +``` + + +**Set API Keys in .env** ```shell $ docker run -e PORT=8000 -e PALM_API_KEY= -p 8000:8000 ghcr.io/berriai/litellm:latest ``` +**Set API Keys in request headers** +```shell +curl http://0.0.0.0:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $PALM_API_KEY" + -d '{ + "model": "palm/chat-bison", + "messages": [{"role": "user", "content": "Say this is a test!"}], + "temperature": 0.7 + }' +``` + +**Set API Keys in .env** ```shell $ docker run -e PORT=8000 -e AZURE_API_KEY= -e AZURE_API_BASE= -p 8000:8000 ghcr.io/berriai/litellm:latest ``` @@ -105,23 +179,57 @@ $ docker run -e PORT=8000 -e AZURE_API_KEY= -e AZURE_API_BASE= +**Set API Keys in .env** ```shell $ docker run -e PORT=8000 -e AI21_API_KEY= -p 8000:8000 ghcr.io/berriai/litellm:latest ``` +**Set API Keys in request headers** +```shell +curl http://0.0.0.0:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $AI21_API_KEY" + -d '{ + "model": "j2-mid", + "messages": [{"role": "user", "content": "Say this is a test!"}], + "temperature": 0.7 + }' +``` + +**Set API Keys in .env** + ```shell $ docker run -e PORT=8000 -e COHERE_API_KEY= -p 8000:8000 ghcr.io/berriai/litellm:latest ``` +**Set API Keys in request headers** +```shell +curl http://0.0.0.0:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $COHERE_API_KEY" + -d '{ + "model": "command-nightly", + "messages": [{"role": "user", "content": "Say this is a test!"}], + "temperature": 0.7 + }' +``` + + -## Tutorials (Chat-UI, NeMO-Guardrails, PromptTools, Phoenix ArizeAI etc.) +## Tutorials (Chat-UI, NeMO-Guardrails, PromptTools, Phoenix ArizeAI, Langchain, ragas, etc.) + +**Start server:** +```shell +`docker run -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest` +``` +The server is now live on http://0.0.0.0:8000 @@ -247,6 +355,89 @@ model = OpenAIModel( ) ``` + + +```python +from langchain.chat_models import ChatOpenAI +from langchain.prompts.chat import ( + ChatPromptTemplate, + SystemMessagePromptTemplate, + AIMessagePromptTemplate, + HumanMessagePromptTemplate, +) +from langchain.schema import AIMessage, HumanMessage, SystemMessage + +chat = ChatOpenAI(model_name="claude-instant-1", openai_api_key="my-anthropic-key", openai_api_base="http://0.0.0.0:8000") + +messages = [ + SystemMessage( + content="You are a helpful assistant that translates English to French." + ), + HumanMessage( + content="Translate this sentence from English to French. I love programming." + ), +] +chat(messages) +``` + + + +#### Evaluating with Open-Source LLMs + +Use [Ragas](https://github.com/explodinggradients/ragas/blob/7b123533df80d0ada33a2cb2dd2fdedf36807f33/docs/howtos/customisations/llms.ipynb#L247) to evaluate LLMs for RAG-scenarios. 
+
+#### Evaluating with Open-Source LLMs
+
+Use [Ragas](https://github.com/explodinggradients/ragas/blob/7b123533df80d0ada33a2cb2dd2fdedf36807f33/docs/howtos/customisations/llms.ipynb#L247) to evaluate LLMs in RAG scenarios.
+
+```python
+from datasets import load_dataset
+from langchain.chat_models import ChatOpenAI
+
+inference_server_url = "http://0.0.0.0:8000"
+
+chat = ChatOpenAI(
+    model="bedrock/anthropic.claude-v2",
+    openai_api_key="no-key",
+    openai_api_base=inference_server_url,
+    max_tokens=5,
+    temperature=0,
+)
+
+from ragas.metrics import (
+    context_precision,
+    answer_relevancy,
+    faithfulness,
+    context_recall,
+)
+from ragas.metrics.critique import harmfulness
+
+# swap each metric's LLM for the proxied Bedrock model
+faithfulness.llm.langchain_llm = chat
+answer_relevancy.llm.langchain_llm = chat
+context_precision.llm.langchain_llm = chat
+context_recall.llm.langchain_llm = chat
+harmfulness.llm.langchain_llm = chat
+
+# load the FiQA eval dataset used in the Ragas docs
+fiqa_eval = load_dataset("explodinggradients/fiqa", "ragas_eval")
+
+# evaluate
+from ragas import evaluate
+
+result = evaluate(
+    fiqa_eval["baseline"].select(range(5)),  # showing only 5 for demonstration
+    metrics=[faithfulness],
+)
+
+result
+```
+
+Query the proxy with LlamaIndex:
+
+```shell
+pip install llama-index
+```
+```python
+from llama_index.llms import OpenAI
+
+response = OpenAI(model="claude-2", api_key="your-anthropic-key", api_base="http://0.0.0.0:8000").complete("Paul Graham is ")
+print(response)
+```
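+
+The same LlamaIndex client can stream the completion. A minimal sketch (assuming this llama-index version exposes `stream_complete` on its `OpenAI` LLM, yielding partial responses with a `.delta` field):
+
+```python
+from llama_index.llms import OpenAI
+
+llm = OpenAI(model="claude-2", api_key="your-anthropic-key", api_base="http://0.0.0.0:8000")
+
+# print each new chunk as it streams back through the proxy
+for partial in llm.stream_complete("Paul Graham is "):
+    print(partial.delta, end="", flush=True)
+```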