From 20236c1c69324e8a7a8e9ebda7d62787d076ce22 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 1 Jan 2024 12:40:12 +0530
Subject: [PATCH] (docs) proxy

---
 docs/my-website/docs/proxy/logging.md         | 279 +++++++++---------
 .../docs/proxy/streaming_logging.md           |   2 +-
 docs/my-website/docs/proxy/users.md           |   8 +-
 docs/my-website/sidebars.js                   |   4 +-
 4 files changed, 147 insertions(+), 146 deletions(-)

diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md
index 55fe6b8e2..bfbc280db 100644
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@@ -3,7 +3,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Logging - Custom Callbacks, OpenTelemetry, Langfuse, Sentry
+# Logging - Custom Callbacks, Langfuse, OpenTelemetry, Sentry
 
 Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, LangFuse, DynamoDB
 
@@ -290,6 +290,145 @@ ModelResponse(
 ```
 
+## Logging Proxy Input/Output - Langfuse
+We will use the `--config` to set `litellm.success_callback = ["langfuse"]`. This will log all successful LLM calls to Langfuse.
+
+**Step 1**: Install langfuse
+
+```shell
+pip install langfuse>=2.0.0
+```
+
+**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
+```yaml
+model_list:
+ - model_name: gpt-3.5-turbo
+   litellm_params:
+     model: gpt-3.5-turbo
+litellm_settings:
+  success_callback: ["langfuse"]
+```
+
+**Step 3**: Start the proxy, make a test request
+
+Start proxy
+```shell
+litellm --config config.yaml --debug
+```
+
+Test Request
+```shell
+litellm --test
+```
+
+Expected output on Langfuse
+
+
+
+### Logging Metadata to Langfuse
+
+
+
+
+Pass `metadata` as part of the request body
+
+```shell
+curl --location 'http://0.0.0.0:8000/chat/completions' \
+    --header 'Content-Type: application/json' \
+    --data '{
+    "model": "gpt-3.5-turbo",
+    "messages": [
+        {
+        "role": "user",
+        "content": "what llm are you"
+        }
+    ],
+    "metadata": {
+        "generation_name": "ishaan-test-generation",
+        "generation_id": "gen-id22",
+        "trace_id": "trace-id22",
+        "trace_user_id": "user-id2"
+    }
+}'
+```
+
+
+Set `extra_body={"metadata": { }}` to the `metadata` you want to pass
+
+```python
+import openai
+client = openai.OpenAI(
+    api_key="anything",
+    base_url="http://0.0.0.0:8000"
+)
+
+# request sent to model set on litellm proxy, `litellm --model`
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages = [
+        {
+            "role": "user",
+            "content": "this is a test request, write a short poem"
+        }
+    ],
+    extra_body={
+        "metadata": {
+            "generation_name": "ishaan-generation-openai-client",
+            "generation_id": "openai-client-gen-id22",
+            "trace_id": "openai-client-trace-id22",
+            "trace_user_id": "openai-client-user-id2"
+        }
+    }
+)
+
+print(response)
+```
+
+
+```python
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    SystemMessagePromptTemplate,
+)
+from langchain.schema import HumanMessage, SystemMessage
+
+chat = ChatOpenAI(
+    openai_api_base="http://0.0.0.0:8000",
+    model = "gpt-3.5-turbo",
+    temperature=0.1,
+    extra_body={
+        "metadata": {
+            "generation_name": "ishaan-generation-langchain-client",
+            "generation_id": "langchain-client-gen-id22",
+            "trace_id": "langchain-client-trace-id22",
+            "trace_user_id": "langchain-client-user-id2"
+        }
+    }
+)
+
+messages = [
+    SystemMessage(
+        content="You are a helpful assistant that I'm using to make a test request to."
+ ), + HumanMessage( + content="test from litellm. tell me why it's amazing in 1 sentence" + ), +] +response = chat(messages) + +print(response) +``` + + + + + ## OpenTelemetry - Traceloop Traceloop allows you to log LLM Input/Output in the OpenTelemetry format @@ -458,144 +597,6 @@ Here's the log view on Elastic Search. You can see the request `input`, `output` --> -## Logging Proxy Input/Output - Langfuse -We will use the `--config` to set `litellm.success_callback = ["langfuse"]` this will log all successfull LLM calls to langfuse - -**Step 1** Install langfuse - -```shell -pip install langfuse>=2.0.0 -``` - -**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` -```yaml -model_list: - - model_name: gpt-3.5-turbo - litellm_params: - model: gpt-3.5-turbo -litellm_settings: - success_callback: ["langfuse"] -``` - -**Step 3**: Start the proxy, make a test request - -Start proxy -```shell -litellm --config config.yaml --debug -``` - -Test Request -``` -litellm --test -``` - -Expected output on Langfuse - - - -### Logging Metadata to Langfuse - - - - - - -Pass `metadata` as part of the request body - -```shell -curl --location 'http://0.0.0.0:8000/chat/completions' \ - --header 'Content-Type: application/json' \ - --data '{ - "model": "gpt-3.5-turbo", - "messages": [ - { - "role": "user", - "content": "what llm are you" - } - ], - "metadata": { - "generation_name": "ishaan-test-generation", - "generation_id": "gen-id22", - "trace_id": "trace-id22", - "trace_user_id": "user-id2" - } -}' -``` - - - -Set `extra_body={"metadata": { }}` to `metadata` you want to pass - -```python -import openai -client = openai.OpenAI( - api_key="anything", - base_url="http://0.0.0.0:8000" -) - -# request sent to model set on litellm proxy, `litellm --model` -response = client.chat.completions.create( - model="gpt-3.5-turbo", - messages = [ - { - "role": "user", - "content": "this is a test request, write a short poem" - } - ], - extra_body={ - "metadata": { - "generation_name": "ishaan-generation-openai-client", - "generation_id": "openai-client-gen-id22", - "trace_id": "openai-client-trace-id22", - "trace_user_id": "openai-client-user-id2" - } - } -) - -print(response) -``` - - - -```python -from langchain.chat_models import ChatOpenAI -from langchain.prompts.chat import ( - ChatPromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -) -from langchain.schema import HumanMessage, SystemMessage - -chat = ChatOpenAI( - openai_api_base="http://0.0.0.0:8000", - model = "gpt-3.5-turbo", - temperature=0.1, - extra_body={ - "metadata": { - "generation_name": "ishaan-generation-langchain-client", - "generation_id": "langchain-client-gen-id22", - "trace_id": "langchain-client-trace-id22", - "trace_user_id": "langchain-client-user-id2" - } - } -) - -messages = [ - SystemMessage( - content="You are a helpful assistant that im using to make a test request to." - ), - HumanMessage( - content="test from litellm. 
tell me why it's amazing in 1 sentence" - ), -] -response = chat(messages) - -print(response) -``` - - - - ## Logging Proxy Input/Output - DynamoDB We will use the `--config` to set diff --git a/docs/my-website/docs/proxy/streaming_logging.md b/docs/my-website/docs/proxy/streaming_logging.md index 951817f08..48d813b50 100644 --- a/docs/my-website/docs/proxy/streaming_logging.md +++ b/docs/my-website/docs/proxy/streaming_logging.md @@ -1,4 +1,4 @@ -# [Tutorial] Streaming token usage Logging +# Track Token Usage (Streaming) ### Step 1 - Create your custom `litellm` callback class We use `litellm.integrations.custom_logger` for this, **more details about litellm custom callbacks [here](https://docs.litellm.ai/docs/observability/custom_callback)** diff --git a/docs/my-website/docs/proxy/users.md b/docs/my-website/docs/proxy/users.md index a8fdbc749..bce596a5b 100644 --- a/docs/my-website/docs/proxy/users.md +++ b/docs/my-website/docs/proxy/users.md @@ -1,4 +1,4 @@ -# Set Budgets + Rate Limits per user +# 💰 Budgets, Rate Limits per user Requirements: @@ -10,7 +10,7 @@ LiteLLM exposes a `/user/new` endpoint to create budgets for users, that persist This is documented in the swagger (live on your server root endpoint - e.g. `http://0.0.0.0:8000/`). Here's an example request. -```curl +```shell curl --location 'http://localhost:8000/user/new' \ --header 'Authorization: Bearer ' \ --header 'Content-Type: application/json' \ @@ -20,7 +20,7 @@ The request is a normal `/key/generate` request body + a `max_budget` field. **Sample Response** -```curl +```shell { "key": "sk-YF2OxDbrgd1y2KgwxmEA2w", "expires": "2023-12-22T09:53:13.861000Z", @@ -34,7 +34,7 @@ The request is a normal `/key/generate` request body + a `max_budget` field. Set max parallel requests a user can make, when you create user keys - `/key/generate`. -```bash +```shell curl --location 'http://0.0.0.0:8000/key/generate' \ --header 'Authorization: Bearer sk-1234' \ --header 'Content-Type: application/json' \ diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index a672bb7b6..d53a39d6c 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -111,10 +111,10 @@ const sidebars = { "proxy/health", "proxy/call_hooks", "proxy/caching", - "proxy/streaming_logging", "proxy/logging", - "proxy/cli", + "proxy/streaming_logging", "proxy/deploy", + "proxy/cli", ] }, "routing",