From fc0ced48c1a3c695186b1d433bb2d99234d6c67e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Est=C3=A9vez?= Date: Fri, 3 May 2024 23:38:54 -0400 Subject: [PATCH 01/32] add_function_to_prompt bug fix This blows up when there's no "functions" in the dictionary even when tools is present because the inner function executes regardless (does not short circuit). --- litellm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/utils.py b/litellm/utils.py index ac8ec35d4..75d6f8b7f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4956,7 +4956,7 @@ def get_optional_params( litellm.add_function_to_prompt ): # if user opts to add it to prompt instead optional_params["functions_unsupported_model"] = non_default_params.pop( - "tools", non_default_params.pop("functions") + "tools", non_default_params.pop("functions", None) ) else: raise UnsupportedParamsError( From 3acad270e57d446673137edf9409ab5b773661d5 Mon Sep 17 00:00:00 2001 From: Mehmet Bektas Date: Sun, 5 May 2024 19:44:25 -0700 Subject: [PATCH 02/32] support sync ollama embeddings --- litellm/llms/ollama.py | 20 ++++++++++++++++++++ litellm/main.py | 20 ++++++++++---------- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py index 5972d9e8c..f4c06dbe6 100644 --- a/litellm/llms/ollama.py +++ b/litellm/llms/ollama.py @@ -417,3 +417,23 @@ async def ollama_aembeddings( "total_tokens": total_input_tokens, } return model_response + +def ollama_embeddings( + api_base: str, + model: str, + prompts: list, + optional_params=None, + logging_obj=None, + model_response=None, + encoding=None, +): + return asyncio.run( + ollama_aembeddings( + api_base, + model, + prompts, + optional_params, + logging_obj, + model_response, + encoding) + ) diff --git a/litellm/main.py b/litellm/main.py index 8717af570..9aaf30f05 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2946,16 +2946,16 @@ def embedding( model=model, # type: ignore llm_provider="ollama", # type: ignore ) - if aembedding: - response = ollama.ollama_aembeddings( - api_base=api_base, - model=model, - prompts=input, - encoding=encoding, - logging_obj=logging, - optional_params=optional_params, - model_response=EmbeddingResponse(), - ) + ollama_embeddings_fn = ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings + response = ollama_embeddings_fn( + api_base=api_base, + model=model, + prompts=input, + encoding=encoding, + logging_obj=logging, + optional_params=optional_params, + model_response=EmbeddingResponse(), + ) elif custom_llm_provider == "sagemaker": response = sagemaker.embedding( model=model, From 64a64c68c5d2e511cd1c8db1b7840f09cffca12d Mon Sep 17 00:00:00 2001 From: Mehmet Bektas Date: Sun, 5 May 2024 20:25:43 -0700 Subject: [PATCH 03/32] update ollama test file --- litellm/tests/test_ollama_local.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/litellm/tests/test_ollama_local.py b/litellm/tests/test_ollama_local.py index d4dbc7341..f5d629140 100644 --- a/litellm/tests/test_ollama_local.py +++ b/litellm/tests/test_ollama_local.py @@ -24,6 +24,14 @@ # asyncio.run(test_ollama_aembeddings()) +# def test_ollama_embeddings(): +# litellm.set_verbose = True +# input = "The food was delicious and the waiter..." 
+# response = litellm.embedding(model="ollama/mistral", input=input) +# print(response) + +# test_ollama_embeddings() + # def test_ollama_streaming(): # try: # litellm.set_verbose = False From 157d7a7c28caac70917c8d101636bba0d76afdd5 Mon Sep 17 00:00:00 2001 From: Mehmet Bektas Date: Mon, 6 May 2024 20:11:45 -0700 Subject: [PATCH 04/32] add ollama embeddings unit tests --- litellm/tests/test_ollama.py | 52 ++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/litellm/tests/test_ollama.py b/litellm/tests/test_ollama.py index 82ec16f0e..cc33ea993 100644 --- a/litellm/tests/test_ollama.py +++ b/litellm/tests/test_ollama.py @@ -1,3 +1,4 @@ +import asyncio import sys, os import traceback from dotenv import load_dotenv @@ -10,10 +11,10 @@ sys.path.insert( ) # Adds the parent directory to the system path import pytest import litellm - +from unittest import mock ## for ollama we can't test making the completion call -from litellm.utils import get_optional_params, get_llm_provider +from litellm.utils import EmbeddingResponse, get_optional_params, get_llm_provider def test_get_ollama_params(): @@ -58,3 +59,50 @@ def test_ollama_json_mode(): except Exception as e: pytest.fail(f"Error occurred: {e}") # test_ollama_json_mode() + + +mock_ollama_embedding_response = EmbeddingResponse(model="ollama/nomic-embed-text") + +@mock.patch( + "litellm.llms.ollama.ollama_embeddings", + return_value=mock_ollama_embedding_response, +) +def test_ollama_embeddings(mock_embeddings): + # assert that ollama_embeddings is called with the right parameters + try: + embeddings = litellm.embedding(model="ollama/nomic-embed-text", input=["hello world"]) + print(embeddings) + mock_embeddings.assert_called_once_with( + api_base="http://localhost:11434", + model="nomic-embed-text", + prompts=["hello world"], + optional_params=mock.ANY, + logging_obj=mock.ANY, + model_response=mock.ANY, + encoding=mock.ANY, + ) + except Exception as e: + pytest.fail(f"Error occurred: {e}") +test_ollama_embeddings() + +@mock.patch( + "litellm.llms.ollama.ollama_aembeddings", + return_value=mock_ollama_embedding_response, +) +def test_ollama_aembeddings(mock_aembeddings): + # assert that ollama_aembeddings is called with the right parameters + try: + embeddings = asyncio.run(litellm.aembedding(model="ollama/nomic-embed-text", input=["hello world"])) + print(embeddings) + mock_aembeddings.assert_called_once_with( + api_base="http://localhost:11434", + model="nomic-embed-text", + prompts=["hello world"], + optional_params=mock.ANY, + logging_obj=mock.ANY, + model_response=mock.ANY, + encoding=mock.ANY, + ) + except Exception as e: + pytest.fail(f"Error occurred: {e}") +test_ollama_aembeddings() From 8e9a4fa9eb97068354d182c2de3add7c2fccf782 Mon Sep 17 00:00:00 2001 From: Mehmet Bektas Date: Mon, 6 May 2024 20:13:11 -0700 Subject: [PATCH 05/32] comment out test method calls, following the pattern --- litellm/tests/test_ollama.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/tests/test_ollama.py b/litellm/tests/test_ollama.py index cc33ea993..77a6c91c3 100644 --- a/litellm/tests/test_ollama.py +++ b/litellm/tests/test_ollama.py @@ -83,7 +83,7 @@ def test_ollama_embeddings(mock_embeddings): ) except Exception as e: pytest.fail(f"Error occurred: {e}") -test_ollama_embeddings() +# test_ollama_embeddings() @mock.patch( "litellm.llms.ollama.ollama_aembeddings", @@ -105,4 +105,4 @@ def test_ollama_aembeddings(mock_aembeddings): ) except Exception as e: pytest.fail(f"Error occurred: 
{e}") -test_ollama_aembeddings() +# test_ollama_aembeddings() From 7d7b59ff78a44c9375167dabb79bb16b56a17c91 Mon Sep 17 00:00:00 2001 From: nkvch Date: Mon, 6 May 2024 16:59:13 +0200 Subject: [PATCH 06/32] * feat(factory.py): add support for merging consecutive messages of one role when separated with empty message of another role --- litellm/llms/prompt_templates/factory.py | 62 ++++++++++++++---------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 082030368..6e8589d58 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -1,27 +1,23 @@ -from enum import Enum -import requests, traceback -import json, re, xml.etree.ElementTree as ET -from jinja2 import Template, exceptions, meta, BaseLoader -from jinja2.sandbox import ImmutableSandboxedEnvironment -from typing import ( - Any, - List, - Mapping, - MutableMapping, - Optional, - Sequence, -) -import litellm -from litellm.types.completion import ( - ChatCompletionUserMessageParam, - ChatCompletionSystemMessageParam, - ChatCompletionMessageParam, - ChatCompletionFunctionMessageParam, - ChatCompletionMessageToolCallParam, - ChatCompletionToolMessageParam, -) -from litellm.types.llms.anthropic import * +import json +import re +import traceback import uuid +import xml.etree.ElementTree as ET +from enum import Enum +from typing import Any, List, Mapping, MutableMapping, Optional, Sequence + +import requests +from jinja2 import BaseLoader, Template, exceptions, meta +from jinja2.sandbox import ImmutableSandboxedEnvironment + +import litellm +from litellm.types.completion import (ChatCompletionFunctionMessageParam, + ChatCompletionMessageParam, + ChatCompletionMessageToolCallParam, + ChatCompletionSystemMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam) +from litellm.types.llms.anthropic import * def default_pt(messages): @@ -603,9 +599,10 @@ def construct_tool_use_system_prompt( def convert_url_to_base64(url): - import requests import base64 + import requests + for _ in range(3): try: response = requests.get(url) @@ -984,6 +981,7 @@ def anthropic_messages_pt(messages: list): new_messages = [] msg_i = 0 tool_use_param = False + merge_with_previous = False while msg_i < len(messages): user_content = [] init_msg_i = msg_i @@ -1016,7 +1014,13 @@ def anthropic_messages_pt(messages: list): msg_i += 1 if user_content: - new_messages.append({"role": "user", "content": user_content}) + if merge_with_previous: + new_messages[-1]["content"].extend(user_content) + merge_with_previous = False + else: + new_messages.append({"role": "user", "content": user_content}) + else: + merge_with_previous = True assistant_content = [] ## MERGE CONSECUTIVE ASSISTANT CONTENT ## @@ -1044,7 +1048,13 @@ def anthropic_messages_pt(messages: list): msg_i += 1 if assistant_content: - new_messages.append({"role": "assistant", "content": assistant_content}) + if merge_with_previous: + new_messages[-1]["content"].extend(assistant_content) + merge_with_previous = False + else: + new_messages.append({"role": "assistant", "content": assistant_content}) + else: + merge_with_previous = True if msg_i == init_msg_i: # prevent infinite loops raise Exception( From 389530efb4aca53048d84d61766b487d8ed1b192 Mon Sep 17 00:00:00 2001 From: nkvch Date: Tue, 7 May 2024 12:45:16 +0200 Subject: [PATCH 07/32] * chore(.gitignore): add 'venv' to the list of ignored files/directories * fix(test_completion.py): fix import 
order and remove unused imports * feat(test_completion.py): add test for empty assistant message in completion_claude_3_empty_message() --- .gitignore | 1 + litellm/tests/test_completion.py | 38 ++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index abc4ecb0c..4f3f65b93 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .venv +venv .env litellm_uuid.txt __pycache__/ diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 3d0907814..54f040cfa 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1,17 +1,21 @@ -import sys, os +import os +import sys import traceback + from dotenv import load_dotenv load_dotenv() -import os, io +import io +import os sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the, system path import pytest + import litellm -from litellm import embedding, completion, completion_cost, Timeout -from litellm import RateLimitError +from litellm import (RateLimitError, Timeout, completion, completion_cost, + embedding) from litellm.llms.prompt_templates.factory import anthropic_messages_pt # litellm.num_retries=3 @@ -163,6 +167,32 @@ def test_completion_claude_3(): pytest.fail(f"Error occurred: {e}") +def test_completion_claude_3_empty_message(): + litellm.set_verbose = True + messages = [{'role': 'user', 'content': 'please create a logo for a modern AI app. create in SVG format'}, + {'role': 'assistant', 'content': "To create a logo for a modern AI app in SVG format, I'll use the DALL-E 3 Image Generator."}, + {'role': 'user', 'content': 'output SVG'}, + {'role': 'assistant', 'content': 'To generate a logo for a modern AI app in SVG format using DALL-E 3, I need to:\n1. Craft a detailed prompt describing the desired logo style and elements\n2. Specify the image size (SVG is vector-based, so size is less relevant)\n3. Call the generate_image function with the prompt and size\n4. Display the generated SVG logo using the provided syntax\nThe prompt should include keywords related to AI, modern design, and SVG format. Some key elements to incorporate could be a brain symbol, circuit lines, or a robot icon, using a minimalist style and a blue color scheme often associated with technology and intelligence.', + 'tool_calls': [ + {'id': 'toolu_01KEUtRVySSeMrf3g7rCA12E', 'type': 'function', 'function': {'name': 'python_tool', 'arguments': '{"code": "...python code..."}'}} + ]}, + {'role': 'tool', 'content': '...python output...', 'tool_call_id': 'toolu_01KEUtRVySSeMrf3g7rCA12E'}, + {'role': 'assistant', 'content': ''}, # empty message appended by model after tool call response + {'role': 'user', 'content': 'write SVG source youself!'}, + ] + + try: + response = completion( + model="anthropic/claude-3-opus-20240229", + messages=messages, + stream=True, + tools=[{'type': 'function', 'function': {'name': 'python_tool', 'description': 'Execute code', 'parameters': {'type': 'object', 'properties': {'headline': {'description': 'Must have. 
Title of this tool call (maximum 15 characters).', 'type': 'string'}, 'code': {'description': 'Python code to execute.', 'type': 'string'}}, 'required': ['code', 'headline']}}}] + ) + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + def test_completion_claude_3_function_call(): litellm.set_verbose = True tools = [ From 06d1aec49ef6f30601ebf6043b8af8de071d005d Mon Sep 17 00:00:00 2001 From: nkvch Date: Tue, 7 May 2024 13:42:42 +0200 Subject: [PATCH 08/32] * fix(factory.py): fix conditional statement for merging messages with previous ones --- litellm/llms/prompt_templates/factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 6e8589d58..bce472ea0 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -1019,7 +1019,7 @@ def anthropic_messages_pt(messages: list): merge_with_previous = False else: new_messages.append({"role": "user", "content": user_content}) - else: + elif msg_i > 0: merge_with_previous = True assistant_content = [] @@ -1053,7 +1053,7 @@ def anthropic_messages_pt(messages: list): merge_with_previous = False else: new_messages.append({"role": "assistant", "content": assistant_content}) - else: + elif msg_i > 0: merge_with_previous = True if msg_i == init_msg_i: # prevent infinite loops From 1b811cd1529448c563ac11cb83d50eb1011816eb Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 May 2024 13:24:28 -0400 Subject: [PATCH 09/32] unit test and list fix --- .../test_get_optional_params_functions_not_supported.py | 9 +++++++++ litellm/utils.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 litellm/tests/test_get_optional_params_functions_not_supported.py diff --git a/litellm/tests/test_get_optional_params_functions_not_supported.py b/litellm/tests/test_get_optional_params_functions_not_supported.py new file mode 100644 index 000000000..2abfbc41f --- /dev/null +++ b/litellm/tests/test_get_optional_params_functions_not_supported.py @@ -0,0 +1,9 @@ +import litellm +from litellm import get_optional_params + +litellm.add_function_to_prompt = True +optional_params = get_optional_params( + tools= [{'type': 'function', 'function': {'description': 'Get the current weather in a given location', 'name': 'get_current_weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. 
San Francisco, CA'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit']}}, 'required': ['location']}}}], + tool_choice= 'auto', +) +assert optional_params is not None \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index 75d6f8b7f..c0241f1c3 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -5769,7 +5769,7 @@ def get_optional_params( optional_params["extra_body"] = extra_body else: # if user passed in non-default kwargs for specific providers/models, pass them along - for k in passed_params.keys(): + for k in list(passed_params.keys()): if k not in default_params.keys(): optional_params[k] = passed_params[k] print_verbose(f"Final returned optional params: {optional_params}") From 7c5c9a8152762d12fd515c163d42f9a30e79581d Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 May 2024 13:41:05 -0400 Subject: [PATCH 10/32] looks like cohere does support function calling --- litellm/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/utils.py b/litellm/utils.py index c0241f1c3..75476edc7 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4932,6 +4932,7 @@ def get_optional_params( and custom_llm_provider != "mistral" and custom_llm_provider != "anthropic" and custom_llm_provider != "cohere_chat" + and custom_llm_provider != "cohere" and custom_llm_provider != "bedrock" and custom_llm_provider != "ollama_chat" ): From 90eb0ea022792a95a1d613a8cbc0681c3aa01ce8 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 7 May 2024 11:44:03 -0700 Subject: [PATCH 11/32] Added support for the deepseek api --- README.md | 1 + docs/my-website/docs/providers/deepseek.md | 54 ++++++++++++++++++++++ docs/my-website/sidebars.js | 1 + litellm/__init__.py | 3 ++ litellm/main.py | 6 ++- litellm/utils.py | 51 +++++++++++++++++++- 6 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 docs/my-website/docs/providers/deepseek.md diff --git a/README.md b/README.md index 9344c0f22..684d5de73 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,7 @@ curl 'http://0.0.0.0:4000/key/generate' \ | [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | +| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | diff --git a/docs/my-website/docs/providers/deepseek.md b/docs/my-website/docs/providers/deepseek.md new file mode 100644 index 000000000..678561eca --- /dev/null +++ b/docs/my-website/docs/providers/deepseek.md @@ -0,0 +1,54 @@ +# Deepseek +https://deepseek.com/ + +**We support ALL Deepseek models, just set `deepseek/` as a prefix when sending completion requests** + +## API Key +```python +# env variable +os.environ['DEEPSEEK_API_KEY'] +``` + +## Sample Usage +```python +from litellm import completion +import os + +os.environ['DEEPSEEK_API_KEY'] = "" +response = completion( + model="deepseek/deepseek-chat", + messages=[ + {"role": "user", "content": "hello from litellm"} + ], +) +print(response) +``` + +## Sample Usage - Streaming +```python +from litellm import completion +import os + +os.environ['DEEPSEEK_API_KEY'] = "" +response = completion( + model="deepseek/deepseek-chat", + messages=[ + 
{"role": "user", "content": "hello from litellm"} + ], + stream=True +) + +for chunk in response: + print(chunk) +``` + + +## Supported Models - ALL Deepseek Models Supported! +We support ALL Deepseek models, just set `deepseek/` as a prefix when sending completion requests + +| Model Name | Function Call | +|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| deepseek-chat | `completion(model="deepseek/deepseek-chat", messages)` | +| deepseek-coder | `completion(model="deepseek/deepseek-chat", messages)` | + + diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index f5777d6e7..d00d853a0 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -134,6 +134,7 @@ const sidebars = { "providers/ollama", "providers/perplexity", "providers/groq", + "providers/deepseek", "providers/fireworks_ai", "providers/vllm", "providers/xinference", diff --git a/litellm/__init__.py b/litellm/__init__.py index 5e745668b..4f72504f6 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -361,6 +361,7 @@ openai_compatible_endpoints: List = [ "api.deepinfra.com/v1/openai", "api.mistral.ai/v1", "api.groq.com/openai/v1", + "api.deepseek.com/v1", "api.together.xyz/v1", ] @@ -369,6 +370,7 @@ openai_compatible_providers: List = [ "anyscale", "mistral", "groq", + "deepseek", "deepinfra", "perplexity", "xinference", @@ -523,6 +525,7 @@ provider_list: List = [ "anyscale", "mistral", "groq", + "deepseek", "maritalk", "voyage", "cloudflare", diff --git a/litellm/main.py b/litellm/main.py index de35dbfd0..acd7f9b90 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -305,6 +305,7 @@ async def acompletion( or custom_llm_provider == "deepinfra" or custom_llm_provider == "perplexity" or custom_llm_provider == "groq" + or custom_llm_provider == "deepseek" or custom_llm_provider == "text-completion-openai" or custom_llm_provider == "huggingface" or custom_llm_provider == "ollama" @@ -982,6 +983,7 @@ def completion( or custom_llm_provider == "deepinfra" or custom_llm_provider == "perplexity" or custom_llm_provider == "groq" + or custom_llm_provider == "deepseek" or custom_llm_provider == "anyscale" or custom_llm_provider == "mistral" or custom_llm_provider == "openai" @@ -2168,7 +2170,7 @@ def completion( """ assume input to custom LLM api bases follow this format: resp = requests.post( - api_base, + api_base, json={ 'model': 'meta-llama/Llama-2-13b-hf', # model name 'params': { @@ -2565,6 +2567,7 @@ async def aembedding(*args, **kwargs): or custom_llm_provider == "deepinfra" or custom_llm_provider == "perplexity" or custom_llm_provider == "groq" + or custom_llm_provider == "deepseek" or custom_llm_provider == "fireworks_ai" or custom_llm_provider == "ollama" or custom_llm_provider == "vertex_ai" @@ -3085,6 +3088,7 @@ async def atext_completion(*args, **kwargs): or custom_llm_provider == "deepinfra" or custom_llm_provider == "perplexity" or custom_llm_provider == "groq" + or custom_llm_provider == "deepseek" or custom_llm_provider == "fireworks_ai" or custom_llm_provider == "text-completion-openai" or custom_llm_provider == "huggingface" diff --git a/litellm/utils.py b/litellm/utils.py index a938a0ba8..0b4aa0e87 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4929,6 +4929,7 @@ def get_optional_params( and custom_llm_provider != "anyscale" and custom_llm_provider != "together_ai" and custom_llm_provider != "groq" + and 
custom_llm_provider != "deepseek" and custom_llm_provider != "mistral" and custom_llm_provider != "anthropic" and custom_llm_provider != "cohere_chat" @@ -5614,6 +5615,29 @@ def get_optional_params( if seed is not None: optional_params["seed"] = seed + elif custom_llm_provider == "deepseek": + supported_params = get_supported_openai_params( + model=model, custom_llm_provider=custom_llm_provider + ) + _check_valid_arg(supported_params=supported_params) + + if frequency_penalty is not None: + optional_params["frequency_penalty"] = frequency_penalty + if max_tokens is not None: + optional_params["max_tokens"] = max_tokens + if presence_penalty is not None: + optional_params["presence_penalty"] = presence_penalty + if stop is not None: + optional_params["stop"] = stop + if stream is not None: + optional_params["stream"] = stream + if temperature is not None: + optional_params["temperature"] = temperature + if logprobs is not None: + optional_params["logprobs"] = logprobs + if top_logprobs is not None: + optional_params["top_logprobs"] = top_logprobs + elif custom_llm_provider == "openrouter": supported_params = get_supported_openai_params( model=model, custom_llm_provider=custom_llm_provider @@ -5946,6 +5970,19 @@ def get_supported_openai_params(model: str, custom_llm_provider: str): "response_format", "seed", ] + elif custom_llm_provider == "deepseek": + return [ + # https://platform.deepseek.com/api-docs/api/create-chat-completion + "frequency_penalty", + "max_tokens", + "presence_penalty", + "stop", + "stream", + "temperature", + "top_p", + "logprobs", + "top_logprobs", + ] elif custom_llm_provider == "cohere": return [ "stream", @@ -6239,8 +6276,12 @@ def get_llm_provider( # groq is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.groq.com/openai/v1 api_base = "https://api.groq.com/openai/v1" dynamic_api_key = get_secret("GROQ_API_KEY") + elif custom_llm_provider == "deepseek": + # deepseek is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.deepseek.com/v1 + api_base = "https://api.deepseek.com/v1" + dynamic_api_key = get_secret("DEEPSEEK_API_KEY") elif custom_llm_provider == "fireworks_ai": - # fireworks is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.groq.com/openai/v1 + # fireworks is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.fireworks.ai/inference/v1 if not model.startswith("accounts/fireworks/models"): model = f"accounts/fireworks/models/{model}" api_base = "https://api.fireworks.ai/inference/v1" @@ -6303,6 +6344,9 @@ def get_llm_provider( elif endpoint == "api.groq.com/openai/v1": custom_llm_provider = "groq" dynamic_api_key = get_secret("GROQ_API_KEY") + elif endpoint == "api.deepseek.com/v1": + custom_llm_provider = "deepseek" + dynamic_api_key = get_secret("DEEPSEEK_API_KEY") return model, custom_llm_provider, dynamic_api_key, api_base # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, et.) 
@@ -6901,6 +6945,11 @@ def validate_environment(model: Optional[str] = None) -> dict: keys_in_environment = True else: missing_keys.append("GROQ_API_KEY") + elif custom_llm_provider == "deepseek": + if "DEEPSEEK_API_KEY" in os.environ: + keys_in_environment = True + else: + missing_keys.append("DEEPSEEK_API_KEY") elif custom_llm_provider == "mistral": if "MISTRAL_API_KEY" in os.environ: keys_in_environment = True From 9162f9c2c5410235de79b1a47cd94ce9e50fb122 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 7 May 2024 11:44:55 -0700 Subject: [PATCH 12/32] Added costs & context json --- model_prices_and_context_window.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index a6c233b99..10c70a858 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -739,6 +739,24 @@ "litellm_provider": "mistral", "mode": "embedding" }, + "deepseek-chat": { + "max_tokens": 4096, + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000014, + "output_cost_per_token": 0.00000028, + "litellm_provider": "deepseek", + "mode": "chat" + }, + "deepseek-coder": { + "max_tokens": 4096, + "max_input_tokens": 16000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000014, + "output_cost_per_token": 0.00000028, + "litellm_provider": "deepseek", + "mode": "chat" + }, "groq/llama2-70b-4096": { "max_tokens": 4096, "max_input_tokens": 4096, From 82a4c68e6075c41b9c673c1e7d68b9af16c845cb Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 7 May 2024 11:58:05 -0700 Subject: [PATCH 13/32] Added deepseek completion test --- litellm/tests/test_completion.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 3d0907814..471ebfee6 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1061,16 +1061,16 @@ def test_completion_perplexity_api_2(): ######### HUGGING FACE TESTS ######################## ##################################################### """ -HF Tests we should pass -- TGI: - - Pro Inference API - - Deployed Endpoint -- Coversational - - Free Inference API - - Deployed Endpoint +HF Tests we should pass +- TGI: + - Pro Inference API + - Deployed Endpoint +- Coversational + - Free Inference API + - Deployed Endpoint - Neither TGI or Coversational - - Free Inference API - - Deployed Endpoint + - Free Inference API + - Deployed Endpoint """ @@ -3016,6 +3016,20 @@ async def test_acompletion_gemini(): else: pytest.fail(f"Error occurred: {e}") +# Deepseek tests +def test_completion_deepseek(): + litellm.set_verbose = True + model_name = "deepseek/deepseek-chat" + messages = [{"role": "user", "content": "Hey, how's it going?"}] + try: + response = completion(model=model_name, messages=messages) + # Add any assertions here to check the response + print(response) + except litellm.APIError as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + # Palm tests def test_completion_palm(): From 4c64e3da1062c5c8438cbda5433b6253ae3314dc Mon Sep 17 00:00:00 2001 From: phact Date: Tue, 7 May 2024 14:58:35 -0400 Subject: [PATCH 14/32] locals().copy() --- litellm/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/utils.py b/litellm/utils.py index 75476edc7..db48d3617 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4836,7 +4836,7 @@ 
def get_optional_params( **kwargs, ): # retrieve all parameters passed to the function - passed_params = locals() + passed_params = locals().copy() special_params = passed_params.pop("kwargs") for k, v in special_params.items(): if k.startswith("aws_") and ( @@ -5770,7 +5770,7 @@ def get_optional_params( optional_params["extra_body"] = extra_body else: # if user passed in non-default kwargs for specific providers/models, pass them along - for k in list(passed_params.keys()): + for k in passed_params.keys(): if k not in default_params.keys(): optional_params[k] = passed_params[k] print_verbose(f"Final returned optional params: {optional_params}") From 872470ff1f301cc0cc0b1c70785c806ba6af5388 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 7 May 2024 12:58:49 -0700 Subject: [PATCH 15/32] feat(slack_alerting.py): reintegrate langfuse trace url for slack alerts this ensures langfuse trace url returned in llm api exception err --- litellm/integrations/langfuse.py | 37 ++++++++++------- litellm/integrations/slack_alerting.py | 35 ++++++++++------ litellm/proxy/_super_secret_config.yaml | 5 ++- litellm/proxy/proxy_server.py | 53 +++++++++++++++++-------- litellm/proxy/utils.py | 19 ++++++++- 5 files changed, 101 insertions(+), 48 deletions(-) diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index fa8b0c61d..caf5437b2 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -262,7 +262,7 @@ class LangFuseLogger: try: tags = [] - metadata = copy.deepcopy(metadata) # Avoid modifying the original metadata + metadata = copy.deepcopy(metadata) # Avoid modifying the original metadata supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3") supports_prompt = Version(langfuse.version.__version__) >= Version("2.7.3") supports_costs = Version(langfuse.version.__version__) >= Version("2.7.3") @@ -276,7 +276,6 @@ class LangFuseLogger: metadata_tags = metadata.pop("tags", []) tags = metadata_tags - # Clean Metadata before logging - never log raw metadata # the raw metadata can contain circular references which leads to infinite recursion # we clean out all extra litellm metadata params before logging @@ -303,18 +302,17 @@ class LangFuseLogger: else: clean_metadata[key] = value - session_id = clean_metadata.pop("session_id", None) trace_name = clean_metadata.pop("trace_name", None) trace_id = clean_metadata.pop("trace_id", None) existing_trace_id = clean_metadata.pop("existing_trace_id", None) update_trace_keys = clean_metadata.pop("update_trace_keys", []) - + if trace_name is None and existing_trace_id is None: # just log `litellm-{call_type}` as the trace name ## DO NOT SET TRACE_NAME if trace-id set. this can lead to overwriting of past traces. 
trace_name = f"litellm-{kwargs.get('call_type', 'completion')}" - + if existing_trace_id is not None: trace_params = {"id": existing_trace_id} @@ -322,15 +320,18 @@ class LangFuseLogger: for metadata_param_key in update_trace_keys: trace_param_key = metadata_param_key.replace("trace_", "") if trace_param_key not in trace_params: - updated_trace_value = clean_metadata.pop(metadata_param_key, None) + updated_trace_value = clean_metadata.pop( + metadata_param_key, None + ) if updated_trace_value is not None: trace_params[trace_param_key] = updated_trace_value - # Pop the trace specific keys that would have been popped if there were a new trace - for key in list(filter(lambda key: key.startswith("trace_"), clean_metadata.keys())): + for key in list( + filter(lambda key: key.startswith("trace_"), clean_metadata.keys()) + ): clean_metadata.pop(key, None) - + # Special keys that are found in the function arguments and not the metadata if "input" in update_trace_keys: trace_params["input"] = input @@ -342,16 +343,22 @@ class LangFuseLogger: "name": trace_name, "session_id": session_id, "input": input, - "version": clean_metadata.pop("trace_version", clean_metadata.get("version", None)), # If provided just version, it will applied to the trace as well, if applied a trace version it will take precedence + "version": clean_metadata.pop( + "trace_version", clean_metadata.get("version", None) + ), # If provided just version, it will applied to the trace as well, if applied a trace version it will take precedence } - for key in list(filter(lambda key: key.startswith("trace_"), clean_metadata.keys())): - trace_params[key.replace("trace_", "")] = clean_metadata.pop(key, None) - + for key in list( + filter(lambda key: key.startswith("trace_"), clean_metadata.keys()) + ): + trace_params[key.replace("trace_", "")] = clean_metadata.pop( + key, None + ) + if level == "ERROR": trace_params["status_message"] = output else: trace_params["output"] = output - + cost = kwargs.get("response_cost", None) print_verbose(f"trace: {cost}") @@ -454,7 +461,7 @@ class LangFuseLogger: ) generation_client = trace.generation(**generation_params) - + return generation_client.trace_id, generation_id except Exception as e: verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}") diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index 5546f7c33..f033d99cd 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -149,16 +149,21 @@ class SlackAlerting(CustomLogger): def _add_langfuse_trace_id_to_alert( self, - request_info: str, request_data: Optional[dict] = None, - kwargs: Optional[dict] = None, - type: Literal["hanging_request", "slow_response"] = "hanging_request", - start_time: Optional[datetime.datetime] = None, - end_time: Optional[datetime.datetime] = None, - ): + ) -> Optional[str]: + """ + Returns langfuse trace url + """ # do nothing for now - pass - return request_info + if ( + request_data is not None + and request_data.get("metadata", {}).get("trace_id", None) is not None + ): + trace_id = request_data["metadata"]["trace_id"] + if litellm.utils.langFuseLogger is not None: + base_url = litellm.utils.langFuseLogger.Langfuse.base_url + return f"{base_url}/trace/{trace_id}" + return None def _response_taking_too_long_callback_helper( self, @@ -501,14 +506,13 @@ class SlackAlerting(CustomLogger): ) if "langfuse" in litellm.success_callback: - request_info = self._add_langfuse_trace_id_to_alert( - request_info=request_info, + 
langfuse_url = self._add_langfuse_trace_id_to_alert( request_data=request_data, - type="hanging_request", - start_time=start_time, - end_time=end_time, ) + if langfuse_url is not None: + request_info += "\n🪢 Langfuse Trace: {}".format(langfuse_url) + # add deployment latencies to alert _deployment_latency_map = self._get_deployment_latencies_to_alert( metadata=request_data.get("metadata", {}) @@ -701,6 +705,7 @@ Model Info: "daily_reports", "new_model_added", ], + **kwargs, ): """ Alerting based on thresholds: - https://github.com/BerriAI/litellm/issues/1298 @@ -731,6 +736,10 @@ Model Info: formatted_message = ( f"Level: `{level}`\nTimestamp: `{current_time}`\n\nMessage: {message}" ) + + if kwargs: + for key, value in kwargs.items(): + formatted_message += f"\n\n{key}: `{value}`\n\n" if _proxy_base_url is not None: formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`" diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml index 0475508e3..b7293a17f 100644 --- a/litellm/proxy/_super_secret_config.yaml +++ b/litellm/proxy/_super_secret_config.yaml @@ -14,6 +14,9 @@ model_list: api_key: my-fake-key-3 model: openai/my-fake-model-3 model_name: fake-openai-endpoint +- model_name: gpt-4 + litellm_params: + model: gpt-3.5-turbo router_settings: num_retries: 0 enable_pre_call_checks: true @@ -25,7 +28,7 @@ router_settings: routing_strategy: "latency-based-routing" litellm_settings: - success_callback: ["openmeter"] + success_callback: ["langfuse"] general_settings: alerting: ["slack"] diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index c22b381e2..d6d27fc4c 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -3160,7 +3160,9 @@ def data_generator(response): yield f"data: {json.dumps(chunk)}\n\n" -async def async_data_generator(response, user_api_key_dict): +async def async_data_generator( + response, user_api_key_dict: UserAPIKeyAuth, request_data: dict +): verbose_proxy_logger.debug("inside generator") try: start_time = time.time() @@ -3177,7 +3179,9 @@ async def async_data_generator(response, user_api_key_dict): except Exception as e: traceback.print_exc() await proxy_logging_obj.post_call_failure_hook( - user_api_key_dict=user_api_key_dict, original_exception=e + user_api_key_dict=user_api_key_dict, + original_exception=e, + request_data=request_data, ) verbose_proxy_logger.debug( f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. 
`litellm --model gpt-3.5-turbo --debug`" @@ -3202,8 +3206,14 @@ async def async_data_generator(response, user_api_key_dict): yield f"data: {error_returned}\n\n" -def select_data_generator(response, user_api_key_dict): - return async_data_generator(response=response, user_api_key_dict=user_api_key_dict) +def select_data_generator( + response, user_api_key_dict: UserAPIKeyAuth, request_data: dict +): + return async_data_generator( + response=response, + user_api_key_dict=user_api_key_dict, + request_data=request_data, + ) def get_litellm_model_info(model: dict = {}): @@ -3496,9 +3506,8 @@ async def chat_completion( user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): global general_settings, user_debug, proxy_logging_obj, llm_model_list + data = {} try: - # async with llm_router.sem - data = {} body = await request.body() body_str = body.decode() try: @@ -3689,7 +3698,9 @@ async def chat_completion( "x-litellm-model-api-base": api_base, } selected_data_generator = select_data_generator( - response=response, user_api_key_dict=user_api_key_dict + response=response, + user_api_key_dict=user_api_key_dict, + request_data=data, ) return StreamingResponse( selected_data_generator, @@ -3711,7 +3722,7 @@ async def chat_completion( data["litellm_status"] = "fail" # used for alerting traceback.print_exc() await proxy_logging_obj.post_call_failure_hook( - user_api_key_dict=user_api_key_dict, original_exception=e + user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.debug( f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`" @@ -3873,7 +3884,9 @@ async def completion( "x-litellm-model-id": model_id, } selected_data_generator = select_data_generator( - response=response, user_api_key_dict=user_api_key_dict + response=response, + user_api_key_dict=user_api_key_dict, + request_data=data, ) return StreamingResponse( @@ -3926,6 +3939,7 @@ async def embeddings( user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): global proxy_logging_obj + data: Any = {} try: # Use orjson to parse JSON data, orjson speeds up requests significantly body = await request.body() @@ -4071,7 +4085,7 @@ async def embeddings( except Exception as e: data["litellm_status"] = "fail" # used for alerting await proxy_logging_obj.post_call_failure_hook( - user_api_key_dict=user_api_key_dict, original_exception=e + user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) traceback.print_exc() if isinstance(e, HTTPException): @@ -4108,6 +4122,7 @@ async def image_generation( user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): global proxy_logging_obj + data = {} try: # Use orjson to parse JSON data, orjson speeds up requests significantly body = await request.body() @@ -4227,7 +4242,7 @@ async def image_generation( except Exception as e: data["litellm_status"] = "fail" # used for alerting await proxy_logging_obj.post_call_failure_hook( - user_api_key_dict=user_api_key_dict, original_exception=e + user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) traceback.print_exc() if isinstance(e, HTTPException): @@ -4268,10 +4283,11 @@ async def audio_transcriptions( https://platform.openai.com/docs/api-reference/audio/createTranscription?lang=curl """ global proxy_logging_obj + data: Dict = {} try: # Use orjson to parse JSON data, orjson speeds up requests significantly form_data = await request.form() - data: Dict = {key: value for key, value in 
form_data.items() if key != "file"} + data = {key: value for key, value in form_data.items() if key != "file"} # Include original request and headers in the data data["proxy_server_request"] = { # type: ignore @@ -4406,7 +4422,7 @@ async def audio_transcriptions( except Exception as e: data["litellm_status"] = "fail" # used for alerting await proxy_logging_obj.post_call_failure_hook( - user_api_key_dict=user_api_key_dict, original_exception=e + user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) traceback.print_exc() if isinstance(e, HTTPException): @@ -4455,6 +4471,7 @@ async def moderations( ``` """ global proxy_logging_obj + data: Dict = {} try: # Use orjson to parse JSON data, orjson speeds up requests significantly body = await request.body() @@ -4568,7 +4585,7 @@ async def moderations( except Exception as e: data["litellm_status"] = "fail" # used for alerting await proxy_logging_obj.post_call_failure_hook( - user_api_key_dict=user_api_key_dict, original_exception=e + user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) traceback.print_exc() if isinstance(e, HTTPException): @@ -7999,8 +8016,8 @@ async def async_queue_request( Now using a FastAPI background task + /chat/completions compatible endpoint """ + data = {} try: - data = {} data = await request.json() # type: ignore # Include original request and headers in the data @@ -8065,7 +8082,9 @@ async def async_queue_request( ): # use generate_responses to stream responses return StreamingResponse( async_data_generator( - user_api_key_dict=user_api_key_dict, response=response + user_api_key_dict=user_api_key_dict, + response=response, + request_data=data, ), media_type="text/event-stream", ) @@ -8073,7 +8092,7 @@ async def async_queue_request( return response except Exception as e: await proxy_logging_obj.post_call_failure_hook( - user_api_key_dict=user_api_key_dict, original_exception=e + user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) if isinstance(e, HTTPException): raise ProxyException( diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 9734806df..0379d5152 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -302,6 +302,7 @@ class ProxyLogging: "budget_alerts", "db_exceptions", ], + request_data: Optional[dict] = None, ): """ Alerting based on thresholds: - https://github.com/BerriAI/litellm/issues/1298 @@ -331,10 +332,19 @@ class ProxyLogging: if _proxy_base_url is not None: formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`" + extra_kwargs = {} + if request_data is not None: + _url = self.slack_alerting_instance._add_langfuse_trace_id_to_alert( + request_data=request_data + ) + if _url is not None: + extra_kwargs["🪢 Langfuse Trace"] = _url + formatted_message += "\n\n🪢 Langfuse Trace: {}".format(_url) + for client in self.alerting: if client == "slack": await self.slack_alerting_instance.send_alert( - message=message, level=level, alert_type=alert_type + message=message, level=level, alert_type=alert_type, **extra_kwargs ) elif client == "sentry": if litellm.utils.sentry_sdk_instance is not None: @@ -369,6 +379,7 @@ class ProxyLogging: message=f"DB read/write call failed: {error_message}", level="High", alert_type="db_exceptions", + request_data={}, ) ) @@ -384,7 +395,10 @@ class ProxyLogging: litellm.utils.capture_exception(error=original_exception) async def post_call_failure_hook( - self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth + self, + original_exception: Exception, + 
user_api_key_dict: UserAPIKeyAuth, + request_data: dict, ): """ Allows users to raise custom exceptions/log when a call fails, without having to deal with parsing Request body. @@ -409,6 +423,7 @@ class ProxyLogging: message=f"LLM API call failed: {str(original_exception)}", level="High", alert_type="llm_exceptions", + request_data=request_data, ) ) From e85468badbb7873097a300fa49064b06146c72b2 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 7 May 2024 13:12:06 -0700 Subject: [PATCH 16/32] test: fix linting error --- litellm/tests/langfuse.log | 137 +++++++++++++++----------------- litellm/tests/test_alangfuse.py | 116 +++++++++++++++++++-------- 2 files changed, 146 insertions(+), 107 deletions(-) diff --git a/litellm/tests/langfuse.log b/litellm/tests/langfuse.log index f47590a29..61bc6ada5 100644 --- a/litellm/tests/langfuse.log +++ b/litellm/tests/langfuse.log @@ -1,79 +1,68 @@ -int() argument must be a string, a bytes-like object or a real number, not 'NoneType' -Traceback (most recent call last): - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/client.py", line 778, in generation - "usage": _convert_usage_input(usage) if usage is not None else None, - ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 77, in _convert_usage_input - "totalCost": extract_by_priority(usage, ["totalCost", "total_cost"]), - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 32, in extract_by_priority - return int(usage[key]) - ^^^^^^^^^^^^^^^ -TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType' -int() argument must be a string, a bytes-like object or a real number, not 'NoneType' -Traceback (most recent call last): - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/client.py", line 778, in generation - "usage": _convert_usage_input(usage) if usage is not None else None, - ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 77, in _convert_usage_input - "totalCost": extract_by_priority(usage, ["totalCost", "total_cost"]), - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 32, in extract_by_priority - return int(usage[key]) - ^^^^^^^^^^^^^^^ -TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType' -int() argument must be a string, a bytes-like object or a real number, not 'NoneType' -Traceback (most recent call last): - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/client.py", line 778, in generation - "usage": _convert_usage_input(usage) if usage is not None else None, - ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 77, in _convert_usage_input - "totalCost": extract_by_priority(usage, ["totalCost", "total_cost"]), - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 32, in extract_by_priority - return int(usage[key]) - ^^^^^^^^^^^^^^^ -TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType' -int() argument must be a string, a bytes-like object or a real number, not 'NoneType' -Traceback (most recent call last): - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/client.py", line 778, in generation - "usage": 
_convert_usage_input(usage) if usage is not None else None, - ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 77, in _convert_usage_input - "totalCost": extract_by_priority(usage, ["totalCost", "total_cost"]), - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 32, in extract_by_priority - return int(usage[key]) - ^^^^^^^^^^^^^^^ -TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType' -int() argument must be a string, a bytes-like object or a real number, not 'NoneType' -Traceback (most recent call last): - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/client.py", line 778, in generation - "usage": _convert_usage_input(usage) if usage is not None else None, - ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 77, in _convert_usage_input - "totalCost": extract_by_priority(usage, ["totalCost", "total_cost"]), - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/opt/homebrew/lib/python3.11/site-packages/langfuse/utils.py", line 32, in extract_by_priority - return int(usage[key]) - ^^^^^^^^^^^^^^^ -TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType' consumer is running... -Getting observations... None, None, None, None, litellm-test-98e1cc75-bef8-4280-a2b9-e08633b81acd, None, GENERATION +Creating trace id='litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 22, 420643, tzinfo=datetime.timezone.utc) name='litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' user_id='litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' input={'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} session_id='litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' release='litellm-test-release' version='litellm-test-version' metadata={'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'} tags=['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False'] public=None +adding task {'id': '9d380abe-bb42-480b-b48f-952ed6776e1c', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 420643, tzinfo=datetime.timezone.utc), 'name': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'userId': 'litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'sessionId': 
'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'release': 'litellm-test-release', 'version': 'litellm-test-version', 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}, 'tags': ['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False']}} +Creating generation trace_id='litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' name='litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 419075) metadata={'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} level= status_message=None parent_observation_id=None version='litellm-test-version' id='litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' end_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 419879) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 419879) model='gpt-3.5-turbo' model_parameters={'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None... +item size 1224 +adding task {'id': '0d3ae4f8-e352-4acd-98ec-d21be7e8f5eb', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419075), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': , 'version': 'litellm-test-version', 'id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419879), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419879), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': , 'totalCost': 5.4999999999999995e-05}}} +item size 1359 +Creating trace id='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 22, 423093, tzinfo=datetime.timezone.utc) name='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' user_id='litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' input={'messages': [{'role': 'user', 'content': 
'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} session_id='litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' release='litellm-test-release' version='litellm-test-version' metadata={'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'} tags=['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False'] public=None +adding task {'id': '1b34abb5-4a24-4042-a8c3-9f3ea0254f2b', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 423093, tzinfo=datetime.timezone.utc), 'name': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'userId': 'litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'sessionId': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'release': 'litellm-test-release', 'version': 'litellm-test-version', 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}, 'tags': ['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False']}} +Creating generation trace_id='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' name='litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 421978) metadata={'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} level= status_message=None parent_observation_id=None version='litellm-test-version' id='litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' end_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 422551) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 422551) model='gpt-3.5-turbo' model_parameters={'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None... 
+item size 1224 +adding task {'id': '050ba9cd-3eff-443b-9637-705406ceb8cb', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 421978), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': , 'version': 'litellm-test-version', 'id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 422551), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 422551), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': , 'totalCost': 5.4999999999999995e-05}}} +item size 1359 +flushing queue +successfully flushed about 0 items. +Creating trace id='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 22, 425221, tzinfo=datetime.timezone.utc) name=None user_id=None input=None output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} session_id=None release=None version=None metadata={'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'} tags=None public=None +adding task {'id': 'd5173131-5bef-46cd-aa5a-6dcd01f6c000', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 425221, tzinfo=datetime.timezone.utc), 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}}} +Creating generation trace_id='litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' name='litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 424075) metadata={'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False} input={'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]} output={'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'} level= status_message=None 
parent_observation_id=None version='litellm-test-version' id='litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5' end_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 424526) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 22, 424526) model='gpt-3.5-turbo' model_parameters={'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None... +item size 630 +adding task {'id': 'ed61fc8d-aede-4c33-9ce8-377d498169f4', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424075), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': , 'version': 'litellm-test-version', 'id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424526), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424526), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': , 'totalCost': 5.4999999999999995e-05}}} +uploading batch of 15 items +uploading data: {'batch': [{'id': 'e3840349-5e27-4921-84fc-f11ac428b7c5', 'type': 'trace-create', 'body': {'id': '77e94058-6f8a-43bc-97ef-1a8d4966592c', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 289521, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 289935, tzinfo=datetime.timezone.utc)}, {'id': '54036ec0-06ff-44d1-ac3a-f6085a3983ab', 'type': 'generation-create', 'body': {'traceId': '77e94058-6f8a-43bc-97ef-1a8d4966592c', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 21, 970003), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': , 'id': 'time-13-11-21-970003_chatcmpl-30ccf23d-ac57-4183-ab2f-b93f084c4187', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 286720), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 286720), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': }}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 290909, tzinfo=datetime.timezone.utc)}, {'id': '4bf88864-4937-48a4-8e9b-b1cf6a29cc5c', 'type': 'trace-create', 'body': {'id': 
'04190fd5-8a1f-41d9-b8be-878945c35293', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 292743, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 292929, tzinfo=datetime.timezone.utc)}, {'id': '050a1ed2-b54e-46ab-9145-04baca33524e', 'type': 'generation-create', 'body': {'traceId': '04190fd5-8a1f-41d9-b8be-878945c35293', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 282826), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': , 'id': 'time-13-11-22-282826_chatcmpl-684e7a99-b0ad-43e3-a0e9-acbce76e5457', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 288054), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 288054), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': }}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 293730, tzinfo=datetime.timezone.utc)}, {'id': '94b80fdf-7df9-4b69-8500-df55a4748802', 'type': 'trace-create', 'body': {'id': '82588025-780b-4045-a131-06dcaf2c54ca', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 293803, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 293964, tzinfo=datetime.timezone.utc)}, {'id': '659db88e-6adc-4c52-82d8-dac517773242', 'type': 'generation-create', 'body': {'traceId': '82588025-780b-4045-a131-06dcaf2c54ca', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 280988), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': , 'id': 'time-13-11-22-280988_chatcmpl-4ecaabdd-be67-4122-a3bf-b95466ffee0a', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 287168), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 287168), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': }}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 294336, tzinfo=datetime.timezone.utc)}, {'id': '383b9518-93ff-4943-ae0c-b3256ee3c1a7', 'type': 'trace-create', 'body': {'id': 'fe18bb31-ded9-4ad2-8417-41e0e3de195c', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 295711, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 295870, tzinfo=datetime.timezone.utc)}, {'id': '127d6d13-e8b0-44f2-bba1-cc5d9710b0b4', 'type': 'generation-create', 'body': {'traceId': 'fe18bb31-ded9-4ad2-8417-41e0e3de195c', 'name': 
'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 284370), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': , 'id': 'time-13-11-22-284370_chatcmpl-bf93ab8e-ecf2-4f04-9506-ef51a1c4c9d0', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 288779), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 288779), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': }}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 296237, tzinfo=datetime.timezone.utc)}, {'id': '2bc81d4d-f6a5-415b-acaa-feb883c41bbb', 'type': 'trace-create', 'body': {'id': '99b7014a-c3c0-4040-92ad-64a665ab6abe', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 297355, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 297502, tzinfo=datetime.timezone.utc)}, {'id': 'd51705a9-088a-4f49-b494-f4fa8e6bc59e', 'type': 'generation-create', 'body': {'traceId': '99b7014a-c3c0-4040-92ad-64a665ab6abe', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 285989), 'metadata': {'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'This is a test'}]}, 'output': {'content': "It's simple to use and easy to get started", 'role': 'assistant'}, 'level': , 'id': 'time-13-11-22-285989_chatcmpl-1a3c46e4-d474-4d19-92d8-8a7ee7ac0799', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 295600), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 295600), 'model': 'chatgpt-v-2', 'modelParameters': {'temperature': '0.7', 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': }}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 297845, tzinfo=datetime.timezone.utc)}, {'id': '9d380abe-bb42-480b-b48f-952ed6776e1c', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 420643, tzinfo=datetime.timezone.utc), 'name': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'userId': 'litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'sessionId': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'release': 'litellm-test-release', 'version': 'litellm-test-version', 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}, 'tags': ['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 
421233, tzinfo=datetime.timezone.utc)}, {'id': '0d3ae4f8-e352-4acd-98ec-d21be7e8f5eb', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419075), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': , 'version': 'litellm-test-version', 'id': 'litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419879), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 419879), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': , 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 421804, tzinfo=datetime.timezone.utc)}, {'id': '1b34abb5-4a24-4042-a8c3-9f3ea0254f2b', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 423093, tzinfo=datetime.timezone.utc), 'name': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'userId': 'litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'sessionId': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'release': 'litellm-test-release', 'version': 'litellm-test-version', 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}, 'tags': ['litellm-test-tag1', 'litellm-test-tag2', 'cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 423311, tzinfo=datetime.timezone.utc)}, {'id': '050ba9cd-3eff-443b-9637-705406ceb8cb', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 421978), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 
'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': , 'version': 'litellm-test-version', 'id': 'litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 422551), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 422551), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': , 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 423829, tzinfo=datetime.timezone.utc)}, {'id': 'd5173131-5bef-46cd-aa5a-6dcd01f6c000', 'type': 'trace-create', 'body': {'id': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 425221, tzinfo=datetime.timezone.utc), 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'metadata': {'trace_actual_metadata_key': 'trace_actual_metadata_value', 'generation_id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 425370, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 15, 'sdk_integration': 'litellm', 'sdk_name': 'python', 'sdk_version': '2.27.0', 'public_key': 'pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66'}} +making request: {"batch": [{"id": "e3840349-5e27-4921-84fc-f11ac428b7c5", "type": "trace-create", "body": {"id": "77e94058-6f8a-43bc-97ef-1a8d4966592c", "timestamp": "2024-05-07T20:11:22.289521Z", "name": "litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.289935Z"}, {"id": "54036ec0-06ff-44d1-ac3a-f6085a3983ab", "type": "generation-create", "body": {"traceId": "77e94058-6f8a-43bc-97ef-1a8d4966592c", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:21.970003-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-21-970003_chatcmpl-30ccf23d-ac57-4183-ab2f-b93f084c4187", "endTime": "2024-05-07T13:11:22.286720-07:00", "completionStartTime": "2024-05-07T13:11:22.286720-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.290909Z"}, {"id": "4bf88864-4937-48a4-8e9b-b1cf6a29cc5c", "type": "trace-create", "body": {"id": "04190fd5-8a1f-41d9-b8be-878945c35293", "timestamp": "2024-05-07T20:11:22.292743Z", "name": "litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.292929Z"}, {"id": "050a1ed2-b54e-46ab-9145-04baca33524e", "type": "generation-create", "body": 
{"traceId": "04190fd5-8a1f-41d9-b8be-878945c35293", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:22.282826-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-22-282826_chatcmpl-684e7a99-b0ad-43e3-a0e9-acbce76e5457", "endTime": "2024-05-07T13:11:22.288054-07:00", "completionStartTime": "2024-05-07T13:11:22.288054-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.293730Z"}, {"id": "94b80fdf-7df9-4b69-8500-df55a4748802", "type": "trace-create", "body": {"id": "82588025-780b-4045-a131-06dcaf2c54ca", "timestamp": "2024-05-07T20:11:22.293803Z", "name": "litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.293964Z"}, {"id": "659db88e-6adc-4c52-82d8-dac517773242", "type": "generation-create", "body": {"traceId": "82588025-780b-4045-a131-06dcaf2c54ca", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:22.280988-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-22-280988_chatcmpl-4ecaabdd-be67-4122-a3bf-b95466ffee0a", "endTime": "2024-05-07T13:11:22.287168-07:00", "completionStartTime": "2024-05-07T13:11:22.287168-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.294336Z"}, {"id": "383b9518-93ff-4943-ae0c-b3256ee3c1a7", "type": "trace-create", "body": {"id": "fe18bb31-ded9-4ad2-8417-41e0e3de195c", "timestamp": "2024-05-07T20:11:22.295711Z", "name": "litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.295870Z"}, {"id": "127d6d13-e8b0-44f2-bba1-cc5d9710b0b4", "type": "generation-create", "body": {"traceId": "fe18bb31-ded9-4ad2-8417-41e0e3de195c", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:22.284370-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-22-284370_chatcmpl-bf93ab8e-ecf2-4f04-9506-ef51a1c4c9d0", "endTime": "2024-05-07T13:11:22.288779-07:00", "completionStartTime": "2024-05-07T13:11:22.288779-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.296237Z"}, {"id": "2bc81d4d-f6a5-415b-acaa-feb883c41bbb", "type": "trace-create", "body": {"id": "99b7014a-c3c0-4040-92ad-64a665ab6abe", "timestamp": "2024-05-07T20:11:22.297355Z", "name": 
"litellm-acompletion", "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.297502Z"}, {"id": "d51705a9-088a-4f49-b494-f4fa8e6bc59e", "type": "generation-create", "body": {"traceId": "99b7014a-c3c0-4040-92ad-64a665ab6abe", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:22.285989-07:00", "metadata": {"cache_hit": false}, "input": {"messages": [{"role": "user", "content": "This is a test"}]}, "output": {"content": "It's simple to use and easy to get started", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-22-285989_chatcmpl-1a3c46e4-d474-4d19-92d8-8a7ee7ac0799", "endTime": "2024-05-07T13:11:22.295600-07:00", "completionStartTime": "2024-05-07T13:11:22.295600-07:00", "model": "chatgpt-v-2", "modelParameters": {"temperature": "0.7", "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS"}}, "timestamp": "2024-05-07T20:11:22.297845Z"}, {"id": "9d380abe-bb42-480b-b48f-952ed6776e1c", "type": "trace-create", "body": {"id": "litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "timestamp": "2024-05-07T20:11:22.420643Z", "name": "litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "userId": "litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "sessionId": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "release": "litellm-test-release", "version": "litellm-test-version", "metadata": {"trace_actual_metadata_key": "trace_actual_metadata_value", "generation_id": "litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}, "tags": ["litellm-test-tag1", "litellm-test-tag2", "cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.421233Z"}, {"id": "0d3ae4f8-e352-4acd-98ec-d21be7e8f5eb", "type": "generation-create", "body": {"traceId": "litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "name": "litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "startTime": "2024-05-07T13:11:22.419075-07:00", "metadata": {"gen_metadata_key": "gen_metadata_value", "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "level": "DEFAULT", "version": "litellm-test-version", "id": "litellm-test-trace1-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "endTime": "2024-05-07T13:11:22.419879-07:00", "completionStartTime": "2024-05-07T13:11:22.419879-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.2", "max_tokens": 100, "user": "langfuse_latency_test_user", 
"extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-05-07T20:11:22.421804Z"}, {"id": "1b34abb5-4a24-4042-a8c3-9f3ea0254f2b", "type": "trace-create", "body": {"id": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "timestamp": "2024-05-07T20:11:22.423093Z", "name": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "userId": "litellm-test-user1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "sessionId": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "release": "litellm-test-release", "version": "litellm-test-version", "metadata": {"trace_actual_metadata_key": "trace_actual_metadata_value", "generation_id": "litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}, "tags": ["litellm-test-tag1", "litellm-test-tag2", "cache_hit:False"]}, "timestamp": "2024-05-07T20:11:22.423311Z"}, {"id": "050ba9cd-3eff-443b-9637-705406ceb8cb", "type": "generation-create", "body": {"traceId": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "name": "litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "startTime": "2024-05-07T13:11:22.421978-07:00", "metadata": {"gen_metadata_key": "gen_metadata_value", "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "level": "DEFAULT", "version": "litellm-test-version", "id": "litellm-test-trace2-generation-1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "endTime": "2024-05-07T13:11:22.422551-07:00", "completionStartTime": "2024-05-07T13:11:22.422551-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.2", "max_tokens": 100, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-05-07T20:11:22.423829Z"}, {"id": "d5173131-5bef-46cd-aa5a-6dcd01f6c000", "type": "trace-create", "body": {"id": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "timestamp": "2024-05-07T20:11:22.425221Z", "output": {"content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "metadata": {"trace_actual_metadata_key": "trace_actual_metadata_value", "generation_id": "litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}}, "timestamp": "2024-05-07T20:11:22.425370Z"}], "metadata": {"batch_size": 15, "sdk_integration": "litellm", "sdk_name": "python", "sdk_version": "2.27.0", "public_key": "pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66"}} to 
https://cloud.langfuse.com/api/public/ingestion +received response: {"errors":[],"successes":[{"id":"e3840349-5e27-4921-84fc-f11ac428b7c5","status":201},{"id":"54036ec0-06ff-44d1-ac3a-f6085a3983ab","status":201},{"id":"4bf88864-4937-48a4-8e9b-b1cf6a29cc5c","status":201},{"id":"050a1ed2-b54e-46ab-9145-04baca33524e","status":201},{"id":"94b80fdf-7df9-4b69-8500-df55a4748802","status":201},{"id":"659db88e-6adc-4c52-82d8-dac517773242","status":201},{"id":"383b9518-93ff-4943-ae0c-b3256ee3c1a7","status":201},{"id":"127d6d13-e8b0-44f2-bba1-cc5d9710b0b4","status":201},{"id":"2bc81d4d-f6a5-415b-acaa-feb883c41bbb","status":201},{"id":"d51705a9-088a-4f49-b494-f4fa8e6bc59e","status":201},{"id":"9d380abe-bb42-480b-b48f-952ed6776e1c","status":201},{"id":"0d3ae4f8-e352-4acd-98ec-d21be7e8f5eb","status":201},{"id":"1b34abb5-4a24-4042-a8c3-9f3ea0254f2b","status":201},{"id":"050ba9cd-3eff-443b-9637-705406ceb8cb","status":201},{"id":"d5173131-5bef-46cd-aa5a-6dcd01f6c000","status":201}]} +successfully uploaded batch of 15 items +item size 1359 +Getting trace litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5 +Getting observations... None, None, None, None, litellm-test-trace1-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5, None, GENERATION +uploading batch of 1 items +uploading data: {'batch': [{'id': 'ed61fc8d-aede-4c33-9ce8-377d498169f4', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'name': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424075), 'metadata': {'gen_metadata_key': 'gen_metadata_value', 'cache_hit': False}, 'input': {'messages': [{'role': 'user', 'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5'}]}, 'output': {'content': 'litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'role': 'assistant'}, 'level': , 'version': 'litellm-test-version', 'id': 'litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424526), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 22, 424526), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.2', 'max_tokens': 100, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': , 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 22, 425776, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 1, 'sdk_integration': 'litellm', 'sdk_name': 'python', 'sdk_version': '2.27.0', 'public_key': 'pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66'}} +making request: {"batch": [{"id": "ed61fc8d-aede-4c33-9ce8-377d498169f4", "type": "generation-create", "body": {"traceId": "litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "name": "litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "startTime": "2024-05-07T13:11:22.424075-07:00", "metadata": {"gen_metadata_key": "gen_metadata_value", "cache_hit": false}, "input": {"messages": [{"role": "user", "content": "litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5"}]}, "output": {"content": 
"litellm-test-session-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5:litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "role": "assistant"}, "level": "DEFAULT", "version": "litellm-test-version", "id": "litellm-test-trace2-generation-2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5", "endTime": "2024-05-07T13:11:22.424526-07:00", "completionStartTime": "2024-05-07T13:11:22.424526-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.2", "max_tokens": 100, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-05-07T20:11:22.425776Z"}], "metadata": {"batch_size": 1, "sdk_integration": "litellm", "sdk_name": "python", "sdk_version": "2.27.0", "public_key": "pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66"}} to https://cloud.langfuse.com/api/public/ingestion +Getting trace litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5 +received response: {"errors":[],"successes":[{"id":"ed61fc8d-aede-4c33-9ce8-377d498169f4","status":201}]} +successfully uploaded batch of 1 items +Getting observations... None, None, None, None, litellm-test-trace2-c8f258e1-002a-4ab9-98e1-1bf3c84c0bd5, None, GENERATION consumer is running... -Getting observations... None, None, None, None, litellm-test-532d2bc8-f8d6-42fd-8f78-416bae79925d, None, GENERATION -joining 1 consumer threads -consumer thread 0 joined -joining 1 consumer threads -consumer thread 0 joined -joining 1 consumer threads -consumer thread 0 joined -joining 1 consumer threads -consumer thread 0 joined -joining 1 consumer threads -consumer thread 0 joined -joining 1 consumer threads -consumer thread 0 joined +flushing queue +successfully flushed about 0 items. +Creating trace id='litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 45, 796169, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id=None input={'messages': 'redacted-by-litellm'} output={'content': 'redacted-by-litellm', 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=['cache_hit:False'] public=None +adding task {'id': '244ffc62-a30d-4281-8a86-bdfcb3edef05', 'type': 'trace-create', 'body': {'id': 'litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 45, 796169, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': ['cache_hit:False']}} +Creating generation trace_id='litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1' name='litellm-acompletion' start_time=datetime.datetime(2024, 5, 7, 13, 11, 45, 794599) metadata={'cache_hit': False} input={'messages': 'redacted-by-litellm'} output={'content': 'redacted-by-litellm', 'role': 'assistant'} level= status_message=None parent_observation_id=None version=None id='time-13-11-45-794599_chatcmpl-28d76a11-56a6-43d2-9bf6-a6ddf7d8895a' end_time=datetime.datetime(2024, 5, 7, 13, 11, 45, 795329) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 45, 795329) model='gpt-3.5-turbo' model_parameters={'temperature': '0.7', 'stream': False, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=10, output=20, total=None, unit=, input_cost=None, output_cost=None, total_cost=5.4999999999999995e-05) prompt_name=None prompt_version=None... 
+item size 400 +adding task {'id': 'e9d12a6d-3fca-4adb-a018-bf276733ffa6', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 794599), 'metadata': {'cache_hit': False}, 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': , 'id': 'time-13-11-45-794599_chatcmpl-28d76a11-56a6-43d2-9bf6-a6ddf7d8895a', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 795329), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 795329), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': False, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': , 'totalCost': 5.4999999999999995e-05}}} +item size 876 +uploading batch of 2 items +uploading data: {'batch': [{'id': '244ffc62-a30d-4281-8a86-bdfcb3edef05', 'type': 'trace-create', 'body': {'id': 'litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 45, 796169, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 45, 796433, tzinfo=datetime.timezone.utc)}, {'id': 'e9d12a6d-3fca-4adb-a018-bf276733ffa6', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 794599), 'metadata': {'cache_hit': False}, 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': , 'id': 'time-13-11-45-794599_chatcmpl-28d76a11-56a6-43d2-9bf6-a6ddf7d8895a', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 795329), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 45, 795329), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': False, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 10, 'output': 20, 'unit': , 'totalCost': 5.4999999999999995e-05}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 45, 797038, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 2, 'sdk_integration': 'litellm', 'sdk_name': 'python', 'sdk_version': '2.27.0', 'public_key': 'pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66'}} +making request: {"batch": [{"id": "244ffc62-a30d-4281-8a86-bdfcb3edef05", "type": "trace-create", "body": {"id": "litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1", "timestamp": "2024-05-07T20:11:45.796169Z", "name": "litellm-acompletion", "input": {"messages": "redacted-by-litellm"}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:45.796433Z"}, {"id": "e9d12a6d-3fca-4adb-a018-bf276733ffa6", "type": "generation-create", "body": {"traceId": "litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:45.794599-07:00", "metadata": {"cache_hit": false}, "input": {"messages": "redacted-by-litellm"}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-45-794599_chatcmpl-28d76a11-56a6-43d2-9bf6-a6ddf7d8895a", "endTime": "2024-05-07T13:11:45.795329-07:00", 
"completionStartTime": "2024-05-07T13:11:45.795329-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.7", "stream": false, "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 10, "output": 20, "unit": "TOKENS", "totalCost": 5.4999999999999995e-05}}, "timestamp": "2024-05-07T20:11:45.797038Z"}], "metadata": {"batch_size": 2, "sdk_integration": "litellm", "sdk_name": "python", "sdk_version": "2.27.0", "public_key": "pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66"}} to https://cloud.langfuse.com/api/public/ingestion +received response: {"errors":[],"successes":[{"id":"244ffc62-a30d-4281-8a86-bdfcb3edef05","status":201},{"id":"e9d12a6d-3fca-4adb-a018-bf276733ffa6","status":201}]} +successfully uploaded batch of 2 items +Getting observations... None, None, None, None, litellm-test-a87c7c71-32cd-4e6c-acc0-7378d6d81bb1, None, GENERATION +consumer is running... +flushing queue +successfully flushed about 0 items. +Creating trace id='litellm-test-d9136466-2e87-4afc-8367-dc51764251c7' timestamp=datetime.datetime(2024, 5, 7, 20, 11, 48, 286447, tzinfo=datetime.timezone.utc) name='litellm-acompletion' user_id=None input={'messages': 'redacted-by-litellm'} output={'content': 'redacted-by-litellm', 'role': 'assistant'} session_id=None release=None version=None metadata=None tags=['cache_hit:False'] public=None +adding task {'id': 'cab47524-1e1e-4404-b8bd-5f526895ac0c', 'type': 'trace-create', 'body': {'id': 'litellm-test-d9136466-2e87-4afc-8367-dc51764251c7', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 48, 286447, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': ['cache_hit:False']}} +Creating generation trace_id='litellm-test-d9136466-2e87-4afc-8367-dc51764251c7' name='litellm-acompletion' start_time=datetime.datetime(2024, 5, 7, 13, 11, 48, 276681) metadata={'cache_hit': False} input={'messages': 'redacted-by-litellm'} output={'content': 'redacted-by-litellm', 'role': 'assistant'} level= status_message=None parent_observation_id=None version=None id='time-13-11-48-276681_chatcmpl-ef076c31-4977-4687-bc83-07bb1f0aa1b2' end_time=datetime.datetime(2024, 5, 7, 13, 11, 48, 285026) completion_start_time=datetime.datetime(2024, 5, 7, 13, 11, 48, 278853) model='gpt-3.5-turbo' model_parameters={'temperature': '0.7', 'stream': True, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'} usage=Usage(input=0, output=98, total=None, unit=, input_cost=None, output_cost=None, total_cost=0.000196) prompt_name=None prompt_version=None... 
+item size 400 +adding task {'id': '6bacab4d-822a-430f-85a9-4de1fa7ce259', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-d9136466-2e87-4afc-8367-dc51764251c7', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 276681), 'metadata': {'cache_hit': False}, 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': , 'id': 'time-13-11-48-276681_chatcmpl-ef076c31-4977-4687-bc83-07bb1f0aa1b2', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 285026), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 278853), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': True, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 0, 'output': 98, 'unit': , 'totalCost': 0.000196}}} +item size 860 +uploading batch of 2 items +uploading data: {'batch': [{'id': 'cab47524-1e1e-4404-b8bd-5f526895ac0c', 'type': 'trace-create', 'body': {'id': 'litellm-test-d9136466-2e87-4afc-8367-dc51764251c7', 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 48, 286447, tzinfo=datetime.timezone.utc), 'name': 'litellm-acompletion', 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'tags': ['cache_hit:False']}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 48, 286752, tzinfo=datetime.timezone.utc)}, {'id': '6bacab4d-822a-430f-85a9-4de1fa7ce259', 'type': 'generation-create', 'body': {'traceId': 'litellm-test-d9136466-2e87-4afc-8367-dc51764251c7', 'name': 'litellm-acompletion', 'startTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 276681), 'metadata': {'cache_hit': False}, 'input': {'messages': 'redacted-by-litellm'}, 'output': {'content': 'redacted-by-litellm', 'role': 'assistant'}, 'level': , 'id': 'time-13-11-48-276681_chatcmpl-ef076c31-4977-4687-bc83-07bb1f0aa1b2', 'endTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 285026), 'completionStartTime': datetime.datetime(2024, 5, 7, 13, 11, 48, 278853), 'model': 'gpt-3.5-turbo', 'modelParameters': {'temperature': '0.7', 'stream': True, 'max_tokens': 5, 'user': 'langfuse_latency_test_user', 'extra_body': '{}'}, 'usage': {'input': 0, 'output': 98, 'unit': , 'totalCost': 0.000196}}, 'timestamp': datetime.datetime(2024, 5, 7, 20, 11, 48, 287077, tzinfo=datetime.timezone.utc)}], 'metadata': {'batch_size': 2, 'sdk_integration': 'litellm', 'sdk_name': 'python', 'sdk_version': '2.27.0', 'public_key': 'pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66'}} +making request: {"batch": [{"id": "cab47524-1e1e-4404-b8bd-5f526895ac0c", "type": "trace-create", "body": {"id": "litellm-test-d9136466-2e87-4afc-8367-dc51764251c7", "timestamp": "2024-05-07T20:11:48.286447Z", "name": "litellm-acompletion", "input": {"messages": "redacted-by-litellm"}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "tags": ["cache_hit:False"]}, "timestamp": "2024-05-07T20:11:48.286752Z"}, {"id": "6bacab4d-822a-430f-85a9-4de1fa7ce259", "type": "generation-create", "body": {"traceId": "litellm-test-d9136466-2e87-4afc-8367-dc51764251c7", "name": "litellm-acompletion", "startTime": "2024-05-07T13:11:48.276681-07:00", "metadata": {"cache_hit": false}, "input": {"messages": "redacted-by-litellm"}, "output": {"content": "redacted-by-litellm", "role": "assistant"}, "level": "DEFAULT", "id": "time-13-11-48-276681_chatcmpl-ef076c31-4977-4687-bc83-07bb1f0aa1b2", "endTime": "2024-05-07T13:11:48.285026-07:00", "completionStartTime": 
"2024-05-07T13:11:48.278853-07:00", "model": "gpt-3.5-turbo", "modelParameters": {"temperature": "0.7", "stream": true, "max_tokens": 5, "user": "langfuse_latency_test_user", "extra_body": "{}"}, "usage": {"input": 0, "output": 98, "unit": "TOKENS", "totalCost": 0.000196}}, "timestamp": "2024-05-07T20:11:48.287077Z"}], "metadata": {"batch_size": 2, "sdk_integration": "litellm", "sdk_name": "python", "sdk_version": "2.27.0", "public_key": "pk-lf-47ddd17f-c73c-4edd-b92a-b28835843e66"}} to https://cloud.langfuse.com/api/public/ingestion +received response: {"errors":[],"successes":[{"id":"cab47524-1e1e-4404-b8bd-5f526895ac0c","status":201},{"id":"6bacab4d-822a-430f-85a9-4de1fa7ce259","status":201}]} +successfully uploaded batch of 2 items +Getting observations... None, None, None, None, litellm-test-d9136466-2e87-4afc-8367-dc51764251c7, None, GENERATION joining 1 consumer threads consumer thread 0 joined joining 1 consumer threads diff --git a/litellm/tests/test_alangfuse.py b/litellm/tests/test_alangfuse.py index d71738cc6..5a495550c 100644 --- a/litellm/tests/test_alangfuse.py +++ b/litellm/tests/test_alangfuse.py @@ -21,7 +21,7 @@ import pytest @pytest.fixture -def langfuse_client() -> "langfuse.Langfuse": +def langfuse_client(): import langfuse langfuse_client = langfuse.Langfuse( @@ -29,9 +29,12 @@ def langfuse_client() -> "langfuse.Langfuse": secret_key=os.environ["LANGFUSE_SECRET_KEY"], ) - with patch("langfuse.Langfuse", MagicMock(return_value=langfuse_client)) as mock_langfuse_client: + with patch( + "langfuse.Langfuse", MagicMock(return_value=langfuse_client) + ) as mock_langfuse_client: yield mock_langfuse_client() - + + def search_logs(log_file_path, num_good_logs=1): """ Searches the given log file for logs containing the "/api/public" string. @@ -143,7 +146,7 @@ def test_langfuse_logging_async(): pytest.fail(f"An exception occurred - {e}") -async def make_async_calls(metadata = None, **completion_kwargs): +async def make_async_calls(metadata=None, **completion_kwargs): tasks = [] for _ in range(5): tasks.append(create_async_task()) @@ -173,14 +176,14 @@ def create_async_task(**completion_kwargs): By default a standard set of arguments are used for the litellm.acompletion function. 
""" completion_args = { - "model": "azure/chatgpt-v-2", - "messages": [{"role": "user", "content": "This is a test"}], - "max_tokens": 5, - "temperature": 0.7, - "timeout": 5, - "user": "langfuse_latency_test_user", - "mock_response": "It's simple to use and easy to get started", - } + "model": "azure/chatgpt-v-2", + "messages": [{"role": "user", "content": "This is a test"}], + "max_tokens": 5, + "temperature": 0.7, + "timeout": 5, + "user": "langfuse_latency_test_user", + "mock_response": "It's simple to use and easy to get started", + } completion_args.update(completion_kwargs) return asyncio.create_task(litellm.acompletion(**completion_args)) @@ -195,7 +198,11 @@ async def test_langfuse_logging_without_request_response(stream, langfuse_client litellm.set_verbose = True litellm.turn_off_message_logging = True litellm.success_callback = ["langfuse"] - response = await create_async_task(model="gpt-3.5-turbo", stream=stream, metadata={"trace_id": _unique_trace_name}) + response = await create_async_task( + model="gpt-3.5-turbo", + stream=stream, + metadata={"trace_id": _unique_trace_name}, + ) print(response) if stream: async for chunk in response: @@ -232,49 +239,78 @@ async def test_langfuse_logging_metadata(langfuse_client): Tags is just set for the trace """ import uuid - + litellm.set_verbose = True litellm.success_callback = ["langfuse"] - + trace_identifiers = {} - expected_filtered_metadata_keys = {"trace_name", "trace_id", "existing_trace_id", "trace_user_id", "session_id", "tags", "generation_name", "generation_id", "prompt"} - trace_metadata = {"trace_actual_metadata_key": "trace_actual_metadata_value"} # Allows for setting the metadata on the trace + expected_filtered_metadata_keys = { + "trace_name", + "trace_id", + "existing_trace_id", + "trace_user_id", + "session_id", + "tags", + "generation_name", + "generation_id", + "prompt", + } + trace_metadata = { + "trace_actual_metadata_key": "trace_actual_metadata_value" + } # Allows for setting the metadata on the trace run_id = str(uuid.uuid4()) session_id = f"litellm-test-session-{run_id}" trace_common_metadata = { "session_id": session_id, "tags": ["litellm-test-tag1", "litellm-test-tag2"], - "update_trace_keys": ["output", "trace_metadata"], # Overwrite the following fields in the trace with the last generation's output and the trace_user_id + "update_trace_keys": [ + "output", + "trace_metadata", + ], # Overwrite the following fields in the trace with the last generation's output and the trace_user_id "trace_metadata": trace_metadata, - "gen_metadata_key": "gen_metadata_value", # Metadata key that should not be filtered in the generation + "gen_metadata_key": "gen_metadata_value", # Metadata key that should not be filtered in the generation "trace_release": "litellm-test-release", "version": "litellm-test-version", } - for trace_num in range(1, 3): # Two traces + for trace_num in range(1, 3): # Two traces metadata = copy.deepcopy(trace_common_metadata) trace_id = f"litellm-test-trace{trace_num}-{run_id}" metadata["trace_id"] = trace_id metadata["trace_name"] = trace_id trace_identifiers[trace_id] = [] print(f"Trace: {trace_id}") - for generation_num in range(1, trace_num + 1): # Each trace has a number of generations equal to its trace number + for generation_num in range( + 1, trace_num + 1 + ): # Each trace has a number of generations equal to its trace number metadata["trace_user_id"] = f"litellm-test-user{generation_num}-{run_id}" - generation_id = f"litellm-test-trace{trace_num}-generation-{generation_num}-{run_id}" + 
generation_id = ( + f"litellm-test-trace{trace_num}-generation-{generation_num}-{run_id}" + ) metadata["generation_id"] = generation_id metadata["generation_name"] = generation_id - metadata["trace_metadata"]["generation_id"] = generation_id # Update to test if trace_metadata is overwritten by update trace keys + metadata["trace_metadata"][ + "generation_id" + ] = generation_id # Update to test if trace_metadata is overwritten by update trace keys trace_identifiers[trace_id].append(generation_id) print(f"Generation: {generation_id}") - response = await create_async_task(model="gpt-3.5-turbo", + response = await create_async_task( + model="gpt-3.5-turbo", mock_response=f"{session_id}:{trace_id}:{generation_id}", - messages=[{"role": "user", "content": f"{session_id}:{trace_id}:{generation_id}"}], + messages=[ + { + "role": "user", + "content": f"{session_id}:{trace_id}:{generation_id}", + } + ], max_tokens=100, temperature=0.2, - metadata=copy.deepcopy(metadata) # Every generation needs its own metadata, langfuse is not async/thread safe without it + metadata=copy.deepcopy( + metadata + ), # Every generation needs its own metadata, langfuse is not async/thread safe without it ) print(response) metadata["existing_trace_id"] = trace_id - + langfuse_client.flush() await asyncio.sleep(2) @@ -284,20 +320,31 @@ async def test_langfuse_logging_metadata(langfuse_client): assert trace.id == trace_id assert trace.session_id == session_id assert trace.metadata != trace_metadata - generations = list(reversed(langfuse_client.get_generations(trace_id=trace_id).data)) + generations = list( + reversed(langfuse_client.get_generations(trace_id=trace_id).data) + ) assert len(generations) == len(generation_ids) - assert trace.input == generations[0].input # Should be set by the first generation - assert trace.output == generations[-1].output # Should be overwritten by the last generation according to update_trace_keys - assert trace.metadata != generations[-1].metadata # Should be overwritten by the last generation according to update_trace_keys + assert ( + trace.input == generations[0].input + ) # Should be set by the first generation + assert ( + trace.output == generations[-1].output + ) # Should be overwritten by the last generation according to update_trace_keys + assert ( + trace.metadata != generations[-1].metadata + ) # Should be overwritten by the last generation according to update_trace_keys assert trace.metadata["generation_id"] == generations[-1].id assert set(trace.tags).issuperset(trace_common_metadata["tags"]) print("trace_from_langfuse", trace) for generation_id, generation in zip(generation_ids, generations): assert generation.id == generation_id assert generation.trace_id == trace_id - assert set(generation.metadata.keys()).isdisjoint(expected_filtered_metadata_keys) + assert set(generation.metadata.keys()).isdisjoint( + expected_filtered_metadata_keys + ) print("generation_from_langfuse", generation) + @pytest.mark.skip(reason="beta test - checking langfuse output") def test_langfuse_logging(): try: @@ -657,7 +704,10 @@ def test_langfuse_existing_trace_id(): assert initial_langfuse_trace_dict == new_langfuse_trace_dict -@pytest.mark.skipif(condition=not os.environ.get("OPENAI_API_KEY", False), reason="Authentication missing for openai") +@pytest.mark.skipif( + condition=not os.environ.get("OPENAI_API_KEY", False), + reason="Authentication missing for openai", +) def test_langfuse_logging_tool_calling(): litellm.set_verbose = True From 6983e7a84f0ff58689cfd9212950b7d66a891e5d Mon Sep 17 
00:00:00 2001 From: Ishaan Jaff Date: Tue, 7 May 2024 13:51:50 -0700 Subject: [PATCH 17/32] feat - make lowest_cost pure async --- litellm/router.py | 20 +++++++++++--------- litellm/router_strategy/lowest_cost.py | 22 +++++++++++++++------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 99e2435ac..3f2bef476 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2958,6 +2958,7 @@ class Router: if ( self.routing_strategy != "usage-based-routing-v2" and self.routing_strategy != "simple-shuffle" + and self.routing_strategy != "cost-based-routing" ): # prevent regressions for other routing strategies, that don't have async get available deployments implemented. return self.get_available_deployment( model=model, @@ -3014,6 +3015,16 @@ class Router: messages=messages, input=input, ) + if ( + self.routing_strategy == "cost-based-routing" + and self.lowestcost_logger is not None + ): + deployment = await self.lowestcost_logger.async_get_available_deployments( + model_group=model, + healthy_deployments=healthy_deployments, + messages=messages, + input=input, + ) elif self.routing_strategy == "simple-shuffle": # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm ############## Check if we can do a RPM/TPM based weighted pick ################# @@ -3184,15 +3195,6 @@ class Router: messages=messages, input=input, ) - elif ( - self.routing_strategy == "cost-based-routing" - and self.lowestcost_logger is not None - ): - deployment = self.lowestcost_logger.get_available_deployments( - model_group=model, - healthy_deployments=healthy_deployments, - request_kwargs=request_kwargs, - ) if deployment is None: verbose_router_logger.info( f"get_available_deployment for model: {model}, No deployment available" diff --git a/litellm/router_strategy/lowest_cost.py b/litellm/router_strategy/lowest_cost.py index 44b49378d..2d010fb4f 100644 --- a/litellm/router_strategy/lowest_cost.py +++ b/litellm/router_strategy/lowest_cost.py @@ -40,7 +40,7 @@ class LowestCostLoggingHandler(CustomLogger): self.router_cache = router_cache self.model_list = model_list - def log_success_event(self, kwargs, response_obj, start_time, end_time): + async def log_success_event(self, kwargs, response_obj, start_time, end_time): try: """ Update usage on success @@ -90,7 +90,11 @@ class LowestCostLoggingHandler(CustomLogger): # Update usage # ------------ - request_count_dict = self.router_cache.get_cache(key=cost_key) or {} + request_count_dict = ( + await self.router_cache.async_get_cache(key=cost_key) or {} + ) + + # check local result first if id not in request_count_dict: request_count_dict[id] = {} @@ -111,7 +115,9 @@ class LowestCostLoggingHandler(CustomLogger): request_count_dict[id][precise_minute].get("rpm", 0) + 1 ) - self.router_cache.set_cache(key=cost_key, value=request_count_dict) + await self.router_cache.async_set_cache( + key=cost_key, value=request_count_dict + ) ### TESTING ### if self.test_flag: @@ -172,7 +178,9 @@ class LowestCostLoggingHandler(CustomLogger): # Update usage # ------------ - request_count_dict = self.router_cache.get_cache(key=cost_key) or {} + request_count_dict = ( + await self.router_cache.async_get_cache(key=cost_key) or {} + ) if id not in request_count_dict: request_count_dict[id] = {} @@ -189,7 +197,7 @@ class LowestCostLoggingHandler(CustomLogger): request_count_dict[id][precise_minute].get("rpm", 0) + 1 ) - self.router_cache.set_cache( + await self.router_cache.async_set_cache( key=cost_key, 
value=request_count_dict ) # reset map within window @@ -200,7 +208,7 @@ class LowestCostLoggingHandler(CustomLogger): traceback.print_exc() pass - def get_available_deployments( + async def async_get_available_deployments( self, model_group: str, healthy_deployments: list, @@ -213,7 +221,7 @@ class LowestCostLoggingHandler(CustomLogger): """ cost_key = f"{model_group}_map" - request_count_dict = self.router_cache.get_cache(key=cost_key) or {} + request_count_dict = await self.router_cache.async_get_cache(key=cost_key) or {} # ----------------------- # Find lowest used model From 8644aec8d302ffbd9e89d21fdf1b20362b28e406 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 7 May 2024 13:52:34 -0700 Subject: [PATCH 18/32] test - lowest cost router --- litellm/tests/test_lowest_cost_routing.py | 27 +++++++++++++---------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/litellm/tests/test_lowest_cost_routing.py b/litellm/tests/test_lowest_cost_routing.py index 9e627b3b4..217b4a970 100644 --- a/litellm/tests/test_lowest_cost_routing.py +++ b/litellm/tests/test_lowest_cost_routing.py @@ -20,7 +20,8 @@ from litellm.caching import DualCache ### UNIT TESTS FOR cost ROUTING ### -def test_get_available_deployments(): +@pytest.mark.asyncio +async def test_get_available_deployments(): test_cache = DualCache() model_list = [ { @@ -40,7 +41,7 @@ def test_get_available_deployments(): model_group = "gpt-3.5-turbo" ## CHECK WHAT'S SELECTED ## - selected_model = lowest_cost_logger.get_available_deployments( + selected_model = await lowest_cost_logger.async_get_available_deployments( model_group=model_group, healthy_deployments=model_list ) print("selected model: ", selected_model) @@ -48,7 +49,8 @@ def test_get_available_deployments(): assert selected_model["model_info"]["id"] == "groq-llama" -def test_get_available_deployments_custom_price(): +@pytest.mark.asyncio +async def test_get_available_deployments_custom_price(): from litellm._logging import verbose_router_logger import logging @@ -89,7 +91,7 @@ def test_get_available_deployments_custom_price(): model_group = "gpt-3.5-turbo" ## CHECK WHAT'S SELECTED ## - selected_model = lowest_cost_logger.get_available_deployments( + selected_model = await lowest_cost_logger.async_get_available_deployments( model_group=model_group, healthy_deployments=model_list ) print("selected model: ", selected_model) @@ -142,7 +144,7 @@ async def _deploy(lowest_cost_logger, deployment_id, tokens_used, duration): response_obj = {"usage": {"total_tokens": tokens_used}} time.sleep(duration) end_time = time.time() - lowest_cost_logger.log_success_event( + await lowest_cost_logger.async_log_success_event( response_obj=response_obj, kwargs=kwargs, start_time=start_time, @@ -150,14 +152,11 @@ async def _deploy(lowest_cost_logger, deployment_id, tokens_used, duration): ) -async def _gather_deploy(all_deploys): - return await asyncio.gather(*[_deploy(*t) for t in all_deploys]) - - @pytest.mark.parametrize( "ans_rpm", [1, 5] ) # 1 should produce nothing, 10 should select first -def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm): +@pytest.mark.asyncio +async def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm): """ Pass in list of 2 valid models @@ -193,9 +192,13 @@ def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm): model_group = "gpt-3.5-turbo" d1 = [(lowest_cost_logger, "1234", 50, 0.01)] * non_ans_rpm d2 = [(lowest_cost_logger, "5678", 50, 0.01)] * non_ans_rpm - asyncio.run(_gather_deploy([*d1, *d2])) + + await 
asyncio.gather(*[_deploy(*t) for t in [*d1, *d2]]) + + asyncio.sleep(3) + ## CHECK WHAT'S SELECTED ## - d_ans = lowest_cost_logger.get_available_deployments( + d_ans = await lowest_cost_logger.async_get_available_deployments( model_group=model_group, healthy_deployments=model_list ) assert (d_ans and d_ans["model_info"]["id"]) == ans From 32f3e032e97beabf17b5595a72972eccce7b2640 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 7 May 2024 15:10:47 -0700 Subject: [PATCH 19/32] feat - send slack alerts litellm.router --- litellm/integrations/slack_alerting.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index 5546f7c33..b6e2f7ba5 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -796,6 +796,14 @@ Model Info: updated_at=litellm.utils.get_utc_datetime(), ) ) + if "llm_exceptions" in self.alert_types: + original_exception = kwargs.get("exception", None) + + await self.send_alert( + message="LLM API Failure - " + str(original_exception), + level="High", + alert_type="llm_exceptions", + ) async def _run_scheduler_helper(self, llm_router: litellm.Router) -> bool: """ From b1230dd9194f8daa7985c0771957cc4819899f5f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 7 May 2024 15:12:21 -0700 Subject: [PATCH 20/32] test - slack alerts on router --- litellm/tests/test_alerting.py | 67 ++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py index 3734c29d2..06921d8d6 100644 --- a/litellm/tests/test_alerting.py +++ b/litellm/tests/test_alerting.py @@ -313,3 +313,70 @@ async def test_daily_reports_redis_cache_scheduler(): # second call - expect empty await slack_alerting._run_scheduler_helper(llm_router=router) + + +@pytest.mark.asyncio +async def test_send_llm_exception(slack_alerting): + with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert: + litellm.callbacks = [slack_alerting] + + # on async success + router = litellm.Router( + model_list=[ + { + "model_name": "gpt-5", + "litellm_params": { + "model": "gpt-3.5-turbo", + "api_key": "bad_key", + }, + } + ] + ) + try: + await router.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + ) + except: + pass + + await asyncio.sleep(3) + + mock_send_alert.assert_awaited_once() + + +@pytest.mark.asyncio +@pytest.mark.skip(reason="Local test. 
Test if slack alerts are sent.")
+async def test_send_llm_exception_to_slack():
+    from litellm.integrations.slack_alerting import SlackAlerting
+
+    new_alerting = SlackAlerting(
+        alerting_threshold=0.00002,
+        alerting=["slack"],
+        alert_types=["llm_exceptions", "llm_requests_hanging", "llm_too_slow"],
+    )
+
+    litellm.callbacks = [new_alerting]
+    litellm.set_verbose = True
+
+    # on async success
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-5",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": "bad_key",
+                },
+            }
+        ]
+    )
+    try:
+        await router.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+    except:
+        pass
+
+    await asyncio.sleep(3)

From 5fd3b12d34993bbe4aa8974e7d207b90c1ad1358 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 7 May 2024 17:46:18 -0700
Subject: [PATCH 21/32] add router alerting type

---
 litellm/types/router.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/litellm/types/router.py b/litellm/types/router.py
index 4a62a267e..d79fb2e2e 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -346,3 +346,19 @@ class RetryPolicy(BaseModel):
     RateLimitErrorRetries: Optional[int] = None
     ContentPolicyViolationErrorRetries: Optional[int] = None
     InternalServerErrorRetries: Optional[int] = None
+
+
+class RouterAlerting(BaseModel):
+    """
+    Use this configure alerting for the router. Receive alerts on the following events
+    - LLM API Exceptions
+    - LLM Responses Too Slow
+    - LLM Requests Hanging
+
+    Args:
+        webhook_url: Optional[str] = None - webhook url for alerting
+        alerting_threshold: Optional[float] = None - threhshold for slow / hanging llm responses (in seconds)
+    """
+
+    webhook_url: Optional[str] = None
+    alerting_threshold: Optional[float] = None

From c08352a0ce278d47b7423576813e43997b5c9624 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 7 May 2024 18:03:04 -0700
Subject: [PATCH 22/32] router- initialize alerting

---
 litellm/router.py       | 24 +++++++++++++++++++++++-
 litellm/types/router.py |  8 ++++----
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index 3f2bef476..ddb006d52 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -44,6 +44,7 @@ from litellm.types.router import (
     updateDeployment,
     updateLiteLLMParams,
     RetryPolicy,
+    AlertingConfig,
 )
 
 from litellm.integrations.custom_logger import CustomLogger
@@ -103,6 +104,7 @@ class Router:
         ] = "simple-shuffle",
         routing_strategy_args: dict = {},  # just for latency-based routing
         semaphore: Optional[asyncio.Semaphore] = None,
+        alerting_config: Optional[AlertingConfig] = None,
     ) -> None:
         """
         Initialize the Router class with the given parameters for caching, reliability, and routing strategy.
@@ -131,7 +133,7 @@ class Router:
             cooldown_time (float): Time to cooldown a deployment after failure in seconds. Defaults to 1.
             routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
             routing_strategy_args (dict): Additional args for latency-based routing. Defaults to {}.
-
+            alerting_config (AlertingConfig): Slack alerting configuration. Defaults to None.
         Returns:
             Router: An instance of the litellm.Router class.
@@ -316,6 +318,9 @@ class Router:
         self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = (
             model_group_retry_policy
         )
+        self.alerting_config: Optional[AlertingConfig] = alerting_config
+        if self.alerting_config is not None:
+            self._initialize_alerting()
 
     def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
         if routing_strategy == "least-busy":
@@ -3320,6 +3325,23 @@ class Router:
             ):
                 return retry_policy.ContentPolicyViolationErrorRetries
 
+    def _initialize_alerting(self):
+        from litellm.integrations.slack_alerting import SlackAlerting
+
+        router_alerting_config: AlertingConfig = self.alerting_config
+
+        _slack_alerting_logger = SlackAlerting(
+            alerting_threshold=router_alerting_config.alerting_threshold,
+            alerting=["slack"],
+            default_webhook_url=router_alerting_config.webhook_url,
+        )
+
+        litellm.callbacks.append(_slack_alerting_logger)
+        litellm.success_callback.append(
+            _slack_alerting_logger.response_taking_too_long_callback
+        )
+        print("\033[94m\nInitialized Alerting for litellm.Router\033[0m\n")  # noqa
+
     def flush_cache(self):
         litellm.cache = None
         self.cache.flush_cache()

diff --git a/litellm/types/router.py b/litellm/types/router.py
index d79fb2e2e..f3fa89324 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -348,7 +348,7 @@ class RetryPolicy(BaseModel):
     InternalServerErrorRetries: Optional[int] = None
 
 
-class RouterAlerting(BaseModel):
+class AlertingConfig(BaseModel):
     """
     Use this configure alerting for the router. Receive alerts on the following events
     - LLM API Exceptions
@@ -356,9 +356,9 @@ class RouterAlerting(BaseModel):
     - LLM Requests Hanging
 
     Args:
-        webhook_url: Optional[str] = None - webhook url for alerting
+        webhook_url: str - webhook url for alerting, slack provides a webhook url to send alerts to
        alerting_threshold: Optional[float] = None - threhshold for slow / hanging llm responses (in seconds)
     """
 
-    webhook_url: Optional[str] = None
-    alerting_threshold: Optional[float] = None
+    webhook_url: str
+    alerting_threshold: Optional[float] = 300

From e8053c3d0bd6fbceb40e8d0a5bb8a6b341a5c3b2 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 7 May 2024 18:17:12 -0700
Subject: [PATCH 23/32] fix slack alerting

---
 litellm/integrations/slack_alerting.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index b6e2f7ba5..d974cfbd3 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -68,11 +68,15 @@ class SlackAlerting(CustomLogger):
+    """
+    Class for sending Slack Alerts
+    """
+
     # Class variables or attributes
     def __init__(
         self,
         internal_usage_cache: Optional[DualCache] = None,
-        alerting_threshold: float = 300,
+        alerting_threshold: float = 300,  # threshold for slow / hanging llm responses (in seconds)
         alerting: Optional[List] = [],
         alert_types: Optional[
             List[
@@ -97,6 +101,7 @@ class SlackAlerting(CustomLogger):
            Dict
         ] = None,  # if user wants to separate alerts to diff channels
         alerting_args={},
+        default_webhook_url: Optional[str] = None,
     ):
         self.alerting_threshold = alerting_threshold
         self.alerting = alerting
@@ -106,6 +111,7 @@ class SlackAlerting(CustomLogger):
         self.alert_to_webhook_url = alert_to_webhook_url
         self.is_running = False
         self.alerting_args = SlackAlertingArgs(**alerting_args)
+        self.default_webhook_url = default_webhook_url
 
     def update_values(
         self,
@@ -302,7 +308,7 @@ class SlackAlerting(CustomLogger):
         except Exception as e:
             return 0
 
-    async def send_daily_reports(self, router: litellm.Router) -> bool:
+    async def send_daily_reports(self, router) -> bool:
         """
         Send a daily report on:
         - Top 5 deployments with most failed requests
@@ -740,6 +746,8 @@ Model Info:
                 and alert_type in self.alert_to_webhook_url
             ):
                 slack_webhook_url = self.alert_to_webhook_url[alert_type]
+            elif self.default_webhook_url is not None:
+                slack_webhook_url = self.default_webhook_url
             else:
                 slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
 
@@ -805,7 +813,7 @@ Model Info:
                 alert_type="llm_exceptions",
             )
 
-    async def _run_scheduler_helper(self, llm_router: litellm.Router) -> bool:
+    async def _run_scheduler_helper(self, llm_router) -> bool:
         """
         Returns:
         - True -> report sent
@@ -847,7 +855,7 @@ Model Info:
 
         return report_sent_bool
 
-    async def _run_scheduled_daily_report(self, llm_router: Optional[litellm.Router]):
+    async def _run_scheduled_daily_report(self, llm_router: Optional[Any] = None):
         """
         If 'daily_reports' enabled

From d46544d2bc83d87df66cd978f887be7c4ecf0b34 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Tue, 7 May 2024 18:26:45 -0700
Subject: [PATCH 24/32] docs setup alerting on router

---
 docs/my-website/docs/routing.md | 40 ++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 2b28b925f..f1f6febec 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -1086,6 +1086,46 @@ async def test_acompletion_caching_on_router_caching_groups():
 asyncio.run(test_acompletion_caching_on_router_caching_groups())
 ```
+## Alerting 🚨
+
+Send alerts to slack / your webhook url for the following events
+- LLM API Exceptions
+- Slow LLM Responses
+
+Get a slack webhook url from https://api.slack.com/messaging/webhooks
+
+#### Usage
+Initialize an `AlertingConfig` and pass it to `litellm.Router`. The following code will trigger an alert because `api_key=bad-key` which is invalid
+
+```python
+from litellm.router import AlertingConfig
+import litellm
+import os
+
+router = litellm.Router(
+    model_list=[
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo",
+                "api_key": "bad_key",
+            },
+        }
+    ],
+    alerting_config= AlertingConfig(
+        alerting_threshold=10,  # threshold for slow / hanging llm responses (in seconds). Defaults to 300 seconds
+        webhook_url= os.getenv("SLACK_WEBHOOK_URL")  # webhook you want to send alerts to
+    ),
+)
+try:
+    await router.acompletion(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "Hey, how's it going?"}],
+    )
+except:
+    pass
+```
+
 ## Track cost for Azure Deployments
 
 **Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used.
This leads to inaccurate cost tracking From dc742044276de5e85086b5fbc86ba996d6baf47c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 7 May 2024 18:27:49 -0700 Subject: [PATCH 25/32] fix typo --- litellm/types/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/types/router.py b/litellm/types/router.py index f3fa89324..6ab83cec2 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -357,7 +357,7 @@ class AlertingConfig(BaseModel): Args: webhook_url: str - webhook url for alerting, slack provides a webhook url to send alerts to - alerting_threshold: Optional[float] = None - threhshold for slow / hanging llm responses (in seconds) + alerting_threshold: Optional[float] = None - threshold for slow / hanging llm responses (in seconds) """ webhook_url: str From 596adf6e2f313efb591d80c934cb7d988a4d300e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 7 May 2024 19:04:25 -0700 Subject: [PATCH 26/32] test - slack alerting on litellm router --- litellm/tests/test_alerting.py | 65 ++++++++++++---------------------- 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py index 06921d8d6..b3232cae1 100644 --- a/litellm/tests/test_alerting.py +++ b/litellm/tests/test_alerting.py @@ -18,6 +18,10 @@ from unittest.mock import patch, MagicMock from litellm.utils import get_api_base from litellm.caching import DualCache from litellm.integrations.slack_alerting import SlackAlerting, DeploymentMetrics +import unittest.mock +from unittest.mock import AsyncMock +import pytest +from litellm.router import AlertingConfig, Router @pytest.mark.parametrize( @@ -315,61 +319,31 @@ async def test_daily_reports_redis_cache_scheduler(): await slack_alerting._run_scheduler_helper(llm_router=router) -@pytest.mark.asyncio -async def test_send_llm_exception(slack_alerting): - with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert: - litellm.callbacks = [slack_alerting] - - # on async success - router = litellm.Router( - model_list=[ - { - "model_name": "gpt-5", - "litellm_params": { - "model": "gpt-3.5-turbo", - "api_key": "bad_key", - }, - } - ] - ) - try: - await router.acompletion( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Hey, how's it going?"}], - ) - except: - pass - - await asyncio.sleep(3) - - mock_send_alert.assert_awaited_once() - - @pytest.mark.asyncio @pytest.mark.skip(reason="Local test. 
Test if slack alerts are sent.") async def test_send_llm_exception_to_slack(): - from litellm.integrations.slack_alerting import SlackAlerting - - new_alerting = SlackAlerting( - alerting_threshold=0.00002, - alerting=["slack"], - alert_types=["llm_exceptions", "llm_requests_hanging", "llm_too_slow"], - ) - - litellm.callbacks = [new_alerting] - litellm.set_verbose = True + from litellm.router import AlertingConfig # on async success router = litellm.Router( model_list=[ { - "model_name": "gpt-5", + "model_name": "gpt-3.5-turbo", "litellm_params": { "model": "gpt-3.5-turbo", "api_key": "bad_key", }, - } - ] + }, + { + "model_name": "gpt-5-good", + "litellm_params": { + "model": "gpt-3.5-turbo", + }, + }, + ], + alerting_config=AlertingConfig( + alerting_threshold=0.5, webhook_url=os.getenv("SLACK_WEBHOOK_URL") + ), ) try: await router.acompletion( @@ -379,4 +353,9 @@ async def test_send_llm_exception_to_slack(): except: pass + await router.acompletion( + model="gpt-5-good", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + ) + await asyncio.sleep(3) From 6e72857cf75677191fbf49a34ae6f548e6daa07c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 7 May 2024 19:15:58 -0700 Subject: [PATCH 27/32] fix model cost map --- ...model_prices_and_context_window_backup.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index a6c233b99..10c70a858 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -739,6 +739,24 @@ "litellm_provider": "mistral", "mode": "embedding" }, + "deepseek-chat": { + "max_tokens": 4096, + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000014, + "output_cost_per_token": 0.00000028, + "litellm_provider": "deepseek", + "mode": "chat" + }, + "deepseek-coder": { + "max_tokens": 4096, + "max_input_tokens": 16000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000014, + "output_cost_per_token": 0.00000028, + "litellm_provider": "deepseek", + "mode": "chat" + }, "groq/llama2-70b-4096": { "max_tokens": 4096, "max_input_tokens": 4096, From f45feff13c80f34635c17430aa424594c4d4226f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 7 May 2024 19:16:32 -0700 Subject: [PATCH 28/32] =?UTF-8?q?bump:=20version=201.36.1=20=E2=86=92=201.?= =?UTF-8?q?36.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 649916fda..39071a5b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.36.1" +version = "1.36.2" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.36.1" +version = "1.36.2" version_files = [ "pyproject.toml:^version" ] From 21d3407b9571ccdcc607fc8224fc733f938990dc Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 7 May 2024 19:48:46 -0700 Subject: [PATCH 29/32] fix replicate test --- litellm/tests/test_completion.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 471ebfee6..32b65faea 100644 --- 
a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -2168,9 +2168,9 @@ def test_completion_replicate_vicuna(): def test_replicate_custom_prompt_dict(): litellm.set_verbose = True - model_name = "replicate/meta/llama-2-70b-chat" + model_name = "replicate/meta/llama-2-7b" litellm.register_prompt_template( - model="replicate/meta/llama-2-70b-chat", + model="replicate/meta/llama-2-7b", initial_prompt_value="You are a good assistant", # [OPTIONAL] roles={ "system": { @@ -2200,6 +2200,7 @@ def test_replicate_custom_prompt_dict(): repetition_penalty=0.1, num_retries=3, ) + except litellm.APIError as e: pass except litellm.APIConnectionError as e: @@ -3016,6 +3017,7 @@ async def test_acompletion_gemini(): else: pytest.fail(f"Error occurred: {e}") + # Deepseek tests def test_completion_deepseek(): litellm.set_verbose = True From fbcda918ded37d78daf41086812322cb932e70be Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 7 May 2024 18:29:14 -0700 Subject: [PATCH 30/32] feat(ui/model_dashboard.tsx): show if model is config or db model --- litellm/proxy/proxy_server.py | 21 +++++++++++++++---- litellm/router.py | 14 +++++++++++-- litellm/types/router.py | 5 ++++- .../src/components/model_dashboard.tsx | 10 ++++++++- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index ac9cb56cc..f7b8447f1 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2516,20 +2516,21 @@ class ProxyConfig: router = litellm.Router(**router_params) # type:ignore return router, model_list, general_settings - def get_model_info_with_id(self, model) -> RouterModelInfo: + def get_model_info_with_id(self, model, db_model=False) -> RouterModelInfo: """ Common logic across add + delete router models Parameters: - deployment + - db_model -> flag for differentiating model stored in db vs. config -> used on UI Return model info w/ id """ if model.model_info is not None and isinstance(model.model_info, dict): if "id" not in model.model_info: model.model_info["id"] = model.model_id - _model_info = RouterModelInfo(**model.model_info) + _model_info = RouterModelInfo(**model.model_info, db_model=db_model) else: - _model_info = RouterModelInfo(id=model.model_id) + _model_info = RouterModelInfo(id=model.model_id, db_model=db_model) return _model_info async def _delete_deployment(self, db_models: list) -> int: @@ -2624,7 +2625,9 @@ class ProxyConfig: f"Invalid model added to proxy db. Invalid litellm params. litellm_params={_litellm_params}" ) continue # skip to next model - _model_info = self.get_model_info_with_id(model=m) + _model_info = self.get_model_info_with_id( + model=m, db_model=True + ) ## 👈 FLAG = True for db_models added = llm_router.add_deployment( deployment=Deployment( @@ -7449,6 +7452,16 @@ async def update_model( ) ) if _existing_litellm_params is None: + if ( + llm_router is not None + and llm_router.get_deployment(model_id=_model_id) is not None + ): + raise HTTPException( + status_code=400, + detail={ + "error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit." 
+ }, + ) raise Exception("model not found") _existing_litellm_params_dict = dict( _existing_litellm_params.litellm_params diff --git a/litellm/router.py b/litellm/router.py index ddb006d52..e2e1d3409 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2595,11 +2595,21 @@ class Router: except: return None - def get_deployment(self, model_id: str): + def get_deployment(self, model_id: str) -> Optional[Deployment]: + """ + Returns -> Deployment or None + + Raise Exception -> if model found in invalid format + """ for model in self.model_list: if "model_info" in model and "id" in model["model_info"]: if model_id == model["model_info"]["id"]: - return model + if isinstance(model, dict): + return Deployment(**model) + elif isinstance(model, Deployment): + return model + else: + raise Exception("Model invalid format - {}".format(type(model))) return None def get_model_info(self, id: str) -> Optional[dict]: diff --git a/litellm/types/router.py b/litellm/types/router.py index 6ab83cec2..07d16a37a 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -1,6 +1,6 @@ from typing import List, Optional, Union, Dict, Tuple, Literal import httpx -from pydantic import BaseModel, validator +from pydantic import BaseModel, validator, Field from .completion import CompletionRequest from .embedding import EmbeddingRequest import uuid, enum @@ -70,6 +70,9 @@ class ModelInfo(BaseModel): id: Optional[ str ] # Allow id to be optional on input, but it will always be present as a str in the model instance + db_model: bool = ( + False # used for proxy - to separate models which are stored in the db vs. config. + ) def __init__(self, id: Optional[Union[str, int]] = None, **params): if id is None: diff --git a/ui/litellm-dashboard/src/components/model_dashboard.tsx b/ui/litellm-dashboard/src/components/model_dashboard.tsx index 6e207b430..d66a73485 100644 --- a/ui/litellm-dashboard/src/components/model_dashboard.tsx +++ b/ui/litellm-dashboard/src/components/model_dashboard.tsx @@ -37,7 +37,7 @@ import { Badge, BadgeDelta, Button } from "@tremor/react"; import RequestAccess from "./request_model_access"; import { Typography } from "antd"; import TextArea from "antd/es/input/TextArea"; -import { InformationCircleIcon, PencilAltIcon, PencilIcon, StatusOnlineIcon, TrashIcon, RefreshIcon } from "@heroicons/react/outline"; +import { InformationCircleIcon, PencilAltIcon, PencilIcon, StatusOnlineIcon, TrashIcon, RefreshIcon, CheckCircleIcon, XCircleIcon } from "@heroicons/react/outline"; import DeleteModelButton from "./delete_model_button"; const { Title: Title2, Link } = Typography; import { UploadOutlined } from '@ant-design/icons'; @@ -921,6 +921,7 @@ const handleEditSubmit = async (formValues: Record) => { Input Price per token ($) Output Price per token ($) Max Tokens + Status @@ -929,6 +930,7 @@ const handleEditSubmit = async (formValues: Record) => { selectedModelGroup === "all" || model.model_name === selectedModelGroup || selectedModelGroup === null || selectedModelGroup === undefined || selectedModelGroup === "" ) .map((model: any, index: number) => ( + {model.model_name} @@ -958,6 +960,12 @@ const handleEditSubmit = async (formValues: Record) => { {model.input_cost} {model.output_cost} {model.max_tokens} + + { + model.model_info.db_model ? 
DB Model : Config Model + } + + Date: Tue, 7 May 2024 21:34:08 -0700 Subject: [PATCH 31/32] docs(routing.md): make clear lowest cost routing is async --- docs/my-website/docs/routing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index f1f6febec..0b0c7713c 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -468,7 +468,7 @@ asyncio.run(router_acompletion()) ``` - + Picks a deployment based on the lowest cost From 303e0c622609191e3f592e50149e12ecb6473911 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Tue, 7 May 2024 21:42:18 -0700 Subject: [PATCH 32/32] Revert "* feat(factory.py): add support for merging consecutive messages of one role when separated with empty message of another role" --- .gitignore | 1 - litellm/llms/prompt_templates/factory.py | 56 ++++++++++-------------- litellm/tests/test_completion.py | 38 ++-------------- 3 files changed, 27 insertions(+), 68 deletions(-) diff --git a/.gitignore b/.gitignore index 4f3f65b93..abc4ecb0c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ .venv -venv .env litellm_uuid.txt __pycache__/ diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index bce472ea0..082030368 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -1,23 +1,27 @@ -import json -import re -import traceback -import uuid -import xml.etree.ElementTree as ET from enum import Enum -from typing import Any, List, Mapping, MutableMapping, Optional, Sequence - -import requests -from jinja2 import BaseLoader, Template, exceptions, meta +import requests, traceback +import json, re, xml.etree.ElementTree as ET +from jinja2 import Template, exceptions, meta, BaseLoader from jinja2.sandbox import ImmutableSandboxedEnvironment - +from typing import ( + Any, + List, + Mapping, + MutableMapping, + Optional, + Sequence, +) import litellm -from litellm.types.completion import (ChatCompletionFunctionMessageParam, - ChatCompletionMessageParam, - ChatCompletionMessageToolCallParam, - ChatCompletionSystemMessageParam, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam) +from litellm.types.completion import ( + ChatCompletionUserMessageParam, + ChatCompletionSystemMessageParam, + ChatCompletionMessageParam, + ChatCompletionFunctionMessageParam, + ChatCompletionMessageToolCallParam, + ChatCompletionToolMessageParam, +) from litellm.types.llms.anthropic import * +import uuid def default_pt(messages): @@ -599,9 +603,8 @@ def construct_tool_use_system_prompt( def convert_url_to_base64(url): - import base64 - import requests + import base64 for _ in range(3): try: @@ -981,7 +984,6 @@ def anthropic_messages_pt(messages: list): new_messages = [] msg_i = 0 tool_use_param = False - merge_with_previous = False while msg_i < len(messages): user_content = [] init_msg_i = msg_i @@ -1014,13 +1016,7 @@ def anthropic_messages_pt(messages: list): msg_i += 1 if user_content: - if merge_with_previous: - new_messages[-1]["content"].extend(user_content) - merge_with_previous = False - else: - new_messages.append({"role": "user", "content": user_content}) - elif msg_i > 0: - merge_with_previous = True + new_messages.append({"role": "user", "content": user_content}) assistant_content = [] ## MERGE CONSECUTIVE ASSISTANT CONTENT ## @@ -1048,13 +1044,7 @@ def anthropic_messages_pt(messages: list): msg_i += 1 if assistant_content: - if merge_with_previous: - 
new_messages[-1]["content"].extend(assistant_content) - merge_with_previous = False - else: - new_messages.append({"role": "assistant", "content": assistant_content}) - elif msg_i > 0: - merge_with_previous = True + new_messages.append({"role": "assistant", "content": assistant_content}) if msg_i == init_msg_i: # prevent infinite loops raise Exception( diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 6e58a46b8..32b65faea 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1,21 +1,17 @@ -import os -import sys +import sys, os import traceback - from dotenv import load_dotenv load_dotenv() -import io -import os +import os, io sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the, system path import pytest - import litellm -from litellm import (RateLimitError, Timeout, completion, completion_cost, - embedding) +from litellm import embedding, completion, completion_cost, Timeout +from litellm import RateLimitError from litellm.llms.prompt_templates.factory import anthropic_messages_pt # litellm.num_retries=3 @@ -167,32 +163,6 @@ def test_completion_claude_3(): pytest.fail(f"Error occurred: {e}") -def test_completion_claude_3_empty_message(): - litellm.set_verbose = True - messages = [{'role': 'user', 'content': 'please create a logo for a modern AI app. create in SVG format'}, - {'role': 'assistant', 'content': "To create a logo for a modern AI app in SVG format, I'll use the DALL-E 3 Image Generator."}, - {'role': 'user', 'content': 'output SVG'}, - {'role': 'assistant', 'content': 'To generate a logo for a modern AI app in SVG format using DALL-E 3, I need to:\n1. Craft a detailed prompt describing the desired logo style and elements\n2. Specify the image size (SVG is vector-based, so size is less relevant)\n3. Call the generate_image function with the prompt and size\n4. Display the generated SVG logo using the provided syntax\nThe prompt should include keywords related to AI, modern design, and SVG format. Some key elements to incorporate could be a brain symbol, circuit lines, or a robot icon, using a minimalist style and a blue color scheme often associated with technology and intelligence.', - 'tool_calls': [ - {'id': 'toolu_01KEUtRVySSeMrf3g7rCA12E', 'type': 'function', 'function': {'name': 'python_tool', 'arguments': '{"code": "...python code..."}'}} - ]}, - {'role': 'tool', 'content': '...python output...', 'tool_call_id': 'toolu_01KEUtRVySSeMrf3g7rCA12E'}, - {'role': 'assistant', 'content': ''}, # empty message appended by model after tool call response - {'role': 'user', 'content': 'write SVG source youself!'}, - ] - - try: - response = completion( - model="anthropic/claude-3-opus-20240229", - messages=messages, - stream=True, - tools=[{'type': 'function', 'function': {'name': 'python_tool', 'description': 'Execute code', 'parameters': {'type': 'object', 'properties': {'headline': {'description': 'Must have. Title of this tool call (maximum 15 characters).', 'type': 'string'}, 'code': {'description': 'Python code to execute.', 'type': 'string'}}, 'required': ['code', 'headline']}}}] - ) - print(response) - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - def test_completion_claude_3_function_call(): litellm.set_verbose = True tools = [