Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 02:34:29 +00:00
Litellm dev 2024 12 19 p3 (#7322)
All checks were successful
Read Version from pyproject.toml / read-version (push) Successful in 13s
* fix(utils.py): remove unsupported optional params (if drop_params=True) before passing into map openai params. Fixes https://github.com/BerriAI/litellm/issues/7242
* test: new test for langfuse prompt management hook. Addresses https://github.com/BerriAI/litellm/issues/3893#issuecomment-2549080296
* feat(main.py): add 'get_chat_completion_prompt' customlogger hook, allows for langfuse prompt management. Addresses https://github.com/BerriAI/litellm/issues/3893#issuecomment-2549080296
* feat(langfuse_prompt_management.py): working e2e langfuse prompt management, works with `langfuse/` route
* feat(main.py): initial tracing for dynamic langfuse params, allows admin to specify langfuse keys by model in model_list
* feat(main.py): support passing langfuse credentials dynamically
* fix(langfuse_prompt_management.py): create langfuse client based on dynamic callback params, allows dynamic langfuse params to work
* fix: fix linting errors
* docs(prompt_management.md): refactor docs for sdk + proxy prompt management tutorial
* docs(prompt_management.md): cleanup doc
* docs: cleanup topnav
* docs(prompt_management.md): update docs to be easier to use
* fix: remove unused imports
* docs(prompt_management.md): add architectural overview doc
* fix(litellm_logging.py): fix dynamic param passing
* fix(langfuse_prompt_management.py): fix linting errors
* fix: fix linting errors
* fix: use typing_extensions for typealias to ensure python3.8 compatibility
* test: use stream_options in test to account for tiktoken diff
* fix: improve import error message, and check run test earlier
This commit is contained in:
parent
2c36f25ae1
commit
27a4d08604
17 changed files with 648 additions and 260 deletions
|
@ -1,12 +1,163 @@
|
|||
import Image from '@theme/IdealImage';
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# Prompt Management
|
||||
|
||||
LiteLLM supports using [Langfuse](https://langfuse.com/docs/prompts/get-started) for prompt management on the proxy.
|
||||
Run experiments, or swap the underlying model (e.g. from gpt-4o to a fine-tuned gpt-4o-mini), from your prompt management tool (e.g. Langfuse) instead of making changes in the application.
|
||||
|
||||
Supported Integrations:
|
||||
- [Langfuse](https://langfuse.com/docs/prompts/get-started)
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. Add Langfuse as a 'callback' in your config.yaml
|
||||
|
||||
<Tabs>
|
||||
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
||||
```python
|
||||
import os
|
||||
import litellm
|
||||
|
||||
os.environ["LANGFUSE_PUBLIC_KEY"] = "public_key" # [OPTIONAL] set here or in `.completion`
|
||||
os.environ["LANGFUSE_SECRET_KEY"] = "secret_key" # [OPTIONAL] set here or in `.completion`
|
||||
|
||||
litellm.set_verbose = True # see raw request to provider
|
||||
|
||||
resp = litellm.completion(
|
||||
model="langfuse/gpt-3.5-turbo",
|
||||
prompt_id="test-chat-prompt",
|
||||
prompt_variables={"user_message": "this is used"}, # [OPTIONAL]
|
||||
messages=[{"role": "user", "content": "<IGNORED>"}],
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="proxy" label="PROXY">
|
||||
|
||||
1. Setup config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: langfuse/gpt-3.5-turbo
|
||||
prompt_id: "<langfuse_prompt_id>"
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
```
|
||||
|
||||
2. Start the proxy
|
||||
|
||||
```bash
|
||||
litellm --config config.yaml --detailed_debug
|
||||
```
|
||||
|
||||
3. Test it!
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="curl" label="CURL">
|
||||
|
||||
```bash
|
||||
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'Authorization: Bearer sk-1234' \
|
||||
-d '{
|
||||
"model": "gpt-3.5-turbo",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "THIS WILL BE IGNORED"
|
||||
}
|
||||
],
|
||||
"prompt_variables": {
|
||||
"key": "this is used"
|
||||
}
|
||||
}'
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem value="OpenAI Python SDK" label="OpenAI Python SDK">
|
||||
|
||||
```python
|
||||
import openai
|
||||
client = openai.OpenAI(
|
||||
api_key="anything",
|
||||
base_url="http://0.0.0.0:4000"
|
||||
)
|
||||
|
||||
# request sent to model set on litellm proxy, `litellm --model`
|
||||
response = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "this is a test request, write a short poem"
|
||||
}
|
||||
],
|
||||
extra_body={
|
||||
"prompt_variables": { # [OPTIONAL]
|
||||
"key": "this is used"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
print(response)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
|
||||
**Expected Logs:**
|
||||
|
||||
```
|
||||
POST Request Sent from LiteLLM:
|
||||
curl -X POST \
|
||||
https://api.openai.com/v1/ \
|
||||
-d '{'model': 'gpt-3.5-turbo', 'messages': <YOUR LANGFUSE PROMPT TEMPLATE>}'
|
||||
```
|
||||
|
||||
## How to set model
|
||||
|
||||
### Set the model on LiteLLM
|
||||
|
||||
Prefix the LiteLLM model name with `langfuse/`, i.e. `langfuse/<litellm_model_name>`:
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
||||
```python
|
||||
litellm.completion(
|
||||
model="langfuse/gpt-3.5-turbo", # or `langfuse/anthropic/claude-3-5-sonnet`
|
||||
...
|
||||
)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="proxy" label="PROXY">
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: langfuse/gpt-3.5-turbo # OR langfuse/anthropic/claude-3-5-sonnet
|
||||
prompt_id: <langfuse_prompt_id>
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
### Set the model in Langfuse
|
||||
|
||||
If a model is specified in the Langfuse prompt's config, it takes precedence over the model set on LiteLLM.
|
||||
|
||||
<Image img={require('../../img/langfuse_prompt_management_model_config.png')} />
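The resolution order mirrors `_get_model_from_prompt` in `langfuse_prompt_management.py` (shown later in this diff). A minimal sketch, with `prompt_config` standing in for the Langfuse prompt's config dict:

```python
def resolve_model(prompt_config: dict, requested_model: str) -> str:
    # the model from the Langfuse prompt config wins, if present
    if "model" in prompt_config:
        return prompt_config["model"]
    # otherwise fall back to the LiteLLM model name, minus the `langfuse/` prefix
    return requested_model.replace("langfuse/", "")
```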
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
|
@ -15,43 +166,17 @@ model_list:
|
|||
model: azure/chatgpt-v-2
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
|
||||
litellm_settings:
|
||||
callbacks: ["langfuse"] # 👈 KEY CHANGE
|
||||
```
|
||||
|
||||
2. Start the proxy
|
||||
## What is 'prompt_variables'?
|
||||
|
||||
```bash
|
||||
litellm-proxy --config config.yaml
|
||||
```
|
||||
- `prompt_variables`: A dictionary of variables that will be used to replace parts of the prompt.
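For example, assuming the Langfuse prompt `test-chat-prompt` defines a `{{user_message}}` template variable (an assumption here; the prompt contents live in Langfuse, not in this repo), the variable is filled in before the request is sent:

```python
import litellm

resp = litellm.completion(
    model="langfuse/gpt-3.5-turbo",
    prompt_id="test-chat-prompt",
    # each key replaces the matching {{variable}} in the Langfuse prompt template
    prompt_variables={"user_message": "What is LiteLLM?"},
    messages=[{"role": "user", "content": "<IGNORED>"}],  # replaced by the compiled prompt
)
```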
|
||||
|
||||
3. Test it!
|
||||
|
||||
```bash
|
||||
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'Authorization: Bearer sk-1234' \
|
||||
-d '{
|
||||
"model": "gpt-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "THIS WILL BE IGNORED"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"langfuse_prompt_id": "value",
|
||||
"langfuse_prompt_variables": { # [OPTIONAL]
|
||||
"key": "value"
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
## What is 'langfuse_prompt_id'?
|
||||
## What is 'prompt_id'?
|
||||
|
||||
- `langfuse_prompt_id`: The ID of the prompt that will be used for the request.
|
||||
- `prompt_id`: The ID of the prompt that will be used for the request.
|
||||
|
||||
<Image img={require('../../img/langfuse_prompt_id.png')} />
|
||||
|
||||
|
@ -59,25 +184,29 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
|
|||
|
||||
### `/chat/completions` messages
|
||||
|
||||
The message will be added to the start of the prompt.
|
||||
The `messages` field sent in by the client is ignored.
|
||||
|
||||
- if the Langfuse prompt is a list, it will be added to the start of the messages list (assuming it's an OpenAI compatible message).
|
||||
The Langfuse prompt will replace the `messages` field.
|
||||
|
||||
- if the Langfuse prompt is a string, it will be added as a system message.
|
||||
To replace parts of the prompt, use the `prompt_variables` field. [See how prompt variables are used](https://github.com/BerriAI/litellm/blob/017f83d038f85f93202a083cf334de3544a3af01/litellm/integrations/langfuse/langfuse_prompt_management.py#L127)
|
||||
|
||||
If the Langfuse prompt is a string, it will be sent as a user message (not all providers support system messages).
|
||||
|
||||
If the Langfuse prompt is a list, it will be sent as is (Langfuse chat prompts are OpenAI compatible).
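A minimal sketch of that mapping (mirroring the handling in `langfuse_prompt_management.py` later in this diff; the helper name is illustrative):

```python
from typing import List, Union


def compiled_prompt_to_messages(compiled_prompt: Union[str, list]) -> List[dict]:
    # Langfuse chat prompts are already OpenAI-compatible message lists
    if isinstance(compiled_prompt, list):
        return compiled_prompt
    # Langfuse text prompts are wrapped as a single user message
    return [{"role": "user", "content": compiled_prompt}]
```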
|
||||
|
||||
## Architectural Overview
|
||||
|
||||
<Image img={require('../../img/prompt_management_architecture_doc.png')} />
|
||||
|
||||
## API Reference
|
||||
|
||||
These are the params you can pass to `litellm.completion` in the SDK, or set under `litellm_params` in the proxy config.yaml:
|
||||
|
||||
```python
|
||||
if isinstance(compiled_prompt, list):
|
||||
data["messages"] = compiled_prompt + data["messages"]
|
||||
else:
|
||||
data["messages"] = [
|
||||
{"role": "system", "content": compiled_prompt}
|
||||
] + data["messages"]
|
||||
```
|
||||
|
||||
### `/completions` messages
|
||||
|
||||
The message will be added to the start of the prompt.
|
||||
|
||||
```python
|
||||
data["prompt"] = compiled_prompt + "\n" + data["prompt"]
|
||||
```
|
||||
prompt_id: str # required
|
||||
prompt_variables: Optional[dict] # optional
|
||||
langfuse_public_key: Optional[str] # optional
|
||||
langfuse_secret: Optional[str] # optional
|
||||
langfuse_secret_key: Optional[str] # optional
|
||||
langfuse_host: Optional[str] # optional
|
||||
```
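For example, Langfuse credentials can also be passed per-request from the SDK (a sketch based on the parameters above; the key values are placeholders):

```python
import litellm

resp = litellm.completion(
    model="langfuse/gpt-3.5-turbo",
    prompt_id="test-chat-prompt",
    prompt_variables={"user_message": "this is used"},
    messages=[{"role": "user", "content": "this is ignored"}],
    # dynamic Langfuse credentials for this request (placeholder values)
    langfuse_public_key="pk-lf-...",
    langfuse_secret_key="sk-lf-...",
    langfuse_host="https://us.cloud.langfuse.com",
)
```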
|
||||
|
|
|
@ -130,15 +130,7 @@ const config = {
|
|||
href: 'https://discord.com/invite/wuPM9dRgDw',
|
||||
label: 'Discord',
|
||||
position: 'right',
|
||||
},
|
||||
{
|
||||
type: 'html',
|
||||
position: 'right',
|
||||
value:
|
||||
`<a href=# class=navbar__link data-fr-widget>
|
||||
I'm Confused
|
||||
</a>`
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
footer: {
|
||||
|
|
BIN docs/my-website/img/langfuse_prompt_management_model_config.png (new file, binary not shown; 95 KiB)
BIN docs/my-website/img/prompt_management_architecture_doc.png (new file, binary not shown; 184 KiB)
|
@ -135,7 +135,6 @@ const sidebars = {
|
|||
"oidc"
|
||||
]
|
||||
},
|
||||
"proxy/prompt_management",
|
||||
"proxy/caching",
|
||||
"proxy/call_hooks",
|
||||
"proxy/rules",
|
||||
|
@ -228,6 +227,7 @@ const sidebars = {
|
|||
"completion/batching",
|
||||
"completion/mock_requests",
|
||||
"completion/reliable_completions",
|
||||
'tutorials/litellm_proxy_aporia',
|
||||
|
||||
]
|
||||
},
|
||||
|
@ -309,8 +309,29 @@ const sidebars = {
|
|||
label: "LangChain, LlamaIndex, Instructor Integration",
|
||||
items: ["langchain/langchain", "tutorials/instructor"],
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Tutorials",
|
||||
items: [
|
||||
|
||||
'tutorials/azure_openai',
|
||||
'tutorials/instructor',
|
||||
"tutorials/gradio_integration",
|
||||
"tutorials/huggingface_codellama",
|
||||
"tutorials/huggingface_tutorial",
|
||||
"tutorials/TogetherAI_liteLLM",
|
||||
"tutorials/finetuned_chat_gpt",
|
||||
"tutorials/text_completion",
|
||||
"tutorials/first_playground",
|
||||
"tutorials/model_fallbacks",
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: "doc",
|
||||
id: "proxy/prompt_management"
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Load Testing",
|
||||
|
@ -362,23 +383,7 @@ const sidebars = {
|
|||
"observability/opik_integration",
|
||||
],
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Tutorials",
|
||||
items: [
|
||||
'tutorials/litellm_proxy_aporia',
|
||||
'tutorials/azure_openai',
|
||||
'tutorials/instructor',
|
||||
"tutorials/gradio_integration",
|
||||
"tutorials/huggingface_codellama",
|
||||
"tutorials/huggingface_tutorial",
|
||||
"tutorials/TogetherAI_liteLLM",
|
||||
"tutorials/finetuned_chat_gpt",
|
||||
"tutorials/text_completion",
|
||||
"tutorials/first_playground",
|
||||
"tutorials/model_fallbacks",
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
type: "category",
|
||||
label: "Extras",
|
||||
|
|
|
@ -14,6 +14,7 @@ from litellm.types.utils import (
|
|||
EmbeddingResponse,
|
||||
ImageResponse,
|
||||
ModelResponse,
|
||||
StandardCallbackDynamicParams,
|
||||
StandardLoggingPayload,
|
||||
)
|
||||
|
||||
|
@ -60,6 +61,26 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
|
|||
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
||||
pass
|
||||
|
||||
#### PROMPT MANAGEMENT HOOKS ####
|
||||
|
||||
def get_chat_completion_prompt(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
non_default_params: dict,
|
||||
headers: dict,
|
||||
prompt_id: str,
|
||||
prompt_variables: Optional[dict],
|
||||
dynamic_callback_params: StandardCallbackDynamicParams,
|
||||
) -> Tuple[str, List[AllMessageValues], dict]:
|
||||
"""
|
||||
Returns:
|
||||
- model: str - the model to use (can be pulled from prompt management tool)
|
||||
- messages: List[AllMessageValues] - the messages to use (can be pulled from prompt management tool)
|
||||
- non_default_params: dict - update with any optional params (e.g. temperature, max_tokens, etc.) to use (can be pulled from prompt management tool)
|
||||
"""
|
||||
return model, messages, non_default_params
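For illustration, a minimal custom integration built on this hook might look like the sketch below (`MyPromptManager` and `MY_PROMPTS` are hypothetical; the real Langfuse implementation appears later in this diff):

```python
from typing import List, Optional, Tuple

from litellm.integrations.custom_logger import CustomLogger
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import StandardCallbackDynamicParams

# hypothetical in-memory prompt store
MY_PROMPTS = {"greeting-v1": [{"role": "system", "content": "You are a friendly greeter."}]}


class MyPromptManager(CustomLogger):
    def get_chat_completion_prompt(
        self,
        model: str,
        messages: List[AllMessageValues],
        non_default_params: dict,
        headers: dict,
        prompt_id: str,
        prompt_variables: Optional[dict],
        dynamic_callback_params: StandardCallbackDynamicParams,
    ) -> Tuple[str, List[AllMessageValues], dict]:
        # prepend the stored prompt, keep the caller's messages and params unchanged
        stored = MY_PROMPTS.get(prompt_id, [])
        return model, stored + messages, non_default_params
```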
|
||||
|
||||
#### PRE-CALL CHECKS - router/proxy only ####
|
||||
"""
|
||||
Allows usage-based-routing-v2 to run pre-call rpm checks within the picked deployment's semaphore (concurrency-safe tpm/rpm checks).
|
||||
|
|
|
@ -3,16 +3,93 @@ Call Hook for LiteLLM Proxy which allows Langfuse prompt management.
|
|||
"""
|
||||
|
||||
import os
|
||||
import traceback
|
||||
from typing import Literal, Optional, Union
|
||||
from functools import lru_cache
|
||||
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union, cast
|
||||
|
||||
from packaging.version import Version
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.caching.dual_cache import DualCache
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.secret_managers.main import str_to_bool
|
||||
from litellm.types.llms.openai import AllMessageValues
|
||||
from litellm.types.utils import StandardCallbackDynamicParams
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langfuse import Langfuse
|
||||
from langfuse.client import ChatPromptClient, TextPromptClient
|
||||
|
||||
LangfuseClass: TypeAlias = Langfuse
|
||||
|
||||
PROMPT_CLIENT = Union[TextPromptClient, ChatPromptClient]
|
||||
else:
|
||||
PROMPT_CLIENT = Any
|
||||
LangfuseClass = Any
|
||||
|
||||
|
||||
@lru_cache(maxsize=10)
|
||||
def langfuse_client_init(
|
||||
langfuse_public_key=None,
|
||||
langfuse_secret=None,
|
||||
langfuse_host=None,
|
||||
flush_interval=1,
|
||||
) -> LangfuseClass:
|
||||
"""
|
||||
Initialize Langfuse client with caching to prevent multiple initializations.
|
||||
|
||||
Args:
|
||||
langfuse_public_key (str, optional): Public key for Langfuse. Defaults to None.
|
||||
langfuse_secret (str, optional): Secret key for Langfuse. Defaults to None.
|
||||
langfuse_host (str, optional): Host URL for Langfuse. Defaults to None.
|
||||
flush_interval (int, optional): Flush interval in seconds. Defaults to 1.
|
||||
|
||||
Returns:
|
||||
Langfuse: Initialized Langfuse client instance
|
||||
|
||||
Raises:
|
||||
Exception: If langfuse package is not installed
|
||||
"""
|
||||
try:
|
||||
import langfuse
|
||||
from langfuse import Langfuse
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"\033[91mLangfuse not installed, try running 'pip install langfuse' to fix this error: {e}\n\033[0m"
|
||||
)
|
||||
|
||||
# Instance variables
|
||||
secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY")
|
||||
public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY")
|
||||
langfuse_host = langfuse_host or os.getenv(
|
||||
"LANGFUSE_HOST", "https://cloud.langfuse.com"
|
||||
)
|
||||
|
||||
if not (
|
||||
langfuse_host.startswith("http://") or langfuse_host.startswith("https://")
|
||||
):
|
||||
# add http:// if unset, assume communicating over private network - e.g. render
|
||||
langfuse_host = "http://" + langfuse_host
|
||||
|
||||
langfuse_release = os.getenv("LANGFUSE_RELEASE")
|
||||
langfuse_debug = os.getenv("LANGFUSE_DEBUG")
|
||||
langfuse_flush_interval = os.getenv("LANGFUSE_FLUSH_INTERVAL") or flush_interval
|
||||
|
||||
parameters = {
|
||||
"public_key": public_key,
|
||||
"secret_key": secret_key,
|
||||
"host": langfuse_host,
|
||||
"release": langfuse_release,
|
||||
"debug": langfuse_debug,
|
||||
"flush_interval": langfuse_flush_interval, # flush interval in seconds
|
||||
}
|
||||
|
||||
if Version(langfuse.version.__version__) >= Version("2.6.0"):
|
||||
parameters["sdk_integration"] = "litellm"
|
||||
|
||||
client = Langfuse(**parameters)
|
||||
|
||||
return client
|
||||
|
||||
|
||||
class LangfusePromptManagement(CustomLogger):
|
||||
|
@ -23,102 +100,42 @@ class LangfusePromptManagement(CustomLogger):
|
|||
langfuse_host=None,
|
||||
flush_interval=1,
|
||||
):
|
||||
try:
|
||||
import langfuse
|
||||
from langfuse import Langfuse
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"\033[91mLangfuse not installed, try running 'pip install langfuse' to fix this error: {e}\n{traceback.format_exc()}\033[0m"
|
||||
)
|
||||
# Instance variables
|
||||
self.secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY")
|
||||
self.public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY")
|
||||
self.langfuse_host = langfuse_host or os.getenv(
|
||||
"LANGFUSE_HOST", "https://cloud.langfuse.com"
|
||||
)
|
||||
if not (
|
||||
self.langfuse_host.startswith("http://")
|
||||
or self.langfuse_host.startswith("https://")
|
||||
):
|
||||
# add http:// if unset, assume communicating over private network - e.g. render
|
||||
self.langfuse_host = "http://" + self.langfuse_host
|
||||
self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
|
||||
self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
|
||||
self.langfuse_flush_interval = (
|
||||
os.getenv("LANGFUSE_FLUSH_INTERVAL") or flush_interval
|
||||
self.Langfuse = langfuse_client_init(
|
||||
langfuse_public_key=langfuse_public_key,
|
||||
langfuse_secret=langfuse_secret,
|
||||
langfuse_host=langfuse_host,
|
||||
flush_interval=flush_interval,
|
||||
)
|
||||
|
||||
parameters = {
|
||||
"public_key": self.public_key,
|
||||
"secret_key": self.secret_key,
|
||||
"host": self.langfuse_host,
|
||||
"release": self.langfuse_release,
|
||||
"debug": self.langfuse_debug,
|
||||
"flush_interval": self.langfuse_flush_interval, # flush interval in seconds
|
||||
}
|
||||
|
||||
if Version(langfuse.version.__version__) >= Version("2.6.0"):
|
||||
parameters["sdk_integration"] = "litellm"
|
||||
|
||||
self.Langfuse = Langfuse(**parameters)
|
||||
|
||||
# set the current langfuse project id in the environ
|
||||
# this is used by Alerting to link to the correct project
|
||||
try:
|
||||
project_id = self.Langfuse.client.projects.get().data[0].id
|
||||
os.environ["LANGFUSE_PROJECT_ID"] = project_id
|
||||
except Exception:
|
||||
project_id = None
|
||||
|
||||
if os.getenv("UPSTREAM_LANGFUSE_SECRET_KEY") is not None:
|
||||
upstream_langfuse_debug = (
|
||||
str_to_bool(self.upstream_langfuse_debug)
|
||||
if self.upstream_langfuse_debug is not None
|
||||
else None
|
||||
)
|
||||
self.upstream_langfuse_secret_key = os.getenv(
|
||||
"UPSTREAM_LANGFUSE_SECRET_KEY"
|
||||
)
|
||||
self.upstream_langfuse_public_key = os.getenv(
|
||||
"UPSTREAM_LANGFUSE_PUBLIC_KEY"
|
||||
)
|
||||
self.upstream_langfuse_host = os.getenv("UPSTREAM_LANGFUSE_HOST")
|
||||
self.upstream_langfuse_release = os.getenv("UPSTREAM_LANGFUSE_RELEASE")
|
||||
self.upstream_langfuse_debug = os.getenv("UPSTREAM_LANGFUSE_DEBUG")
|
||||
self.upstream_langfuse = Langfuse(
|
||||
public_key=self.upstream_langfuse_public_key,
|
||||
secret_key=self.upstream_langfuse_secret_key,
|
||||
host=self.upstream_langfuse_host,
|
||||
release=self.upstream_langfuse_release,
|
||||
debug=(
|
||||
upstream_langfuse_debug
|
||||
if upstream_langfuse_debug is not None
|
||||
else False
|
||||
),
|
||||
)
|
||||
else:
|
||||
self.upstream_langfuse = None
|
||||
def _get_prompt_from_id(
|
||||
self, langfuse_prompt_id: str, langfuse_client: LangfuseClass
|
||||
) -> PROMPT_CLIENT:
|
||||
return langfuse_client.get_prompt(langfuse_prompt_id)
|
||||
|
||||
def _compile_prompt(
|
||||
self,
|
||||
metadata: dict,
|
||||
langfuse_prompt_client: PROMPT_CLIENT,
|
||||
langfuse_prompt_variables: Optional[dict],
|
||||
call_type: Union[Literal["completion"], Literal["text_completion"]],
|
||||
) -> Optional[Union[str, list]]:
|
||||
compiled_prompt: Optional[Union[str, list]] = None
|
||||
if isinstance(metadata, dict):
|
||||
langfuse_prompt_id = metadata.get("langfuse_prompt_id")
|
||||
|
||||
langfuse_prompt_variables = metadata.get("langfuse_prompt_variables") or {}
|
||||
if (
|
||||
langfuse_prompt_id
|
||||
and isinstance(langfuse_prompt_id, str)
|
||||
and isinstance(langfuse_prompt_variables, dict)
|
||||
):
|
||||
langfuse_prompt = self.Langfuse.get_prompt(langfuse_prompt_id)
|
||||
compiled_prompt = langfuse_prompt.compile(**langfuse_prompt_variables)
|
||||
if langfuse_prompt_variables is None:
|
||||
langfuse_prompt_variables = {}
|
||||
|
||||
compiled_prompt = langfuse_prompt_client.compile(**langfuse_prompt_variables)
|
||||
|
||||
return compiled_prompt
|
||||
|
||||
def _get_model_from_prompt(
|
||||
self, langfuse_prompt_client: PROMPT_CLIENT, model: str
|
||||
) -> str:
|
||||
config = langfuse_prompt_client.config
|
||||
if "model" in config:
|
||||
return config["model"]
|
||||
else:
|
||||
return model.replace("langfuse/", "")
|
||||
|
||||
async def async_pre_call_hook(
|
||||
self,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
|
@ -137,9 +154,29 @@ class LangfusePromptManagement(CustomLogger):
|
|||
) -> Union[Exception, str, dict, None]:
|
||||
|
||||
metadata = data.get("metadata") or {}
|
||||
|
||||
if isinstance(metadata, dict):
|
||||
langfuse_prompt_id = cast(Optional[str], metadata.get("langfuse_prompt_id"))
|
||||
|
||||
langfuse_prompt_variables = cast(
|
||||
Optional[dict], metadata.get("langfuse_prompt_variables") or {}
|
||||
)
|
||||
else:
|
||||
return None
|
||||
|
||||
if langfuse_prompt_id is None:
|
||||
return None
|
||||
|
||||
prompt_client = self._get_prompt_from_id(
|
||||
langfuse_prompt_id=langfuse_prompt_id, langfuse_client=self.Langfuse
|
||||
)
|
||||
compiled_prompt: Optional[Union[str, list]] = None
|
||||
if call_type == "completion" or call_type == "text_completion":
|
||||
compiled_prompt = self._compile_prompt(metadata, call_type)
|
||||
compiled_prompt = self._compile_prompt(
|
||||
langfuse_prompt_client=prompt_client,
|
||||
langfuse_prompt_variables=langfuse_prompt_variables,
|
||||
call_type=call_type,
|
||||
)
|
||||
if compiled_prompt is None:
|
||||
return await super().async_pre_call_hook(
|
||||
user_api_key_dict, cache, data, call_type
|
||||
|
@ -161,3 +198,53 @@ class LangfusePromptManagement(CustomLogger):
|
|||
return await super().async_pre_call_hook(
|
||||
user_api_key_dict, cache, data, call_type
|
||||
)
|
||||
|
||||
def get_chat_completion_prompt(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
non_default_params: dict,
|
||||
headers: dict,
|
||||
prompt_id: str,
|
||||
prompt_variables: Optional[dict],
|
||||
dynamic_callback_params: StandardCallbackDynamicParams,
|
||||
) -> Tuple[
|
||||
str,
|
||||
List[AllMessageValues],
|
||||
dict,
|
||||
]:
|
||||
if prompt_id is None:
|
||||
raise ValueError(
|
||||
"Langfuse prompt id is required. Pass in as parameter 'langfuse_prompt_id'"
|
||||
)
|
||||
langfuse_client = langfuse_client_init(
|
||||
langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
|
||||
langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
|
||||
langfuse_host=dynamic_callback_params.get("langfuse_host"),
|
||||
)
|
||||
langfuse_prompt_client = self._get_prompt_from_id(
|
||||
langfuse_prompt_id=prompt_id, langfuse_client=langfuse_client
|
||||
)
|
||||
|
||||
## SET PROMPT
|
||||
compiled_prompt = self._compile_prompt(
|
||||
langfuse_prompt_client=langfuse_prompt_client,
|
||||
langfuse_prompt_variables=prompt_variables,
|
||||
call_type="completion",
|
||||
)
|
||||
|
||||
if compiled_prompt is None:
|
||||
raise ValueError(f"Langfuse prompt not found. Prompt id={prompt_id}")
|
||||
if isinstance(compiled_prompt, list):
|
||||
messages = compiled_prompt
|
||||
elif isinstance(compiled_prompt, str):
|
||||
messages = [{"role": "user", "content": compiled_prompt}]
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Langfuse prompt is not a list or string. Prompt id={prompt_id}, compiled_prompt type={type(compiled_prompt)}"
|
||||
)
|
||||
|
||||
## SET MODEL
|
||||
model = self._get_model_from_prompt(langfuse_prompt_client, model)
|
||||
|
||||
return model, messages, non_default_params
|
||||
|
|
|
@ -34,7 +34,7 @@ from litellm.litellm_core_utils.redact_messages import (
|
|||
redact_message_input_output_from_custom_logger,
|
||||
redact_message_input_output_from_logging,
|
||||
)
|
||||
from litellm.types.llms.openai import HttpxBinaryResponseContent
|
||||
from litellm.types.llms.openai import AllMessageValues, HttpxBinaryResponseContent
|
||||
from litellm.types.rerank import RerankResponse
|
||||
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
|
||||
from litellm.types.utils import (
|
||||
|
@ -425,6 +425,40 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
if "custom_llm_provider" in self.model_call_details:
|
||||
self.custom_llm_provider = self.model_call_details["custom_llm_provider"]
|
||||
|
||||
def get_chat_completion_prompt(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
non_default_params: dict,
|
||||
headers: dict,
|
||||
prompt_id: str,
|
||||
prompt_variables: Optional[dict],
|
||||
) -> Tuple[str, List[AllMessageValues], dict]:
|
||||
for (
|
||||
custom_logger_compatible_callback
|
||||
) in litellm._known_custom_logger_compatible_callbacks:
|
||||
if model.startswith(custom_logger_compatible_callback):
|
||||
custom_logger = _init_custom_logger_compatible_class(
|
||||
logging_integration=custom_logger_compatible_callback,
|
||||
internal_usage_cache=None,
|
||||
llm_router=None,
|
||||
)
|
||||
if custom_logger is None:
|
||||
continue
|
||||
model, messages, non_default_params = (
|
||||
custom_logger.get_chat_completion_prompt(
|
||||
model=model,
|
||||
messages=messages,
|
||||
non_default_params=non_default_params,
|
||||
headers=headers,
|
||||
prompt_id=prompt_id,
|
||||
prompt_variables=prompt_variables,
|
||||
dynamic_callback_params=self.standard_callback_dynamic_params,
|
||||
)
|
||||
)
|
||||
|
||||
return model, messages, non_default_params
|
||||
|
||||
def _pre_call(self, input, api_key, model=None, additional_args={}):
|
||||
"""
|
||||
Common helper function across the sync + async pre-call function
|
||||
|
|
|
@ -96,10 +96,10 @@ class VertexAIPartnerModels(VertexBase):
|
|||
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
|
||||
VertexLLM,
|
||||
)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
raise VertexAIError(
|
||||
status_code=400,
|
||||
message="""vertexai import failed please run `pip install -U "google-cloud-aiplatform>=1.38"`""",
|
||||
message=f"""vertexai import failed please run `pip install -U "google-cloud-aiplatform>=1.38"`. Got error: {e}""",
|
||||
)
|
||||
|
||||
if not (
|
||||
|
|
|
@ -75,11 +75,11 @@ class VertexAIModelGardenModels(VertexBase):
|
|||
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
|
||||
VertexLLM,
|
||||
)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
|
||||
raise VertexAIError(
|
||||
status_code=400,
|
||||
message="""vertexai import failed please run `pip install -U "google-cloud-aiplatform>=1.38"`""",
|
||||
message=f"""vertexai import failed please run `pip install -U "google-cloud-aiplatform>=1.38"`. Got error: {e}""",
|
||||
)
|
||||
|
||||
if not (
|
||||
|
|
|
@ -846,6 +846,9 @@ def completion( # type: ignore # noqa: PLR0915
|
|||
client = kwargs.get("client", None)
|
||||
### Admin Controls ###
|
||||
no_log = kwargs.get("no-log", False)
|
||||
### PROMPT MANAGEMENT ###
|
||||
prompt_id = cast(Optional[str], kwargs.get("prompt_id", None))
|
||||
prompt_variables = cast(Optional[dict], kwargs.get("prompt_variables", None))
|
||||
### COPY MESSAGES ### - related issue https://github.com/BerriAI/litellm/discussions/4489
|
||||
messages = get_completion_messages(
|
||||
messages=messages,
|
||||
|
@ -894,11 +897,26 @@ def completion( # type: ignore # noqa: PLR0915
|
|||
]
|
||||
|
||||
default_params = openai_params + all_litellm_params
|
||||
|
||||
litellm_params = {} # used to prevent unbound var errors
|
||||
non_default_params = {
|
||||
k: v for k, v in kwargs.items() if k not in default_params
|
||||
} # model-specific params - pass them straight to the model/provider
|
||||
|
||||
## PROMPT MANAGEMENT HOOKS ##
|
||||
|
||||
if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and prompt_id is not None:
|
||||
model, messages, optional_params = (
|
||||
litellm_logging_obj.get_chat_completion_prompt(
|
||||
model=model,
|
||||
messages=messages,
|
||||
non_default_params=non_default_params,
|
||||
headers=headers,
|
||||
prompt_id=prompt_id,
|
||||
prompt_variables=prompt_variables,
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
if base_url is not None:
|
||||
api_base = base_url
|
||||
|
@ -1002,9 +1020,6 @@ def completion( # type: ignore # noqa: PLR0915
|
|||
and supports_system_message is False
|
||||
):
|
||||
messages = map_system_message_pt(messages=messages)
|
||||
model_api_key = get_api_key(
|
||||
llm_provider=custom_llm_provider, dynamic_api_key=api_key
|
||||
) # get the api key from the environment if required for the model
|
||||
|
||||
if dynamic_api_key is not None:
|
||||
api_key = dynamic_api_key
|
||||
|
|
|
@ -1361,85 +1361,6 @@ class ResponseFormatChunk(TypedDict, total=False):
|
|||
response_schema: dict
|
||||
|
||||
|
||||
all_litellm_params = [
|
||||
"metadata",
|
||||
"litellm_trace_id",
|
||||
"tags",
|
||||
"acompletion",
|
||||
"aimg_generation",
|
||||
"atext_completion",
|
||||
"text_completion",
|
||||
"caching",
|
||||
"mock_response",
|
||||
"api_key",
|
||||
"api_version",
|
||||
"api_base",
|
||||
"force_timeout",
|
||||
"logger_fn",
|
||||
"verbose",
|
||||
"custom_llm_provider",
|
||||
"litellm_logging_obj",
|
||||
"litellm_call_id",
|
||||
"use_client",
|
||||
"id",
|
||||
"fallbacks",
|
||||
"azure",
|
||||
"headers",
|
||||
"model_list",
|
||||
"num_retries",
|
||||
"context_window_fallback_dict",
|
||||
"retry_policy",
|
||||
"retry_strategy",
|
||||
"roles",
|
||||
"final_prompt_value",
|
||||
"bos_token",
|
||||
"eos_token",
|
||||
"request_timeout",
|
||||
"complete_response",
|
||||
"self",
|
||||
"client",
|
||||
"rpm",
|
||||
"tpm",
|
||||
"max_parallel_requests",
|
||||
"input_cost_per_token",
|
||||
"output_cost_per_token",
|
||||
"input_cost_per_second",
|
||||
"output_cost_per_second",
|
||||
"hf_model_name",
|
||||
"model_info",
|
||||
"proxy_server_request",
|
||||
"preset_cache_key",
|
||||
"caching_groups",
|
||||
"ttl",
|
||||
"cache",
|
||||
"no-log",
|
||||
"base_model",
|
||||
"stream_timeout",
|
||||
"supports_system_message",
|
||||
"region_name",
|
||||
"allowed_model_region",
|
||||
"model_config",
|
||||
"fastest_response",
|
||||
"cooldown_time",
|
||||
"cache_key",
|
||||
"max_retries",
|
||||
"azure_ad_token_provider",
|
||||
"tenant_id",
|
||||
"client_id",
|
||||
"client_secret",
|
||||
"user_continue_message",
|
||||
"configurable_clientside_auth_params",
|
||||
"weight",
|
||||
"ensure_alternating_roles",
|
||||
"assistant_continue_message",
|
||||
"user_continue_message",
|
||||
"fallback_depth",
|
||||
"max_fallbacks",
|
||||
"max_budget",
|
||||
"budget_duration",
|
||||
]
|
||||
|
||||
|
||||
class LoggedLiteLLMParams(TypedDict, total=False):
|
||||
force_timeout: Optional[float]
|
||||
custom_llm_provider: Optional[str]
|
||||
|
@ -1646,6 +1567,87 @@ class StandardCallbackDynamicParams(TypedDict, total=False):
|
|||
turn_off_message_logging: Optional[bool] # when true will not log messages
|
||||
|
||||
|
||||
all_litellm_params = [
|
||||
"metadata",
|
||||
"litellm_trace_id",
|
||||
"tags",
|
||||
"acompletion",
|
||||
"aimg_generation",
|
||||
"atext_completion",
|
||||
"text_completion",
|
||||
"caching",
|
||||
"mock_response",
|
||||
"api_key",
|
||||
"api_version",
|
||||
"prompt_id",
|
||||
"prompt_variables",
|
||||
"api_base",
|
||||
"force_timeout",
|
||||
"logger_fn",
|
||||
"verbose",
|
||||
"custom_llm_provider",
|
||||
"litellm_logging_obj",
|
||||
"litellm_call_id",
|
||||
"use_client",
|
||||
"id",
|
||||
"fallbacks",
|
||||
"azure",
|
||||
"headers",
|
||||
"model_list",
|
||||
"num_retries",
|
||||
"context_window_fallback_dict",
|
||||
"retry_policy",
|
||||
"retry_strategy",
|
||||
"roles",
|
||||
"final_prompt_value",
|
||||
"bos_token",
|
||||
"eos_token",
|
||||
"request_timeout",
|
||||
"complete_response",
|
||||
"self",
|
||||
"client",
|
||||
"rpm",
|
||||
"tpm",
|
||||
"max_parallel_requests",
|
||||
"input_cost_per_token",
|
||||
"output_cost_per_token",
|
||||
"input_cost_per_second",
|
||||
"output_cost_per_second",
|
||||
"hf_model_name",
|
||||
"model_info",
|
||||
"proxy_server_request",
|
||||
"preset_cache_key",
|
||||
"caching_groups",
|
||||
"ttl",
|
||||
"cache",
|
||||
"no-log",
|
||||
"base_model",
|
||||
"stream_timeout",
|
||||
"supports_system_message",
|
||||
"region_name",
|
||||
"allowed_model_region",
|
||||
"model_config",
|
||||
"fastest_response",
|
||||
"cooldown_time",
|
||||
"cache_key",
|
||||
"max_retries",
|
||||
"azure_ad_token_provider",
|
||||
"tenant_id",
|
||||
"client_id",
|
||||
"client_secret",
|
||||
"user_continue_message",
|
||||
"configurable_clientside_auth_params",
|
||||
"weight",
|
||||
"ensure_alternating_roles",
|
||||
"assistant_continue_message",
|
||||
"user_continue_message",
|
||||
"fallback_depth",
|
||||
"max_fallbacks",
|
||||
"max_budget",
|
||||
"budget_duration",
|
||||
] + list(StandardCallbackDynamicParams.__annotations__.keys())
|
||||
|
||||
|
||||
class KeyGenerationConfig(TypedDict, total=False):
|
||||
required_params: List[
|
||||
str
|
||||
|
|
|
@ -2442,6 +2442,23 @@ def _remove_strict_from_schema(schema):
|
|||
return schema
|
||||
|
||||
|
||||
def _remove_unsupported_params(
|
||||
non_default_params: dict, supported_openai_params: Optional[List[str]]
|
||||
) -> dict:
|
||||
"""
|
||||
Remove unsupported params from non_default_params
|
||||
"""
|
||||
remove_keys = []
|
||||
if supported_openai_params is None:
|
||||
return {} # no supported params, so no optional openai params to send
|
||||
for param in non_default_params.keys():
|
||||
if param not in supported_openai_params:
|
||||
remove_keys.append(param)
|
||||
for key in remove_keys:
|
||||
non_default_params.pop(key, None)
|
||||
return non_default_params
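For illustration (hypothetical values), a call like this drops the keys that the provider does not support:

```python
non_default_params = {"temperature": 0.2, "response_format": {"type": "json_object"}}

cleaned = _remove_unsupported_params(
    non_default_params=non_default_params,
    supported_openai_params=["temperature", "max_tokens"],
)
# cleaned == {"temperature": 0.2}  -- "response_format" was removed in place
```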
|
||||
|
||||
|
||||
def get_optional_params( # noqa: PLR0915
|
||||
# use the openai defaults
|
||||
# https://platform.openai.com/docs/api-reference/chat/create
|
||||
|
@ -2688,11 +2705,13 @@ def get_optional_params( # noqa: PLR0915
|
|||
# Always keeps this in elif code blocks
|
||||
else:
|
||||
unsupported_params[k] = non_default_params[k]
|
||||
|
||||
if unsupported_params:
|
||||
if litellm.drop_params is True or (
|
||||
drop_params is not None and drop_params is True
|
||||
):
|
||||
pass
|
||||
for k in unsupported_params.keys():
|
||||
non_default_params.pop(k, None)
|
||||
else:
|
||||
raise UnsupportedParamsError(
|
||||
status_code=500,
|
||||
|
|
|
@ -2224,13 +2224,19 @@ def test_bedrock_nova_topk(top_k_param):
|
|||
captured_data = result
|
||||
return result
|
||||
|
||||
with patch('litellm.AmazonConverseConfig._transform_request', side_effect=mock_transform):
|
||||
with patch(
|
||||
"litellm.AmazonConverseConfig._transform_request", side_effect=mock_transform
|
||||
):
|
||||
litellm.completion(**data)
|
||||
|
||||
# Assert that additionalRequestParameters exists and contains topK
|
||||
assert 'additionalModelRequestFields' in captured_data
|
||||
assert 'inferenceConfig' in captured_data['additionalModelRequestFields']
|
||||
assert captured_data['additionalModelRequestFields']['inferenceConfig']['topK'] == 10
|
||||
assert "additionalModelRequestFields" in captured_data
|
||||
assert "inferenceConfig" in captured_data["additionalModelRequestFields"]
|
||||
assert (
|
||||
captured_data["additionalModelRequestFields"]["inferenceConfig"]["topK"]
|
||||
== 10
|
||||
)
|
||||
|
||||
|
||||
def test_bedrock_empty_content_real_call():
|
||||
completion(
|
||||
|
@ -2261,3 +2267,61 @@ def test_bedrock_process_empty_text_blocks():
|
|||
}
|
||||
modified_message = process_empty_text_blocks(**message)
|
||||
assert modified_message["content"][0]["text"] == "Please continue."
|
||||
|
||||
|
||||
def test_nova_optional_params_tool_choice():
|
||||
litellm.drop_params = True
|
||||
litellm.set_verbose = True
|
||||
litellm.completion(
|
||||
messages=[
|
||||
{"role": "user", "content": "A WWII competitive game for 4-8 players"}
|
||||
],
|
||||
model="bedrock/us.amazon.nova-pro-v1:0",
|
||||
temperature=0.3,
|
||||
tools=[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "GameDefinition",
|
||||
"description": "Correctly extracted `GameDefinition` with all the required parameters with correct types",
|
||||
"parameters": {
|
||||
"$defs": {
|
||||
"TurnDurationEnum": {
|
||||
"enum": ["action", "encounter", "battle", "operation"],
|
||||
"title": "TurnDurationEnum",
|
||||
"type": "string",
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"id": {
|
||||
"anyOf": [{"type": "integer"}, {"type": "null"}],
|
||||
"default": None,
|
||||
"title": "Id",
|
||||
},
|
||||
"prompt": {"title": "Prompt", "type": "string"},
|
||||
"name": {"title": "Name", "type": "string"},
|
||||
"description": {"title": "Description", "type": "string"},
|
||||
"competitve": {"title": "Competitve", "type": "boolean"},
|
||||
"players_min": {"title": "Players Min", "type": "integer"},
|
||||
"players_max": {"title": "Players Max", "type": "integer"},
|
||||
"turn_duration": {
|
||||
"$ref": "#/$defs/TurnDurationEnum",
|
||||
"description": "how long the passing of a turn should represent for a game at this scale",
|
||||
},
|
||||
},
|
||||
"required": [
|
||||
"competitve",
|
||||
"description",
|
||||
"name",
|
||||
"players_max",
|
||||
"players_min",
|
||||
"prompt",
|
||||
"turn_duration",
|
||||
],
|
||||
"type": "object",
|
||||
},
|
||||
},
|
||||
}
|
||||
],
|
||||
tool_choice={"type": "function", "function": {"name": "GameDefinition"}},
|
||||
)
|
||||
|
|
|
@ -60,7 +60,7 @@ VERTEX_MODELS_TO_NOT_TEST = [
|
|||
"gemini-1.5-flash-exp-0827",
|
||||
"gemini-pro-flash",
|
||||
"gemini-1.5-flash-exp-0827",
|
||||
"gemini-2.0-flash-exp"
|
||||
"gemini-2.0-flash-exp",
|
||||
]
|
||||
|
||||
|
||||
|
@ -308,7 +308,7 @@ async def test_vertex_ai_anthropic_async():
|
|||
# )
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.flaky(retries=3, delay=1)
|
||||
async def test_vertex_ai_anthropic_async_streaming():
|
||||
async def test_aaavertex_ai_anthropic_async_streaming():
|
||||
# load_vertex_ai_credentials()
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
|
|
@ -4513,3 +4513,23 @@ def test_openai_hallucinated_tool_call_util(function_name, expect_modification):
|
|||
else:
|
||||
assert len(response) == 1
|
||||
assert response[0].function.name == function_name
|
||||
|
||||
|
||||
def test_langfuse_completion(monkeypatch):
|
||||
monkeypatch.setenv(
|
||||
"LANGFUSE_PUBLIC_KEY", "pk-lf-b3db7e8e-c2f6-4fc7-825c-a541a8fbe003"
|
||||
)
|
||||
monkeypatch.setenv(
|
||||
"LANGFUSE_SECRET_KEY", "sk-lf-b11ef3a8-361c-4445-9652-12318b8596e4"
|
||||
)
|
||||
monkeypatch.setenv("LANGFUSE_HOST", "https://us.cloud.langfuse.com")
|
||||
litellm.set_verbose = True
|
||||
resp = litellm.completion(
|
||||
model="langfuse/gpt-3.5-turbo",
|
||||
langfuse_public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
|
||||
langfuse_secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
|
||||
langfuse_host="https://us.cloud.langfuse.com",
|
||||
prompt_id="test-chat-prompt",
|
||||
prompt_variables={"user_message": "this is used"},
|
||||
messages=[{"role": "user", "content": "this is ignored"}],
|
||||
)
|
||||
|
|
|
@ -92,7 +92,7 @@ def test_run(model: str):
|
|||
print("Non-stream cost : NONE")
|
||||
print(f"Non-stream cost : {completion_cost(response) * 100:.4f} (response)")
|
||||
|
||||
response = router.completion(**kwargs, stream=True) # type: ignore
|
||||
response = router.completion(**kwargs, stream=True, stream_options={"include_usage": True}) # type: ignore
|
||||
response = stream_chunk_builder(list(response), messages=kwargs["messages"]) # type: ignore
|
||||
output = response.choices[0].message.content.replace("\n", "") # type: ignore
|
||||
streaming_cost_calc = completion_cost(response) * 100
|
||||
|
|