forked from phoenix/litellm-mirror

Merge branch 'main' into litellm_aporio_integration
commit 77656d9f11

19 changed files with 512 additions and 142 deletions
@@ -72,7 +72,7 @@ Helicone's proxy provides [advanced functionality](https://docs.helicone.ai/gett
 To use Helicone as a proxy for your LLM requests:

 1. Set Helicone as your base URL via: litellm.api_base
-2. Pass in Helicone request headers via: litellm.headers
+2. Pass in Helicone request headers via: litellm.metadata

 Complete Code:

@@ -99,7 +99,7 @@ print(response)
 You can add custom metadata and properties to your requests using Helicone headers. Here are some examples:

 ```python
-litellm.headers = {
+litellm.metadata = {
     "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API
     "Helicone-User-Id": "user-abc", # Specify the user making the request
     "Helicone-Property-App": "web", # Custom property to add additional information
@@ -127,7 +127,7 @@ litellm.headers = {
 Enable caching and set up rate limiting policies:

 ```python
-litellm.headers = {
+litellm.metadata = {
     "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API
     "Helicone-Cache-Enabled": "true", # Enable caching of responses
     "Cache-Control": "max-age=3600", # Set cache limit to 1 hour
@@ -140,7 +140,7 @@ litellm.headers = {
 Track multi-step and agentic LLM interactions using session IDs and paths:

 ```python
-litellm.headers = {
+litellm.metadata = {
     "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API
     "Helicone-Session-Id": "session-abc-123", # The session ID you want to track
     "Helicone-Session-Path": "parent-trace/child-trace", # The path of the session
@@ -157,7 +157,7 @@ By using these two headers, you can effectively group and visualize multi-step L
 Set up retry mechanisms and fallback options:

 ```python
-litellm.headers = {
+litellm.metadata = {
     "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API
     "Helicone-Retry-Enabled": "true", # Enable retry mechanism
     "helicone-retry-num": "3", # Set number of retries
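Taken together, the change above moves Helicone request headers from `litellm.headers` to `litellm.metadata`. A minimal sketch of the documented flow, assuming a valid `HELICONE_API_KEY` and a placeholder gateway URL (the real base URL comes from the elided "Complete Code" section of this doc):

```python
import os

import litellm
from litellm import completion

# 1. Point LiteLLM at the Helicone proxy (placeholder URL, substitute the
#    gateway address from Helicone's documentation).
litellm.api_base = "https://<your-helicone-gateway>/v1"

# 2. Pass Helicone request headers via litellm.metadata (previously litellm.headers).
litellm.metadata = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}",  # authenticate to Helicone
    "Helicone-Property-App": "web",  # custom property, filterable in the dashboard
}

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello from LiteLLM via Helicone"}],
)
print(response)
```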
@@ -14,7 +14,7 @@ https://github.com/BerriAI/litellm
 An all-in-one developer platform for every step of the application lifecycle
 https://smith.langchain.com/

-<Image img={require('../../img/langsmith.png')} />
+<Image img={require('../../img/langsmith_new.png')} />

 :::info
 We want to learn how we can make the callbacks better! Meet the LiteLLM [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
@@ -5,6 +5,7 @@ Log Proxy input, output, and exceptions using:
 - Langfuse
 - OpenTelemetry
 - Custom Callbacks
+- Langsmith
 - DataDog
 - DynamoDB
 - s3 Bucket
@@ -1086,6 +1087,50 @@ litellm_settings:

 Start the LiteLLM Proxy and make a test request to verify the logs reached your callback API

+## Logging LLM IO to Langsmith
+
+1. Set `success_callback: ["langsmith"]` on litellm config.yaml
+
+If you're using a custom LangSmith instance, you can set the
+`LANGSMITH_BASE_URL` environment variable to point to your instance.
+
+```yaml
+litellm_settings:
+  success_callback: ["langsmith"]
+
+environment_variables:
+  LANGSMITH_API_KEY: "lsv2_pt_xxxxxxxx"
+  LANGSMITH_PROJECT: "litellm-proxy"
+
+  LANGSMITH_BASE_URL: "https://api.smith.langchain.com" # (Optional - only needed if you have a custom Langsmith instance)
+```
+
+2. Start Proxy
+
+```
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--data ' {
+    "model": "fake-openai-endpoint",
+    "messages": [
+        {
+        "role": "user",
+        "content": "Hello, Claude gm!"
+        }
+    ],
+    }
+'
+```
+
+Expect to see your log on Langsmith
+<Image img={require('../../img/langsmith_new.png')} />
+
 ## Logging LLM IO to Galileo

 [BETA]
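The proxy docs added above have a direct SDK-side equivalent. A small sketch, assuming `LANGSMITH_API_KEY` (and optionally `LANGSMITH_PROJECT` / `LANGSMITH_BASE_URL`) are set in the environment:

```python
import litellm
from litellm import completion

# Mirrors `success_callback: ["langsmith"]` from the proxy config above.
litellm.success_callback = ["langsmith"]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello, Claude gm!"}],
    # Optional: override the Langsmith project / run name per request.
    metadata={"run_name": "litellmRUN", "project_name": "litellm-proxy"},
)
print(response)
```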
BIN  docs/my-website/img/langsmith_new.png  (new binary file, not shown; 353 KiB)
@@ -38,7 +38,7 @@ success_callback: List[Union[str, Callable]] = []
 failure_callback: List[Union[str, Callable]] = []
 service_callback: List[Union[str, Callable]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
-    "lago", "openmeter", "logfire", "dynamic_rate_limiter"
+    "lago", "openmeter", "logfire", "dynamic_rate_limiter", "langsmith", "galileo"
 ]
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 _langfuse_default_tags: Optional[
@@ -1,13 +1,43 @@
 #### What this does ####
 # On success, logs events to Langsmith
-import dotenv, os  # type: ignore
-import requests  # type: ignore
-from datetime import datetime
-import traceback
 import asyncio
+import os
+import traceback
 import types
+from datetime import datetime
+from typing import Any, List, Optional, Union
+
+import dotenv  # type: ignore
+import httpx
+import requests  # type: ignore
 from pydantic import BaseModel  # type: ignore
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+
+
+class LangsmithInputs(BaseModel):
+    model: Optional[str] = None
+    messages: Optional[List[Any]] = None
+    stream: Optional[bool] = None
+    call_type: Optional[str] = None
+    litellm_call_id: Optional[str] = None
+    completion_start_time: Optional[datetime] = None
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+    custom_llm_provider: Optional[str] = None
+    input: Optional[List[Any]] = None
+    log_event_type: Optional[str] = None
+    original_response: Optional[Any] = None
+    response_cost: Optional[float] = None
+
+    # LiteLLM Virtual Key specific fields
+    user_api_key: Optional[str] = None
+    user_api_key_user_id: Optional[str] = None
+    user_api_key_team_alias: Optional[str] = None


 def is_serializable(value):
     non_serializable_types = (
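The new `LangsmithInputs` model is what later lets `_prepare_log_data` whitelist the call kwargs before they are sent to Langsmith: any key that is not a declared field (an `api_key`, for example) is dropped. A rough sketch of that behavior, with made-up values:

```python
from datetime import datetime

from litellm.integrations.langsmith import LangsmithInputs

# Hypothetical kwargs as litellm would hand them to the logger.
raw_kwargs = {
    "model": "gpt-3.5-turbo",
    "litellm_call_id": "abc-123",
    "completion_start_time": datetime.utcnow(),
    "api_key": "sk-should-not-be-logged",  # not a LangsmithInputs field -> ignored
}

logged = LangsmithInputs(**raw_kwargs)
print(logged.model_dump())  # only declared fields survive (.dict() on pydantic v1)
```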
@@ -19,7 +49,7 @@ def is_serializable(value):
     return not isinstance(value, non_serializable_types)


-class LangsmithLogger:
+class LangsmithLogger(CustomLogger):
     # Class variables or attributes
     def __init__(self):
         self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
@@ -27,71 +57,121 @@
         self.langsmith_default_run_name = os.getenv(
             "LANGSMITH_DEFAULT_RUN_NAME", "LLMRun"
         )
+        self.langsmith_base_url = os.getenv(
+            "LANGSMITH_BASE_URL", "https://api.smith.langchain.com"
+        )
+        self.async_httpx_client = AsyncHTTPHandler(
+            timeout=httpx.Timeout(timeout=600.0, connect=5.0)
+        )

-    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
-        # Method definition
-        # inspired by Langsmith http api here: https://github.com/langchain-ai/langsmith-cookbook/blob/main/tracing-examples/rest/rest.ipynb
-        metadata = (
-            kwargs.get("litellm_params", {}).get("metadata", {}) or {}
-        )  # if metadata is None
-        # set project name and run_name for langsmith logging
-        # users can pass project_name and run name to litellm.completion()
-        # Example: litellm.completion(model, messages, metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"})
-        # if not set litellm will fallback to the environment variable LANGSMITH_PROJECT, then to the default project_name = litellm-completion, run_name = LLMRun
-        project_name = metadata.get("project_name", self.langsmith_project)
-        run_name = metadata.get("run_name", self.langsmith_default_run_name)
-        print_verbose(
-            f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
-        )
-        langsmith_base_url = os.getenv(
-            "LANGSMITH_BASE_URL", "https://api.smith.langchain.com"
-        )
-
-        try:
-            print_verbose(
-                f"Langsmith Logging - Enters logging function for model {kwargs}"
-            )
-            import requests
-            import datetime
-            from datetime import timezone
-
-            try:
-                start_time = kwargs["start_time"].astimezone(timezone.utc).isoformat()
-                end_time = kwargs["end_time"].astimezone(timezone.utc).isoformat()
-            except:
-                start_time = datetime.datetime.utcnow().isoformat()
-                end_time = datetime.datetime.utcnow().isoformat()
-
-            # filter out kwargs to not include any dicts, langsmith throws an erros when trying to log kwargs
-            new_kwargs = {}
-            for key in kwargs:
-                value = kwargs[key]
-                if key == "start_time" or key == "end_time" or value is None:
-                    pass
-                elif type(value) == datetime.datetime:
-                    new_kwargs[key] = value.isoformat()
-                elif type(value) != dict and is_serializable(value=value):
-                    new_kwargs[key] = value
-
-            if isinstance(response_obj, BaseModel):
-                try:
-                    response_obj = response_obj.model_dump()
-                except:
-                    response_obj = response_obj.dict()  # type: ignore
-
-            data = {
-                "name": run_name,
-                "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
-                "inputs": new_kwargs,
-                "outputs": response_obj,
-                "session_name": project_name,
-                "start_time": start_time,
-                "end_time": end_time,
-            }
-
-            url = f"{langsmith_base_url}/runs"
-            print_verbose(f"Langsmith Logging - About to send data to {url} ...")
+    def _prepare_log_data(self, kwargs, response_obj, start_time, end_time):
+        import datetime
+        from datetime import timezone
+
+        metadata = kwargs.get("litellm_params", {}).get("metadata", {}) or {}
+        kwargs["user_api_key"] = metadata.get("user_api_key", None)
+        kwargs["user_api_key_user_id"] = metadata.get("user_api_key_user_id", None)
+        kwargs["user_api_key_team_alias"] = metadata.get(
+            "user_api_key_team_alias", None
+        )
+
+        project_name = metadata.get("project_name", self.langsmith_project)
+        run_name = metadata.get("run_name", self.langsmith_default_run_name)
+        run_id = metadata.get("id", None)
+        verbose_logger.debug(
+            f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
+        )
+
+        try:
+            start_time = kwargs["start_time"].astimezone(timezone.utc).isoformat()
+            end_time = kwargs["end_time"].astimezone(timezone.utc).isoformat()
+        except:
+            start_time = datetime.datetime.utcnow().isoformat()
+            end_time = datetime.datetime.utcnow().isoformat()
+
+        # filter out kwargs to not include any dicts, langsmith throws an erros when trying to log kwargs
+        logged_kwargs = LangsmithInputs(**kwargs)
+        kwargs = logged_kwargs.model_dump()
+
+        new_kwargs = {}
+        for key in kwargs:
+            value = kwargs[key]
+            if key == "start_time" or key == "end_time" or value is None:
+                pass
+            elif key == "original_response" and not isinstance(value, str):
+                new_kwargs[key] = str(value)
+            elif type(value) == datetime.datetime:
+                new_kwargs[key] = value.isoformat()
+            elif type(value) != dict and is_serializable(value=value):
+                new_kwargs[key] = value
+            elif not is_serializable(value=value):
+                continue
+
+        if isinstance(response_obj, BaseModel):
+            try:
+                response_obj = response_obj.model_dump()
+            except:
+                response_obj = response_obj.dict()  # type: ignore
+
+        data = {
+            "name": run_name,
+            "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
+            "inputs": new_kwargs,
+            "outputs": response_obj,
+            "session_name": project_name,
+            "start_time": start_time,
+            "end_time": end_time,
+        }
+
+        if run_id:
+            data["id"] = run_id
+
+        verbose_logger.debug("Langsmith Logging data on langsmith: %s", data)
+
+        return data
+
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        try:
+            verbose_logger.debug(
+                "Langsmith Async Layer Logging - kwargs: %s, response_obj: %s",
+                kwargs,
+                response_obj,
+            )
+            data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
+            url = f"{self.langsmith_base_url}/runs"
+            verbose_logger.debug(f"Langsmith Logging - About to send data to {url} ...")
+
+            headers = {"x-api-key": self.langsmith_api_key}
+            response = await self.async_httpx_client.post(
+                url=url, json=data, headers=headers
+            )
+
+            if response.status_code >= 300:
+                verbose_logger.error(
+                    f"Langmsith Error: {response.status_code} - {response.text}"
+                )
+            else:
+                verbose_logger.debug(
+                    "Run successfully created, response=%s", response.text
+                )
+            verbose_logger.debug(
+                f"Langsmith Layer Logging - final response object: {response_obj}. Response text from langsmith={response.text}"
+            )
+        except:
+            verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
+
+    def log_success_event(self, kwargs, response_obj, start_time, end_time):
+        try:
+            verbose_logger.debug(
+                "Langsmith Sync Layer Logging - kwargs: %s, response_obj: %s",
+                kwargs,
+                response_obj,
+            )
+            data = self._prepare_log_data(kwargs, response_obj, start_time, end_time)
+            url = f"{self.langsmith_base_url}/runs"
+            verbose_logger.debug(f"Langsmith Logging - About to send data to {url} ...")
+
             response = requests.post(
                 url=url,
                 json=data,
@@ -99,12 +179,21 @@ class LangsmithLogger:
             )

             if response.status_code >= 300:
-                print_verbose(f"Error: {response.status_code}")
+                verbose_logger.error(f"Error: {response.status_code} - {response.text}")
             else:
-                print_verbose("Run successfully created")
-            print_verbose(
-                f"Langsmith Layer Logging - final response object: {response_obj}"
+                verbose_logger.debug("Run successfully created")
+            verbose_logger.debug(
+                f"Langsmith Layer Logging - final response object: {response_obj}. Response text from langsmith={response.text}"
             )
         except:
-            print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
-            pass
+            verbose_logger.error(f"Langsmith Layer Error - {traceback.format_exc()}")
+
+    def get_run_by_id(self, run_id):
+
+        url = f"{self.langsmith_base_url}/runs/{run_id}"
+        response = requests.get(
+            url=url,
+            headers={"x-api-key": self.langsmith_api_key},
+        )
+
+        return response.json()
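Net effect of the refactor: both the sync and async success hooks build their payload with `_prepare_log_data` and POST it to `{LANGSMITH_BASE_URL}/runs`, and `get_run_by_id` reads a run back. A rough end-to-end sketch mirroring the updated tests (assumes `LANGSMITH_API_KEY` is set; the completion itself is mocked):

```python
import asyncio
import uuid

import litellm
from litellm.integrations.langsmith import LangsmithLogger


async def main():
    logger = LangsmithLogger()
    run_id = str(uuid.uuid4())
    litellm.callbacks = ["langsmith"]

    await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response="hello",    # keeps the sketch offline
        metadata={"id": run_id},  # becomes the Langsmith run id via _prepare_log_data
    )
    await asyncio.sleep(3)        # logging happens in the background

    print(logger.get_run_by_id(run_id=run_id))


asyncio.run(main())
```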
@@ -39,7 +39,6 @@ from litellm.utils import (
     add_breadcrumb,
     capture_exception,
     customLogger,
-    langsmithLogger,
     liteDebuggerClient,
     logfireLogger,
     lunaryLogger,
@@ -89,7 +88,6 @@ alerts_channel = None
 heliconeLogger = None
 athinaLogger = None
 promptLayerLogger = None
-langsmithLogger = None
 logfireLogger = None
 weightsBiasesLogger = None
 customLogger = None
@@ -136,7 +134,7 @@ in_memory_trace_id_cache = ServiceTraceIDCache()


 class Logging:
-    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
+    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
     custom_pricing: bool = False
     stream_options = None

@@ -738,23 +736,6 @@ class Logging:
                         end_time=end_time,
                         print_verbose=print_verbose,
                     )
-                if callback == "langsmith":
-                    print_verbose("reaches langsmith for logging!")
-                    if self.stream:
-                        if "complete_streaming_response" not in kwargs:
-                            continue
-                        else:
-                            print_verbose(
-                                "reaches langsmith for streaming logging!"
-                            )
-                            result = kwargs["complete_streaming_response"]
-                    langsmithLogger.log_event(
-                        kwargs=self.model_call_details,
-                        response_obj=result,
-                        start_time=start_time,
-                        end_time=end_time,
-                        print_verbose=print_verbose,
-                    )
                 if callback == "logfire":
                     global logfireLogger
                     verbose_logger.debug("reaches logfire for success logging!")
@@ -1337,7 +1318,14 @@
                 if kwargs.get("no-log", False) == True:
                     print_verbose("no-log request, skipping logging")
                     continue
-                if callback == "cache" and litellm.cache is not None:
+                if (
+                    callback == "cache"
+                    and litellm.cache is not None
+                    and self.model_call_details.get("litellm_params", {}).get(
+                        "acompletion", False
+                    )
+                    is True
+                ):
                     # set_cache once complete streaming response is built
                     print_verbose("async success_callback: reaches cache for logging!")
                     kwargs = self.model_call_details
@@ -1417,6 +1405,9 @@
                         end_time=end_time,
                     )
                 if callable(callback):  # custom logger functions
+                    global customLogger
+                    if customLogger is None:
+                        customLogger = CustomLogger()
                     if self.stream:
                         if (
                             "async_complete_streaming_response"
@@ -1822,7 +1813,7 @@ def set_callbacks(callback_list, function_id=None):
     """
     Globally sets the callback client
     """
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger

     try:
         for callback in callback_list:
@@ -1903,8 +1894,6 @@ def set_callbacks(callback_list, function_id=None):
                 s3Logger = S3Logger()
             elif callback == "wandb":
                 weightsBiasesLogger = WeightsBiasesLogger()
-            elif callback == "langsmith":
-                langsmithLogger = LangsmithLogger()
             elif callback == "logfire":
                 logfireLogger = LogfireLogger()
             elif callback == "aispend":
@@ -1957,6 +1946,15 @@ def _init_custom_logger_compatible_class(
         _in_memory_loggers.append(_openmeter_logger)
         return _openmeter_logger  # type: ignore

+    elif logging_integration == "langsmith":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, LangsmithLogger):
+                return callback  # type: ignore
+
+        _langsmith_logger = LangsmithLogger()
+        _in_memory_loggers.append(_langsmith_logger)
+        return _langsmith_logger  # type: ignore
+
     elif logging_integration == "galileo":
         for callback in _in_memory_loggers:
             if isinstance(callback, GalileoObserve):
@@ -2025,6 +2023,10 @@ def get_custom_logger_compatible_class(
         for callback in _in_memory_loggers:
             if isinstance(callback, GalileoObserve):
                 return callback
+    elif logging_integration == "langsmith":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, LangsmithLogger):
+                return callback
     elif logging_integration == "logfire":
         if "LOGFIRE_TOKEN" not in os.environ:
             raise ValueError("LOGFIRE_TOKEN not found in environment variables")
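With these branches, `langsmith` behaves like the other custom-logger-compatible callbacks: a single `LangsmithLogger` is created, cached in `_in_memory_loggers`, and returned on later lookups. A rough sketch of that contract, shown only to illustrate the singleton behavior of these internal helpers:

```python
from litellm.integrations.langsmith import LangsmithLogger
from litellm.litellm_core_utils.litellm_logging import (
    _init_custom_logger_compatible_class,
)

first = _init_custom_logger_compatible_class(
    "langsmith", internal_usage_cache=None, llm_router=None
)
second = _init_custom_logger_compatible_class(
    "langsmith", internal_usage_cache=None, llm_router=None
)

# The second call finds the cached instance instead of creating a new one.
assert isinstance(first, LangsmithLogger)
assert first is second
```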
@@ -1020,6 +1020,26 @@
         "mode": "chat",
         "supports_function_calling": true
     },
+    "groq/llama3-groq-70b-8192-tool-use-preview": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000089,
+        "output_cost_per_token": 0.00000089,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/llama3-groq-8b-8192-tool-use-preview": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000019,
+        "output_cost_per_token": 0.00000019,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
     "friendliai/mixtral-8x7b-instruct-v0-1": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
@@ -1800,6 +1820,26 @@
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "medlm-medium": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8192,
+        "input_cost_per_character": 0.0000005,
+        "output_cost_per_character": 0.000001,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "medlm-large": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_character": 0.000005,
+        "output_cost_per_character": 0.000015,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "vertex_ai/claude-3-sonnet@20240229": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
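The new map entries feed straight into LiteLLM's cost tracking. A small sketch of what that enables (the character counts below are made up):

```python
import litellm

# Pricing comes from the model cost map entries added above.
groq_entry = litellm.model_cost["groq/llama3-groq-70b-8192-tool-use-preview"]
print(groq_entry["input_cost_per_token"], groq_entry["output_cost_per_token"])

# MedLM models are priced per character, so a rough cost estimate looks like:
medlm_entry = litellm.model_cost["medlm-medium"]
estimated_cost = (
    1_000 * medlm_entry["input_cost_per_character"]
    + 200 * medlm_entry["output_cost_per_character"]
)
print(f"estimated cost for 1,000 input / 200 output characters: ${estimated_cost:.6f}")
```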
@@ -1,10 +1,5 @@
 model_list:
-  - model_name: "*"
+  - model_name: llama-3
     litellm_params:
-      model: openai/*
-litellm_settings:
-  guardrails:
-    - prompt_injection:
-        callbacks: ["aporio_prompt_injection"]
-        default_on: true
+      model: gpt-4
+      request_timeout: 1
@@ -17,9 +17,7 @@ model_list:

 general_settings:
   master_key: sk-1234
-  litellm_key_header_name: "X-Litellm-Key"

 litellm_settings:
-  cache: true
-  callbacks: ["otel"]
+  success_callback: ["langsmith"]
@@ -718,6 +718,9 @@ class Router:
                 data.get(
                     "timeout", None
                 )  # timeout set on litellm_params for this deployment
+                or data.get(
+                    "request_timeout", None
+                )  # timeout set on litellm_params for this deployment
                 or self.timeout  # timeout set on router
                 or kwargs.get(
                     "timeout", None
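With this change, a per-deployment `request_timeout` under `litellm_params` is honored as a fallback when no `timeout` is set there; the router-level default and call-level timeouts sit further down the chain. A minimal sketch reusing the config values from this PR:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "llama-3",
            "litellm_params": {
                "model": "gpt-4",
                "request_timeout": 1,  # now picked up per deployment, like `timeout`
            },
        }
    ]
)

# This deployment uses the 1s request_timeout unless `timeout` is also set
# in its litellm_params (which takes precedence).
response = router.completion(
    model="llama-3",
    messages=[{"role": "user", "content": "ping"}],
)
```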
@@ -1579,18 +1579,21 @@ async def test_redis_semantic_cache_acompletion():
     assert response1.id == response2.id


-def test_caching_redis_simple(caplog):
+def test_caching_redis_simple(caplog, capsys):
     """
     Relevant issue - https://github.com/BerriAI/litellm/issues/4511
     """
+    litellm.set_verbose = True  ## REQUIRED FOR TEST.
     litellm.cache = Cache(
         type="redis", url=os.getenv("REDIS_SSL_URL")
     )  # passing `supported_call_types = ["completion"]` has no effect

     s = time.time()
+
+    uuid_str = str(uuid.uuid4())
     x = completion(
-        model="gpt-4o",
-        messages=[{"role": "user", "content": "Hello, how are you? Wink"}],
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": f"Hello, how are you? Wink {uuid_str}"}],
         stream=True,
     )
     for m in x:
@@ -1599,8 +1602,8 @@ def test_caching_redis_simple(caplog):

     s2 = time.time()
     x = completion(
-        model="gpt-4o",
-        messages=[{"role": "user", "content": "Hello, how are you? Wink"}],
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": f"Hello, how are you? Wink {uuid_str}"}],
         stream=True,
     )
     for m in x:
@@ -1609,11 +1612,15 @@ def test_caching_redis_simple(caplog):

     redis_async_caching_error = False
     redis_service_logging_error = False
+    captured = capsys.readouterr()
     captured_logs = [rec.message for rec in caplog.records]

     print(f"captured_logs: {captured_logs}")
     for item in captured_logs:
-        if "Error connecting to Async Redis client" in item:
+        if (
+            "Error connecting to Async Redis client" in item
+            or "Set ASYNC Redis Cache" in item
+        ):
             redis_async_caching_error = True

         if "ServiceLogging.async_service_success_hook" in item:
@@ -1621,3 +1628,4 @@ def test_caching_redis_simple(caplog):

     assert redis_async_caching_error is False
     assert redis_service_logging_error is False
+    assert "async success_callback: reaches cache for logging" not in captured.out
@@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt

-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []
 user_message = "Write a short poem about the sky"
@@ -706,6 +706,33 @@ def test_vertex_ai_completion_cost():
     print("calculated_input_cost: {}".format(calculated_input_cost))


+# @pytest.mark.skip(reason="new test - WIP, working on fixing this")
+def test_vertex_ai_medlm_completion_cost():
+    """Test for medlm completion cost."""
+
+    with pytest.raises(Exception) as e:
+        model = "vertex_ai/medlm-medium"
+        messages = [{"role": "user", "content": "Test MedLM completion cost."}]
+        predictive_cost = completion_cost(
+            model=model, messages=messages, custom_llm_provider="vertex_ai"
+        )
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    model = "vertex_ai/medlm-medium"
+    messages = [{"role": "user", "content": "Test MedLM completion cost."}]
+    predictive_cost = completion_cost(
+        model=model, messages=messages, custom_llm_provider="vertex_ai"
+    )
+    assert predictive_cost > 0
+
+    model = "vertex_ai/medlm-large"
+    messages = [{"role": "user", "content": "Test MedLM completion cost."}]
+    predictive_cost = completion_cost(model=model, messages=messages)
+    assert predictive_cost > 0
+
+
 def test_vertex_ai_claude_completion_cost():
     from litellm import Choices, Message, ModelResponse
     from litellm.utils import Usage
@@ -589,7 +589,7 @@ async def test_triton_embeddings():
         print(f"response: {response}")

         # stubbed endpoint is setup to return this
-        assert response.data[0]["embedding"] == [0.1, 0.2, 0.3]
+        assert response.data[0]["embedding"] == [0.1, 0.2]
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
@@ -1,70 +1,176 @@
-import sys
-import os
 import io
+import os
+import sys

 sys.path.insert(0, os.path.abspath("../.."))

-from litellm import completion
-import litellm
+import asyncio
+import logging
+import uuid
+
+import pytest
+
+import litellm
+from litellm import completion
+from litellm._logging import verbose_logger
+from litellm.integrations.langsmith import LangsmithLogger
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+
+verbose_logger.setLevel(logging.DEBUG)

-litellm.success_callback = ["langsmith"]
 litellm.set_verbose = True
 import time


-def test_langsmith_logging():
+@pytest.mark.asyncio()
+async def test_async_langsmith_logging():
     try:
-        response = completion(
+        test_langsmith_logger = LangsmithLogger()
+        run_id = str(uuid.uuid4())
+        litellm.set_verbose = True
+        litellm.callbacks = ["langsmith"]
+        response = await litellm.acompletion(
             model="claude-instant-1.2",
             messages=[{"role": "user", "content": "what llm are u"}],
             max_tokens=10,
             temperature=0.2,
+            metadata={
+                "id": run_id,
+                "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
+                "user_api_key_alias": "ishaans-langmsith-key",
+                "user_api_end_user_max_budget": None,
+                "litellm_api_version": "1.40.19",
+                "global_max_parallel_requests": None,
+                "user_api_key_user_id": "admin",
+                "user_api_key_org_id": None,
+                "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
+                "user_api_key_team_alias": "testing-team",
+            },
         )
         print(response)
+        await asyncio.sleep(3)
+
+        print("run_id", run_id)
+        logged_run_on_langsmith = test_langsmith_logger.get_run_by_id(run_id=run_id)
+
+        print("logged_run_on_langsmith", logged_run_on_langsmith)
+
+        print("fields in logged_run_on_langsmith", logged_run_on_langsmith.keys())
+
+        input_fields_on_langsmith = logged_run_on_langsmith.get("inputs")
+        extra_fields_on_langsmith = logged_run_on_langsmith.get("extra").get(
+            "invocation_params"
+        )
+
+        print("\nLogged INPUT ON LANGSMITH", input_fields_on_langsmith)
+
+        print("\nextra fields on langsmith", extra_fields_on_langsmith)
+
+        assert isinstance(input_fields_on_langsmith, dict)
+        assert "api_key" not in input_fields_on_langsmith
+        assert "api_key" not in extra_fields_on_langsmith
+
+        # assert user_api_key in extra_fields_on_langsmith
+        assert "user_api_key" in extra_fields_on_langsmith
+        assert "user_api_key_user_id" in extra_fields_on_langsmith
+        assert "user_api_key_team_alias" in extra_fields_on_langsmith
+
+        for cb in litellm.callbacks:
+            if isinstance(cb, LangsmithLogger):
+                await cb.async_httpx_client.client.aclose()
+            # test_langsmith_logger.async_httpx_client.close()
+
     except Exception as e:
         print(e)
+        pytest.fail(f"Error occurred: {e}")


 # test_langsmith_logging()


-def test_langsmith_logging_with_metadata():
+def test_async_langsmith_logging_with_metadata():
     try:
+        litellm.success_callback = ["langsmith"]
+        litellm.set_verbose = True
         response = completion(
             model="gpt-3.5-turbo",
             messages=[{"role": "user", "content": "what llm are u"}],
             max_tokens=10,
             temperature=0.2,
-            metadata={
-                "run_name": "litellmRUN",
-                "project_name": "litellm-completion",
-            },
         )
         print(response)
+        time.sleep(3)
+
+        for cb in litellm.callbacks:
+            if isinstance(cb, LangsmithLogger):
+                cb.async_httpx_client.close()
+
     except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
         print(e)


-# test_langsmith_logging_with_metadata()
-
-def test_langsmith_logging_with_streaming_and_metadata():
+@pytest.mark.parametrize("sync_mode", [False, True])
+@pytest.mark.asyncio
+async def test_async_langsmith_logging_with_streaming_and_metadata(sync_mode):
     try:
-        response = completion(
-            model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": "what llm are u"}],
-            max_tokens=10,
-            temperature=0.2,
-            metadata={
-                "run_name": "litellmRUN",
-                "project_name": "litellm-completion",
-            },
-            stream=True,
-        )
-        for chunk in response:
-            continue
+        test_langsmith_logger = LangsmithLogger()
+        litellm.success_callback = ["langsmith"]
+        litellm.set_verbose = True
+        run_id = str(uuid.uuid4())
+
+        messages = [{"role": "user", "content": "what llm are u"}]
+        if sync_mode is True:
+            response = completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                max_tokens=10,
+                temperature=0.2,
+                stream=True,
+                metadata={"id": run_id},
+            )
+            for cb in litellm.callbacks:
+                if isinstance(cb, LangsmithLogger):
+                    cb.async_httpx_client = AsyncHTTPHandler()
+            for chunk in response:
+                continue
+            time.sleep(3)
+        else:
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                max_tokens=10,
+                temperature=0.2,
+                mock_response="This is a mock request",
+                stream=True,
+                metadata={"id": run_id},
+            )
+            for cb in litellm.callbacks:
+                if isinstance(cb, LangsmithLogger):
+                    cb.async_httpx_client = AsyncHTTPHandler()
+            async for chunk in response:
+                continue
+            await asyncio.sleep(3)
+
+        print("run_id", run_id)
+        logged_run_on_langsmith = test_langsmith_logger.get_run_by_id(run_id=run_id)
+
+        print("logged_run_on_langsmith", logged_run_on_langsmith)
+
+        print("fields in logged_run_on_langsmith", logged_run_on_langsmith.keys())
+
+        input_fields_on_langsmith = logged_run_on_langsmith.get("inputs")
+
+        extra_fields_on_langsmith = logged_run_on_langsmith.get("extra").get(
+            "invocation_params"
+        )
+
+        assert logged_run_on_langsmith.get("run_type") == "llm"
+        print("\nLogged INPUT ON LANGSMITH", input_fields_on_langsmith)
+
+        print("\nextra fields on langsmith", extra_fields_on_langsmith)
+
+        assert isinstance(input_fields_on_langsmith, dict)
     except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
         print(e)
-
-
-test_langsmith_logging_with_streaming_and_metadata()
@@ -515,6 +515,7 @@ async def test_completion_predibase_streaming(sync_mode):
         response = completion(
             model="predibase/llama-3-8b-instruct",
             tenant_id="c4768f95",
+            max_tokens=10,
             api_base="https://serving.app.predibase.com",
             api_key=os.getenv("PREDIBASE_API_KEY"),
             messages=[{"role": "user", "content": "What is the meaning of life?"}],
@@ -539,6 +540,7 @@ async def test_completion_predibase_streaming(sync_mode):
         response = await litellm.acompletion(
             model="predibase/llama-3-8b-instruct",
             tenant_id="c4768f95",
+            max_tokens=10,
             api_base="https://serving.app.predibase.com",
             api_key=os.getenv("PREDIBASE_API_KEY"),
             messages=[{"role": "user", "content": "What is the meaning of life?"}],
@@ -417,6 +417,21 @@ def function_setup(
                     # we only support async dynamo db logging for acompletion/aembedding since that's used on proxy
                     litellm._async_success_callback.append(callback)
                     removed_async_items.append(index)
+                elif callback == "langsmith":
+                    callback_class = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
+                        callback, internal_usage_cache=None, llm_router=None
+                    )
+
+                    # don't double add a callback
+                    if not any(
+                        isinstance(cb, type(callback_class)) for cb in litellm.callbacks
+                    ):
+                        litellm.callbacks.append(callback_class)  # type: ignore
+                        litellm.input_callback.append(callback_class)  # type: ignore
+                        litellm.success_callback.append(callback_class)  # type: ignore
+                        litellm.failure_callback.append(callback_class)  # type: ignore
+                        litellm._async_success_callback.append(callback_class)  # type: ignore
+                        litellm._async_failure_callback.append(callback_class)  # type: ignore
+
             # Pop the async items from success_callback in reverse order to avoid index issues
             for index in reversed(removed_async_items):
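For SDK users, the practical effect is that `litellm.success_callback = ["langsmith"]` now resolves to a single `LangsmithLogger` registered across the callback lists on the first call, replacing the legacy `langsmithLogger` path removed earlier in this diff. A rough sketch of that behavior (mocked response, so nothing is sent to a provider):

```python
import litellm
from litellm.integrations.langsmith import LangsmithLogger

litellm.success_callback = ["langsmith"]

# function_setup runs on the first call and registers the logger once;
# repeated calls do not add a duplicate.
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello",
)

print(any(isinstance(cb, LangsmithLogger) for cb in litellm.callbacks))  # True
```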
@@ -1020,6 +1020,26 @@
         "mode": "chat",
         "supports_function_calling": true
     },
+    "groq/llama3-groq-70b-8192-tool-use-preview": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000089,
+        "output_cost_per_token": 0.00000089,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
+    "groq/llama3-groq-8b-8192-tool-use-preview": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000019,
+        "output_cost_per_token": 0.00000019,
+        "litellm_provider": "groq",
+        "mode": "chat",
+        "supports_function_calling": true
+    },
     "friendliai/mixtral-8x7b-instruct-v0-1": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
@@ -1800,6 +1820,26 @@
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "medlm-medium": {
+        "max_tokens": 8192,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 8192,
+        "input_cost_per_character": 0.0000005,
+        "output_cost_per_character": 0.000001,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
+    "medlm-large": {
+        "max_tokens": 1024,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 1024,
+        "input_cost_per_character": 0.000005,
+        "output_cost_per_character": 0.000015,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "vertex_ai/claude-3-sonnet@20240229": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,