forked from phoenix/litellm-mirror

Compare commits: litellm_in ... main (18 commits)

SHA1: d2b123eef7, 859b47f08b, bd59f18809, 05f810922c, 0ac2d8b256, 9393434d01, d6181b2c9f, 4ebb7c8a7f, eba700a491, a8b8deb793, 77f714dc51, 8af5b11f54, 21156ff5d0, 2d2931a215, 5d13302e6b, 07223bdedf, 562e7defe6, a6da3dea03

159 changed files with 3431 additions and 1154 deletions
@ -811,7 +811,8 @@ jobs:
- run: python ./tests/code_coverage_tests/router_code_coverage.py
- run: python ./tests/code_coverage_tests/test_router_strategy_async.py
- run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py
# - run: python ./tests/documentation_tests/test_env_keys.py
- run: python ./tests/documentation_tests/test_env_keys.py
- run: python ./tests/documentation_tests/test_router_settings.py
- run: python ./tests/documentation_tests/test_api_docs.py
- run: python ./tests/code_coverage_tests/ensure_async_clients_test.py
- run: helm lint ./deploy/charts/litellm-helm

@ -1407,7 +1408,7 @@ jobs:
command: |
  docker run -d \
  -p 4000:4000 \
  -e DATABASE_URL=$PROXY_DATABASE_URL \
  -e DATABASE_URL=$PROXY_DATABASE_URL_2 \
  -e LITELLM_MASTER_KEY="sk-1234" \
  -e OPENAI_API_KEY=$OPENAI_API_KEY \
  -e UI_USERNAME="admin" \
docs/my-website/docs/moderation.md (new file, 135 lines)

@ -0,0 +1,135 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Moderation

### Usage
<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">

```python
from litellm import moderation

response = moderation(
    input="hello from litellm",
    model="text-moderation-stable"
)
```

</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">

For `/moderations` endpoint, there is **no need to specify `model` in the request or on the litellm config.yaml**

Start litellm proxy server

```
litellm
```

<Tabs>
<TabItem value="python" label="OpenAI Python SDK">

```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")

response = client.moderations.create(
    input="hello from litellm",
    model="text-moderation-stable" # optional, defaults to `omni-moderation-latest`
)

print(response)
```
</TabItem>

<TabItem value="curl" label="Curl Request">

```shell
curl --location 'http://0.0.0.0:4000/moderations' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer sk-1234' \
  --data '{"input": "Sample text goes here", "model": "text-moderation-stable"}'
```
</TabItem>
</Tabs>

</TabItem>
</Tabs>

## Input Params
LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.openai.com/docs/api-reference/moderations) across all supported providers.

### Required Fields

- `input`: *string or array* - Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
  - If string: A string of text to classify for moderation
  - If array of strings: An array of strings to classify for moderation
  - If array of objects: An array of multi-modal inputs to the moderation model, where each object can be:
    - An object describing an image to classify with:
      - `type`: *string, required* - Always `image_url`
      - `image_url`: *object, required* - Contains either an image URL or a data URL for a base64 encoded image
    - An object describing text to classify with:
      - `type`: *string, required* - Always `text`
      - `text`: *string, required* - A string of text to classify
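For reference, a minimal sketch of a multi-modal request built from the schema above (the image URL is a placeholder, and multi-modal inputs require an omni-moderation model):

```python
from litellm import moderation

# Hypothetical multi-modal input following the schema above.
# Replace the image URL with a real URL or a base64 data URL.
response = moderation(
    input=[
        {"type": "text", "text": "hello from litellm"},
        {"type": "image_url", "image_url": {"url": "https://example.com/image.png"}},
    ],
    model="omni-moderation-latest",
)
```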
### Optional Fields

- `model`: *string (optional)* - The moderation model to use. Defaults to `omni-moderation-latest`.

## Output Format
Here's the exact json output and type you can expect from all moderation calls:

[**LiteLLM follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/moderations/object)

```python
{
  "id": "modr-AB8CjOTu2jiq12hp1AQPfeqFWaORR",
  "model": "text-moderation-007",
  "results": [
    {
      "flagged": true,
      "categories": {
        "sexual": false,
        "hate": false,
        "harassment": true,
        "self-harm": false,
        "sexual/minors": false,
        "hate/threatening": false,
        "violence/graphic": false,
        "self-harm/intent": false,
        "self-harm/instructions": false,
        "harassment/threatening": true,
        "violence": true
      },
      "category_scores": {
        "sexual": 0.000011726012417057063,
        "hate": 0.22706663608551025,
        "harassment": 0.5215635299682617,
        "self-harm": 2.227119921371923e-6,
        "sexual/minors": 7.107352217872176e-8,
        "hate/threatening": 0.023547329008579254,
        "violence/graphic": 0.00003391829886822961,
        "self-harm/intent": 1.646940972932498e-6,
        "self-harm/instructions": 1.1198755256458526e-9,
        "harassment/threatening": 0.5694745779037476,
        "violence": 0.9971134662628174
      }
    }
  ]
}
```
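Reading the payload is straightforward; a minimal sketch, assuming `output` holds the dict shown above (e.g. `response.model_dump()` from the SDK call):

```python
# `output` is the dict shown above
result = output["results"][0]
if result["flagged"]:
    flagged_categories = [name for name, hit in result["categories"].items() if hit]
    print("flagged:", flagged_categories)  # ['harassment', 'harassment/threatening', 'violence']
```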
## **Supported Providers**

| Provider |
|-------------|
| OpenAI |
@ -4,24 +4,63 @@ import TabItem from '@theme/TabItem';

# Argilla

Argilla is a tool for annotating datasets.
Argilla is a collaborative annotation tool for AI engineers and domain experts who need to build high-quality datasets for their projects.


## Getting Started

## Usage
To log the data to Argilla, first you need to deploy the Argilla server. If you have not deployed the Argilla server, please follow the instructions [here](https://docs.argilla.io/latest/getting_started/quickstart/).

Next, you will need to configure and create the Argilla dataset.

```python
import argilla as rg

client = rg.Argilla(api_url="<api_url>", api_key="<api_key>")

settings = rg.Settings(
    guidelines="These are some guidelines.",
    fields=[
        rg.ChatField(
            name="user_input",
        ),
        rg.TextField(
            name="llm_output",
        ),
    ],
    questions=[
        rg.RatingQuestion(
            name="rating",
            values=[1, 2, 3, 4, 5, 6, 7],
        ),
    ],
)

dataset = rg.Dataset(
    name="my_first_dataset",
    settings=settings,
)

dataset.create()
```

For further configuration, please refer to the [Argilla documentation](https://docs.argilla.io/latest/how_to_guides/dataset/).


## Usage

<Tabs>
<Tab value="sdk" label="SDK">

```python
from litellm import completion
import os
import litellm
import os
from litellm import completion

# add env vars
os.environ["ARGILLA_API_KEY"]="argilla.apikey"
os.environ["ARGILLA_BASE_URL"]="http://localhost:6900"
os.environ["ARGILLA_DATASET_NAME"]="my_second_dataset"
os.environ["ARGILLA_DATASET_NAME"]="my_first_dataset"
os.environ["OPENAI_API_KEY"]="sk-proj-..."

litellm.callbacks = ["argilla"]
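
# For context (not part of this diff): with the callback registered above, a standard
# litellm completion call like the following would be logged to the configured Argilla dataset.
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello from litellm + Argilla"}],
)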
@ -279,7 +279,31 @@ router_settings:
| retry_policy | object | Specifies the number of retries for different types of exceptions. [More information here](reliability) |
| allowed_fails | integer | The number of failures allowed before cooling down a model. [More information here](reliability) |
| allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. [More information here](reliability) |
| default_max_parallel_requests | Optional[int] | The default maximum number of parallel requests for a deployment. |
| default_priority | Optional[int] | The default priority for a request. Only for '.scheduler_acompletion()'. Default is None. |
| polling_interval | Optional[float] | Frequency of polling the queue. Only for '.scheduler_acompletion()'. Default is 3ms. |
| max_fallbacks | Optional[int] | The maximum number of fallbacks to try before exiting the call. Defaults to 5. |
| default_litellm_params | Optional[dict] | The default litellm parameters to add to all requests (e.g. `temperature`, `max_tokens`). |
| timeout | Optional[float] | The default timeout for a request. |
| debug_level | Literal["DEBUG", "INFO"] | The debug level for the logging library in the router. Defaults to "INFO". |
| client_ttl | int | Time-to-live for cached clients in seconds. Defaults to 3600. |
| cache_kwargs | dict | Additional keyword arguments for the cache initialization. |
| routing_strategy_args | dict | Additional keyword arguments for the routing strategy - e.g. lowest latency routing default ttl |
| model_group_alias | dict | Model group alias mapping. E.g. `{"claude-3-haiku": "claude-3-haiku-20240229"}` |
| num_retries | int | Number of retries for a request. Defaults to 3. |
| default_fallbacks | Optional[List[str]] | Fallbacks to try if no model group-specific fallbacks are defined. |
| caching_groups | Optional[List[tuple]] | List of model groups for caching across model groups. Defaults to None. - e.g. caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")] |
| alerting_config | AlertingConfig | [SDK-only arg] Slack alerting configuration. Defaults to None. [Further Docs](../routing.md#alerting-) |
| assistants_config | AssistantsConfig | Set on proxy via `assistant_settings`. [Further docs](../assistants.md) |
| set_verbose | boolean | [DEPRECATED PARAM - see debug docs](./debugging.md) If true, sets the logging level to verbose. |
| retry_after | int | Time to wait before retrying a request in seconds. Defaults to 0. If `x-retry-after` is received from LLM API, this value is overridden. |
| provider_budget_config | ProviderBudgetConfig | Provider budget configuration. Use this to set llm_provider budget limits. example $100/day to OpenAI, $100/day to Azure, etc. Defaults to None. [Further Docs](./provider_budget_routing.md) |
| enable_pre_call_checks | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) |
| model_group_retry_policy | Dict[str, RetryPolicy] | [SDK-only arg] Set retry policy for model groups. |
| context_window_fallbacks | List[Dict[str, List[str]]] | Fallback models for context window violations. |
| redis_url | str | URL for Redis server. **Known performance issue with Redis URL.** |
| cache_responses | boolean | Flag to enable caching LLM Responses, if cache set under `router_settings`. If true, caches responses. Defaults to False. |
| router_general_settings | RouterGeneralSettings | [SDK-Only] Router general settings - contains optimizations like 'async_only_mode'. [Docs](../routing.md#router-general-settings) |
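Several of these parameters are plain `Router()` keyword arguments on the SDK side; a minimal sketch using a few of the fields documented above (values are illustrative):

```python
from litellm import Router

router = Router(
    model_list=...,               # your deployments
    num_retries=3,                # retries per request
    timeout=30.0,                 # default request timeout in seconds
    retry_after=0,                # wait before retrying, unless x-retry-after is returned
    allowed_fails=3,              # failures allowed before cooling down a deployment
    enable_pre_call_checks=True,  # check the context window before making the call
)
```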

### environment variables - Reference


@ -335,6 +359,8 @@ router_settings:
| DD_SITE | Site URL for Datadog (e.g., datadoghq.com)
| DD_SOURCE | Source identifier for Datadog logs
| DD_ENV | Environment identifier for Datadog logs. Only supported for `datadog_llm_observability` callback
| DD_SERVICE | Service identifier for Datadog logs. Defaults to "litellm-server"
| DD_VERSION | Version identifier for Datadog logs. Defaults to "unknown"
| DEBUG_OTEL | Enable debug mode for OpenTelemetry
| DIRECT_URL | Direct URL for service endpoint
| DISABLE_ADMIN_UI | Toggle to disable the admin UI
@ -357,77 +357,6 @@ curl --location 'http://0.0.0.0:4000/v1/model/info' \
--data ''
```


### Provider specific wildcard routing
**Proxy all models from a provider**

Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml**

**Step 1** - define provider specific routing on config.yaml
```yaml
model_list:
  # provider specific wildcard routing
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: "groq/*"
    litellm_params:
      model: "groq/*"
      api_key: os.environ/GROQ_API_KEY
  - model_name: "fo::*:static::*" # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*"
    litellm_params:
      model: "openai/fo::*:static::*"
      api_key: os.environ/OPENAI_API_KEY
```

Step 2 - Run litellm proxy

```shell
$ litellm --config /path/to/config.yaml
```

Step 3 Test it

Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*`
```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "anthropic/claude-3-sonnet-20240229",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```

Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*`
```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "groq/llama3-8b-8192",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```

Test with `fo::*::static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*`
```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "fo::hi::static::hi",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```

### Load Balancing

:::info
@ -50,18 +50,22 @@ You can see the full DB Schema [here](https://github.com/BerriAI/litellm/blob/ma
| LiteLLM_ErrorLogs | Captures failed requests and errors. Stores exception details and request information. Helps with debugging and monitoring. | **Medium - on errors only** |
| LiteLLM_AuditLog | Tracks changes to system configuration. Records who made changes and what was modified. Maintains history of updates to teams, users, and models. | **Off by default**, **High - when enabled** |

## How to Disable `LiteLLM_SpendLogs`
## Disable `LiteLLM_SpendLogs` & `LiteLLM_ErrorLogs`

You can disable spend_logs by setting `disable_spend_logs` to `True` on the `general_settings` section of your proxy_config.yaml file.
You can disable spend_logs and error_logs by setting `disable_spend_logs` and `disable_error_logs` to `True` on the `general_settings` section of your proxy_config.yaml file.

```yaml
general_settings:
  disable_spend_logs: True
  disable_spend_logs: True # Disable writing spend logs to DB
  disable_error_logs: True # Disable writing error logs to DB
```

### What is the impact of disabling these logs?

### What is the impact of disabling `LiteLLM_SpendLogs`?

When disabling spend logs (`disable_spend_logs: True`):
- You **will not** be able to view Usage on the LiteLLM UI
- You **will** continue seeing cost metrics on s3, Prometheus, Langfuse (and any other logging integration you are using)

When disabling error logs (`disable_error_logs: True`):
- You **will not** be able to view Errors on the LiteLLM UI
- You **will** continue seeing error logs in your application logs and any other logging integrations you are using
@ -23,6 +23,7 @@ general_settings:

  # OPTIONAL Best Practices
  disable_spend_logs: True # turn off writing each transaction to the db. We recommend doing this if you don't need to see Usage on the LiteLLM UI and are tracking metrics via Prometheus
  disable_error_logs: True # turn off writing LLM Exceptions to DB
  allow_requests_on_db_unavailable: True # Only USE when running LiteLLM on your VPC. Allow requests to still be processed even if the DB is unavailable. We recommend doing this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.

litellm_settings:

@ -102,17 +103,22 @@ general_settings:
  allow_requests_on_db_unavailable: True
```

## 6. Disable spend_logs if you're not using the LiteLLM UI
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI

By default LiteLLM will write every request to the `LiteLLM_SpendLogs` table. This is used for viewing Usage on the LiteLLM UI.
By default, LiteLLM writes several types of logs to the database:
- Every LLM API request to the `LiteLLM_SpendLogs` table
- LLM Exceptions to the `LiteLLM_ErrorLogs` table

If you're not viewing Usage on the LiteLLM UI (most users use Prometheus when this is disabled), you can disable spend_logs by setting `disable_spend_logs` to `True`.
If you're not viewing these logs on the LiteLLM UI (most users use Prometheus for monitoring), you can disable them by setting the following flags to `True`:

```yaml
general_settings:
  disable_spend_logs: True
  disable_spend_logs: True # Disable writing spend logs to DB
  disable_error_logs: True # Disable writing error logs to DB
```

[More information about what the Database is used for here](db_info)

## 7. Use Helm PreSync Hook for Database Migrations [BETA]

To ensure only one service manages database migrations, use our [Helm PreSync hook for Database Migrations](https://github.com/BerriAI/litellm/blob/main/deploy/charts/litellm-helm/templates/migrations-job.yaml). This ensures migrations are handled during `helm upgrade` or `helm install`, while LiteLLM pods explicitly disable migrations.
@ -192,3 +192,13 @@ Here is a screenshot of the metrics you can monitor with the LiteLLM Grafana Das
|----------------------|--------------------------------------|
| `litellm_llm_api_failed_requests_metric` | **deprecated** use `litellm_proxy_failed_requests_metric` |
| `litellm_requests_metric` | **deprecated** use `litellm_proxy_total_requests_metric` |


## FAQ

### What are `_created` vs. `_total` metrics?

- `_created` metrics are metrics that are created when the proxy starts
- `_total` metrics are metrics that are incremented for each request

You should consume the `_total` metrics for your counting purposes
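This split typically comes from the Prometheus Python client: each counter is exposed as a `_total` sample plus a `_created` sample holding the counter's creation timestamp. A quick local illustration with the `prometheus_client` package (metric name is illustrative):

```python
from prometheus_client import Counter, generate_latest

requests = Counter("litellm_proxy_total_requests_metric", "example counter")
requests.inc()

# The exposition output includes both:
#   litellm_proxy_total_requests_metric_total 1.0
#   litellm_proxy_total_requests_metric_created <unix timestamp>
print(generate_latest().decode())
```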
@ -1891,3 +1891,22 @@ router = Router(
    debug_level="DEBUG" # defaults to INFO
)
```

## Router General Settings

### Usage

```python
router = Router(model_list=..., router_general_settings=RouterGeneralSettings(async_only_mode=True))
```

### Spec
```python
class RouterGeneralSettings(BaseModel):
    async_only_mode: bool = Field(
        default=False
    )  # this will only initialize async clients. Good for memory utilization.
    pass_through_all_models: bool = Field(
        default=False
    )  # if a model is not in the llm_router model list, pass the request through to litellm.acompletion/embedding
```
docs/my-website/docs/text_completion.md (new file, 174 lines)

@ -0,0 +1,174 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Text Completion

### Usage
<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">

```python
from litellm import text_completion

response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7
)
```

</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">

1. Define models on config.yaml

```yaml
model_list:
  - model_name: gpt-3.5-turbo-instruct
    litellm_params:
      model: text-completion-openai/gpt-3.5-turbo-instruct # The `text-completion-openai/` prefix will call openai.completions.create
      api_key: os.environ/OPENAI_API_KEY
  - model_name: text-davinci-003
    litellm_params:
      model: text-completion-openai/text-davinci-003
      api_key: os.environ/OPENAI_API_KEY
```

2. Start litellm proxy server

```
litellm --config config.yaml
```

<Tabs>
<TabItem value="python" label="OpenAI Python SDK">

```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")

response = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7
)

print(response)
```
</TabItem>

<TabItem value="curl" label="Curl Request">

```shell
curl --location 'http://0.0.0.0:4000/completions' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer sk-1234' \
  --data '{
    "model": "gpt-3.5-turbo-instruct",
    "prompt": "Say this is a test",
    "max_tokens": 7
  }'
```
</TabItem>
</Tabs>

</TabItem>
</Tabs>

## Input Params

LiteLLM accepts and translates the [OpenAI Text Completion params](https://platform.openai.com/docs/api-reference/completions) across all supported providers.

### Required Fields

- `model`: *string* - ID of the model to use
- `prompt`: *string or array* - The prompt(s) to generate completions for

### Optional Fields

- `best_of`: *integer* - Generates best_of completions server-side and returns the "best" one
- `echo`: *boolean* - Echo back the prompt in addition to the completion.
- `frequency_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency.
- `logit_bias`: *map* - Modify the likelihood of specified tokens appearing in the completion
- `logprobs`: *integer* - Include the log probabilities on the logprobs most likely tokens. Max value of 5
- `max_tokens`: *integer* - The maximum number of tokens to generate.
- `n`: *integer* - How many completions to generate for each prompt.
- `presence_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far.
- `seed`: *integer* - If specified, the system will attempt to sample deterministically
- `stop`: *string or array* - Up to 4 sequences where the API will stop generating tokens
- `stream`: *boolean* - Whether to stream back partial progress. Defaults to false
- `suffix`: *string* - The suffix that comes after a completion of inserted text
- `temperature`: *number* - What sampling temperature to use, between 0 and 2.
- `top_p`: *number* - An alternative to sampling with temperature, called nucleus sampling.
- `user`: *string* - A unique identifier representing your end-user

## Output Format
Here's the exact JSON output format you can expect from completion calls:


[**Follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/completions/object)

<Tabs>

<TabItem value="non-streaming" label="Non-Streaming Response">

```python
{
  "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
  "object": "text_completion",
  "created": 1589478378,
  "model": "gpt-3.5-turbo-instruct",
  "system_fingerprint": "fp_44709d6fcb",
  "choices": [
    {
      "text": "\n\nThis is indeed a test",
      "index": 0,
      "logprobs": null,
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 5,
    "completion_tokens": 7,
    "total_tokens": 12
  }
}
```
</TabItem>
<TabItem value="streaming" label="Streaming Response">

```python
{
  "id": "cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe",
  "object": "text_completion",
  "created": 1690759702,
  "choices": [
    {
      "text": "This",
      "index": 0,
      "logprobs": null,
      "finish_reason": null
    }
  ],
  "model": "gpt-3.5-turbo-instruct",
  "system_fingerprint": "fp_44709d6fcb"
}
```

</TabItem>
</Tabs>
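To consume the streaming format above with the SDK, iterate the response when `stream=True` (a minimal sketch; chunk fields follow the streaming object shown above):

```python
from litellm import text_completion

response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    stream=True,
)
for chunk in response:
    # each chunk mirrors the streaming object shown above
    print(chunk.choices[0].text or "", end="")
```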

## **Supported Providers**

| Provider | Link to Usage |
|-------------|--------------------|
| OpenAI | [Usage](../docs/providers/text_completion_openai) |
| Azure OpenAI | [Usage](../docs/providers/azure) |
docs/my-website/docs/wildcard_routing.md (new file, 140 lines)

@ -0,0 +1,140 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Provider specific Wildcard routing

**Proxy all models from a provider**

Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml**

## Step 1. Define provider specific routing

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "anthropic/*",
            "litellm_params": {
                "model": "anthropic/*",
                "api_key": os.environ["ANTHROPIC_API_KEY"]
            }
        },
        {
            "model_name": "groq/*",
            "litellm_params": {
                "model": "groq/*",
                "api_key": os.environ["GROQ_API_KEY"]
            }
        },
        {
            "model_name": "fo::*:static::*", # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*"
            "litellm_params": {
                "model": "openai/fo::*:static::*",
                "api_key": os.environ["OPENAI_API_KEY"]
            }
        }
    ]
)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

**Step 1** - define provider specific routing on config.yaml
```yaml
model_list:
  # provider specific wildcard routing
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: "groq/*"
    litellm_params:
      model: "groq/*"
      api_key: os.environ/GROQ_API_KEY
  - model_name: "fo::*:static::*" # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*"
    litellm_params:
      model: "openai/fo::*:static::*"
      api_key: os.environ/OPENAI_API_KEY
```
</TabItem>
</Tabs>
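The `model_name` patterns behave like glob patterns, which is why `fo::hi::static::hi` lands on the `openai/fo::*:static::*` deployment. An illustrative check with Python's `fnmatch` (not LiteLLM's internal matcher):

```python
from fnmatch import fnmatch

# Which request model names does each wildcard deployment cover?
print(fnmatch("fo::hi::static::hi", "fo::*:static::*"))               # True
print(fnmatch("anthropic/claude-3-sonnet-20240229", "anthropic/*"))   # True
print(fnmatch("groq/llama3-8b-8192", "anthropic/*"))                  # False
```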

## [PROXY-Only] Step 2 - Run litellm proxy

```shell
$ litellm --config /path/to/config.yaml
```

## Step 3 - Test it

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import Router

router = Router(model_list=...)

# Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*`
resp = router.completion(model="anthropic/claude-3-sonnet-20240229", messages=[{"role": "user", "content": "Hello, Claude!"}])
print(resp)

# Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*`
resp = router.completion(model="groq/llama3-8b-8192", messages=[{"role": "user", "content": "Hello, Groq!"}])
print(resp)

# Test with `fo::*::static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*`
resp = router.completion(model="fo::hi::static::hi", messages=[{"role": "user", "content": "Hello, Claude!"}])
print(resp)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*`
```bash
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "anthropic/claude-3-sonnet-20240229",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```

Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*`
```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "groq/llama3-8b-8192",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```

Test with `fo::*::static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*`
```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "fo::hi::static::hi",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```

</TabItem>
</Tabs>
@ -246,6 +246,7 @@ const sidebars = {
        "completion/usage",
      ],
    },
    "text_completion",
    "embedding/supported_embedding",
    "image_generation",
    {

@ -261,6 +262,7 @@ const sidebars = {
      "batches",
      "realtime",
      "fine_tuning",
      "moderation",
      {
        type: "link",
        label: "Use LiteLLM Proxy with Vertex, Bedrock SDK",

@ -277,7 +279,7 @@ const sidebars = {
        description: "Learn how to load balance, route, and set fallbacks for your LLM requests",
        slug: "/routing-load-balancing",
      },
      items: ["routing", "scheduler", "proxy/load_balancing", "proxy/reliability", "proxy/tag_routing", "proxy/provider_budget_routing", "proxy/team_based_routing", "proxy/customer_routing"],
      items: ["routing", "scheduler", "proxy/load_balancing", "proxy/reliability", "proxy/tag_routing", "proxy/provider_budget_routing", "proxy/team_based_routing", "proxy/customer_routing", "wildcard_routing"],
    },
    {
      type: "category",
@ -2,7 +2,9 @@
|
|||
from typing import Optional, List
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.proxy.proxy_server import PrismaClient, HTTPException
|
||||
from litellm.llms.custom_httpx.http_handler import HTTPHandler
|
||||
import collections
|
||||
import httpx
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
|
@ -114,7 +116,6 @@ async def ui_get_spend_by_tags(
|
|||
|
||||
|
||||
def _forecast_daily_cost(data: list):
|
||||
import requests # type: ignore
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
if len(data) == 0:
|
||||
|
@ -136,17 +137,17 @@ def _forecast_daily_cost(data: list):
|
|||
|
||||
print("last entry date", last_entry_date)
|
||||
|
||||
# Assuming today_date is a datetime object
|
||||
today_date = datetime.now()
|
||||
|
||||
# Calculate the last day of the month
|
||||
last_day_of_todays_month = datetime(
|
||||
today_date.year, today_date.month % 12 + 1, 1
|
||||
) - timedelta(days=1)
|
||||
|
||||
print("last day of todays month", last_day_of_todays_month)
|
||||
# Calculate the remaining days in the month
|
||||
remaining_days = (last_day_of_todays_month - last_entry_date).days
|
||||
|
||||
print("remaining days", remaining_days)
|
||||
|
||||
current_spend_this_month = 0
|
||||
series = {}
|
||||
for entry in data:
|
||||
|
@ -176,13 +177,19 @@ def _forecast_daily_cost(data: list):
|
|||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
response = requests.post(
|
||||
url="https://trend-api-production.up.railway.app/forecast",
|
||||
json=payload,
|
||||
headers=headers,
|
||||
)
|
||||
# check the status code
|
||||
response.raise_for_status()
|
||||
client = HTTPHandler()
|
||||
|
||||
try:
|
||||
response = client.post(
|
||||
url="https://trend-api-production.up.railway.app/forecast",
|
||||
json=payload,
|
||||
headers=headers,
|
||||
)
|
||||
except httpx.HTTPStatusError as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail={"error": f"Error getting forecast: {e.response.text}"},
|
||||
)
|
||||
|
||||
json_response = response.json()
|
||||
forecast_data = json_response["forecast"]
|
||||
|
@ -206,13 +213,3 @@ def _forecast_daily_cost(data: list):
|
|||
f"Predicted Spend for { today_month } 2024, ${total_predicted_spend}"
|
||||
)
|
||||
return {"response": response_data, "predicted_spend": predicted_spend}
|
||||
|
||||
# print(f"Date: {entry['date']}, Spend: {entry['spend']}, Response: {response.text}")
|
||||
|
||||
|
||||
# _forecast_daily_cost(
|
||||
# [
|
||||
# {"date": "2022-01-01", "spend": 100},
|
||||
|
||||
# ]
|
||||
# )
|
||||
|
|
|
@ -279,11 +279,11 @@ class DataDogLogger(CustomBatchLogger):
|
|||
verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
|
||||
|
||||
dd_payload = DatadogPayload(
|
||||
ddsource=os.getenv("DD_SOURCE", "litellm"),
|
||||
ddtags="",
|
||||
hostname="",
|
||||
ddsource=self._get_datadog_source(),
|
||||
ddtags=self._get_datadog_tags(),
|
||||
hostname=self._get_datadog_hostname(),
|
||||
message=json_payload,
|
||||
service="litellm-server",
|
||||
service=self._get_datadog_service(),
|
||||
status=status,
|
||||
)
|
||||
return dd_payload
|
||||
|
@ -387,11 +387,11 @@ class DataDogLogger(CustomBatchLogger):
|
|||
json_payload = json.dumps(_exception_payload)
|
||||
verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
|
||||
dd_payload = DatadogPayload(
|
||||
ddsource=os.getenv("DD_SOURCE", "litellm"),
|
||||
ddtags="",
|
||||
hostname="",
|
||||
ddsource=self._get_datadog_source(),
|
||||
ddtags=self._get_datadog_tags(),
|
||||
hostname=self._get_datadog_hostname(),
|
||||
message=json_payload,
|
||||
service="litellm-server",
|
||||
service=self._get_datadog_service(),
|
||||
status=DataDogStatus.ERROR,
|
||||
)
|
||||
|
||||
|
@ -473,11 +473,31 @@ class DataDogLogger(CustomBatchLogger):
|
|||
verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
|
||||
|
||||
dd_payload = DatadogPayload(
|
||||
ddsource=os.getenv("DD_SOURCE", "litellm"),
|
||||
ddtags="",
|
||||
hostname="",
|
||||
ddsource=self._get_datadog_source(),
|
||||
ddtags=self._get_datadog_tags(),
|
||||
hostname=self._get_datadog_hostname(),
|
||||
message=json_payload,
|
||||
service="litellm-server",
|
||||
service=self._get_datadog_service(),
|
||||
status=DataDogStatus.INFO,
|
||||
)
|
||||
return dd_payload
|
||||
|
||||
@staticmethod
|
||||
def _get_datadog_tags():
|
||||
return f"env:{os.getenv('DD_ENV', 'unknown')},service:{os.getenv('DD_SERVICE', 'litellm')},version:{os.getenv('DD_VERSION', 'unknown')}"
|
||||
|
||||
@staticmethod
|
||||
def _get_datadog_source():
|
||||
return os.getenv("DD_SOURCE", "litellm")
|
||||
|
||||
@staticmethod
|
||||
def _get_datadog_service():
|
||||
return os.getenv("DD_SERVICE", "litellm-server")
|
||||
|
||||
@staticmethod
|
||||
def _get_datadog_hostname():
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _get_datadog_env():
|
||||
return os.getenv("DD_ENV", "unknown")
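The refactor above centralizes the Datadog payload fields in small static helpers driven by environment variables. A sketch of how the tag string comes together, assuming the DD_* variables documented in the environment-variable reference above (values are illustrative):

```python
import os

# Illustrative values; defaults mirror the helpers shown above
os.environ.setdefault("DD_ENV", "prod")
os.environ.setdefault("DD_SERVICE", "litellm")
os.environ.setdefault("DD_VERSION", "unknown")

# Mirrors DataDogLogger._get_datadog_tags() from the diff above
ddtags = f"env:{os.getenv('DD_ENV', 'unknown')},service:{os.getenv('DD_SERVICE', 'litellm')},version:{os.getenv('DD_VERSION', 'unknown')}"
print(ddtags)  # env:prod,service:litellm,version:unknown
```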
@ -458,7 +458,7 @@ class AmazonConverseConfig:
        """
        Abbreviations of regions AWS Bedrock supports for cross region inference
        """
        return ["us", "eu"]
        return ["us", "eu", "apac"]

    def _get_base_model(self, model: str) -> str:
        """
@ -28,6 +28,62 @@ headers = {
|
|||
_DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
|
||||
_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600 # 1 hour, re-use the same httpx client for 1 hour
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def mask_sensitive_info(error_message):
|
||||
# Find the start of the key parameter
|
||||
if isinstance(error_message, str):
|
||||
key_index = error_message.find("key=")
|
||||
else:
|
||||
return error_message
|
||||
|
||||
# If key is found
|
||||
if key_index != -1:
|
||||
# Find the end of the key parameter (next & or end of string)
|
||||
next_param = error_message.find("&", key_index)
|
||||
|
||||
if next_param == -1:
|
||||
# If no more parameters, mask until the end of the string
|
||||
masked_message = error_message[: key_index + 4] + "[REDACTED_API_KEY]"
|
||||
else:
|
||||
# Replace the key with redacted value, keeping other parameters
|
||||
masked_message = (
|
||||
error_message[: key_index + 4]
|
||||
+ "[REDACTED_API_KEY]"
|
||||
+ error_message[next_param:]
|
||||
)
|
||||
|
||||
return masked_message
|
||||
|
||||
return error_message
|
||||
|
||||
|
||||
class MaskedHTTPStatusError(httpx.HTTPStatusError):
|
||||
def __init__(
|
||||
self, original_error, message: Optional[str] = None, text: Optional[str] = None
|
||||
):
|
||||
# Create a new error with the masked URL
|
||||
masked_url = mask_sensitive_info(str(original_error.request.url))
|
||||
# Create a new error that looks like the original, but with a masked URL
|
||||
|
||||
super().__init__(
|
||||
message=original_error.message,
|
||||
request=httpx.Request(
|
||||
method=original_error.request.method,
|
||||
url=masked_url,
|
||||
headers=original_error.request.headers,
|
||||
content=original_error.request.content,
|
||||
),
|
||||
response=httpx.Response(
|
||||
status_code=original_error.response.status_code,
|
||||
content=original_error.response.content,
|
||||
headers=original_error.response.headers,
|
||||
),
|
||||
)
|
||||
self.message = message
|
||||
self.text = text
|
||||
|
||||
|
||||
class AsyncHTTPHandler:
|
||||
def __init__(
|
||||
|
@ -155,13 +211,16 @@ class AsyncHTTPHandler:
|
|||
headers=headers,
|
||||
)
|
||||
except httpx.HTTPStatusError as e:
|
||||
setattr(e, "status_code", e.response.status_code)
|
||||
|
||||
if stream is True:
|
||||
setattr(e, "message", await e.response.aread())
|
||||
setattr(e, "text", await e.response.aread())
|
||||
else:
|
||||
setattr(e, "message", e.response.text)
|
||||
setattr(e, "text", e.response.text)
|
||||
setattr(e, "message", mask_sensitive_info(e.response.text))
|
||||
setattr(e, "text", mask_sensitive_info(e.response.text))
|
||||
|
||||
setattr(e, "status_code", e.response.status_code)
|
||||
|
||||
raise e
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
@ -399,11 +458,17 @@ class HTTPHandler:
|
|||
llm_provider="litellm-httpx-handler",
|
||||
)
|
||||
except httpx.HTTPStatusError as e:
|
||||
setattr(e, "status_code", e.response.status_code)
|
||||
|
||||
if stream is True:
|
||||
setattr(e, "message", e.response.read())
|
||||
setattr(e, "message", mask_sensitive_info(e.response.read()))
|
||||
setattr(e, "text", mask_sensitive_info(e.response.read()))
|
||||
else:
|
||||
setattr(e, "message", e.response.text)
|
||||
error_text = mask_sensitive_info(e.response.text)
|
||||
setattr(e, "message", error_text)
|
||||
setattr(e, "text", error_text)
|
||||
|
||||
setattr(e, "status_code", e.response.status_code)
|
||||
|
||||
raise e
|
||||
except Exception as e:
|
||||
raise e
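The hunks above route error text through `mask_sensitive_info()` (defined earlier in this file's diff) before attaching it to the exception. A small sketch of what that masking does, with an illustrative URL:

```python
# Any `key=` query parameter is redacted up to the next `&` (or end of string).
url = "https://example-llm-api.com/v1/generate?key=sk-secret-value&alt=json"
print(mask_sensitive_info(url))
# https://example-llm-api.com/v1/generate?key=[REDACTED_API_KEY]&alt=json
```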
|
||||
|
|
|
@ -1159,15 +1159,44 @@ def convert_to_anthropic_tool_result(
|
|||
]
|
||||
}
|
||||
"""
|
||||
content_str: str = ""
|
||||
anthropic_content: Union[
|
||||
str,
|
||||
List[Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]],
|
||||
] = ""
|
||||
if isinstance(message["content"], str):
|
||||
content_str = message["content"]
|
||||
anthropic_content = message["content"]
|
||||
elif isinstance(message["content"], List):
|
||||
content_list = message["content"]
|
||||
anthropic_content_list: List[
|
||||
Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]
|
||||
] = []
|
||||
for content in content_list:
|
||||
if content["type"] == "text":
|
||||
content_str += content["text"]
|
||||
anthropic_content_list.append(
|
||||
AnthropicMessagesToolResultContent(
|
||||
type="text",
|
||||
text=content["text"],
|
||||
)
|
||||
)
|
||||
elif content["type"] == "image_url":
|
||||
if isinstance(content["image_url"], str):
|
||||
image_chunk = convert_to_anthropic_image_obj(content["image_url"])
|
||||
else:
|
||||
image_chunk = convert_to_anthropic_image_obj(
|
||||
content["image_url"]["url"]
|
||||
)
|
||||
anthropic_content_list.append(
|
||||
AnthropicMessagesImageParam(
|
||||
type="image",
|
||||
source=AnthropicContentParamSource(
|
||||
type="base64",
|
||||
media_type=image_chunk["media_type"],
|
||||
data=image_chunk["data"],
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
anthropic_content = anthropic_content_list
|
||||
anthropic_tool_result: Optional[AnthropicMessagesToolResultParam] = None
|
||||
## PROMPT CACHING CHECK ##
|
||||
cache_control = message.get("cache_control", None)
|
||||
|
@ -1178,14 +1207,14 @@ def convert_to_anthropic_tool_result(
|
|||
# We can't determine from openai message format whether it's a successful or
|
||||
# error call result so default to the successful result template
|
||||
anthropic_tool_result = AnthropicMessagesToolResultParam(
|
||||
type="tool_result", tool_use_id=tool_call_id, content=content_str
|
||||
type="tool_result", tool_use_id=tool_call_id, content=anthropic_content
|
||||
)
|
||||
|
||||
if message["role"] == "function":
|
||||
function_message: ChatCompletionFunctionMessage = message
|
||||
tool_call_id = function_message.get("tool_call_id") or str(uuid.uuid4())
|
||||
anthropic_tool_result = AnthropicMessagesToolResultParam(
|
||||
type="tool_result", tool_use_id=tool_call_id, content=content_str
|
||||
type="tool_result", tool_use_id=tool_call_id, content=anthropic_content
|
||||
)
|
||||
|
||||
if anthropic_tool_result is None:
|
||||
|
|
|
@ -107,6 +107,10 @@ def _get_image_mime_type_from_url(url: str) -> Optional[str]:
        return "image/png"
    elif url.endswith(".webp"):
        return "image/webp"
    elif url.endswith(".mp4"):
        return "video/mp4"
    elif url.endswith(".pdf"):
        return "application/pdf"
    return None
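A quick sketch of what the extended helper now returns for the newly supported extensions (usage is illustrative; the function is the one shown in the hunk above):

```python
print(_get_image_mime_type_from_url("https://example.com/clip.mp4"))     # "video/mp4"
print(_get_image_mime_type_from_url("https://example.com/scan.pdf"))     # "application/pdf"
print(_get_image_mime_type_from_url("https://example.com/photo.webp"))   # "image/webp"
print(_get_image_mime_type_from_url("https://example.com/unknown.bin"))  # None
```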
@ -3383,6 +3383,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-001": {
|
||||
|
@ -3406,6 +3408,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash": {
|
||||
|
@ -3428,6 +3432,8 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-latest": {
|
||||
|
@ -3450,6 +3456,32 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-8b": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 4000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-8b-exp-0924": {
|
||||
|
@ -3472,6 +3504,8 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 4000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-exp-1114": {
|
||||
|
@ -3494,7 +3528,12 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing",
|
||||
"metadata": {
|
||||
"notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro."
|
||||
}
|
||||
},
|
||||
"gemini/gemini-1.5-flash-exp-0827": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -3516,6 +3555,8 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-8b-exp-0827": {
|
||||
|
@ -3537,6 +3578,9 @@
|
|||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 4000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-pro": {
|
||||
|
@ -3550,7 +3594,10 @@
|
|||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
"rpd": 30000,
|
||||
"tpm": 120000,
|
||||
"rpm": 360,
|
||||
"source": "https://ai.google.dev/gemini-api/docs/models/gemini"
|
||||
},
|
||||
"gemini/gemini-1.5-pro": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -3567,6 +3614,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-002": {
|
||||
|
@ -3585,6 +3634,8 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-001": {
|
||||
|
@ -3603,6 +3654,8 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-exp-0801": {
|
||||
|
@ -3620,6 +3673,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-exp-0827": {
|
||||
|
@ -3637,6 +3692,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-latest": {
|
||||
|
@ -3654,6 +3711,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-pro-vision": {
|
||||
|
@ -3668,6 +3727,9 @@
|
|||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"rpd": 30000,
|
||||
"tpm": 120000,
|
||||
"rpm": 360,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"gemini/gemini-gemma-2-27b-it": {
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);
|
|
@ -1 +0,0 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(e,n,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(e){e.exports={style:{fontFamily:"'__Inter_86ef86', '__Inter_Fallback_86ef86'",fontStyle:"normal"},className:"__className_86ef86"}}},function(e){e.O(0,[971,69,744],function(){return e(e.s=87421)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{61994:function(e,s,t){Promise.resolve().then(t.bind(t,667))},667:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return g}});var l=t(57437),n=t(2265),a=t(47907),i=t(2179),r=t(18190),o=t(13810),c=t(10384),u=t(46453),d=t(71801),m=t(52273),h=t(42440),x=t(30953),f=t(777),p=t(37963),j=t(60620),_=t(13565);function g(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("invitation_id"),[g,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,v]=(0,n.useState)(null),[y,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,f.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,p.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),v(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(r.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",g,"token:",I,"formValues:",e),g&&I&&(e.user_email=S,N&&t&&(0,f.m_)(g,t,N,e.password).then(e=>{var s;let t="/ui/";t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id),document.cookie="token="+I,console.log("redirecting to:",t),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(_.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,902,684,777,971,69,744],function(){return e(e.s=61994)}),_N_E=e.O()}]);
|
||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{20723:function(e,s,t){Promise.resolve().then(t.bind(t,667))},667:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return g}});var l=t(57437),n=t(2265),a=t(47907),i=t(2179),r=t(18190),o=t(13810),c=t(10384),u=t(46453),d=t(71801),m=t(52273),h=t(42440),x=t(30953),f=t(777),p=t(37963),j=t(60620),_=t(13565);function g(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("invitation_id"),[g,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,v]=(0,n.useState)(null),[y,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,f.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,p.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),v(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(r.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",g,"token:",I,"formValues:",e),g&&I&&(e.user_email=S,N&&t&&(0,f.m_)(g,t,N,e.password).then(e=>{var s;let t="/ui/";t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id),document.cookie="token="+I,console.log("redirecting to:",t),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(_.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,902,684,777,971,69,744],function(){return e(e.s=20723)}),_N_E=e.O()}]);
|
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);

@@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/00256a1984d35914.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/ea3759ed931c00b2.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

@@ -1,4 +1,4 @@
@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/55c55f0601d81cf3-s.woff2) format("woff2");unicode-range:u+0460-052f,u+1c80-1c88,u+20b4,u+2de0-2dff,u+a640-a69f,u+fe2e-fe2f}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/26a46d62cd723877-s.woff2) format("woff2");unicode-range:u+0301,u+0400-045f,u+0490-0491,u+04b0-04b1,u+2116}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/97e0cb1ae144a2a9-s.woff2) format("woff2");unicode-range:u+1f??}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/581909926a08bbc8-s.woff2) format("woff2");unicode-range:u+0370-0377,u+037a-037f,u+0384-038a,u+038c,u+038e-03a1,u+03a3-03ff}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/df0a9ae256c0569c-s.woff2) format("woff2");unicode-range:u+0102-0103,u+0110-0111,u+0128-0129,u+0168-0169,u+01a0-01a1,u+01af-01b0,u+0300-0301,u+0303-0304,u+0308-0309,u+0323,u+0329,u+1ea0-1ef9,u+20ab}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/6d93bde91c0c2823-s.woff2) format("woff2");unicode-range:u+0100-02af,u+0304,u+0308,u+0329,u+1e00-1e9f,u+1ef2-1eff,u+2020,u+20a0-20ab,u+20ad-20c0,u+2113,u+2c60-2c7f,u+a720-a7ff}@font-face{font-family:__Inter_86ef86;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2) format("woff2");unicode-range:u+00??,u+0131,u+0152-0153,u+02bb-02bc,u+02c6,u+02da,u+02dc,u+0304,u+0308,u+0329,u+2000-206f,u+2074,u+20ac,u+2122,u+2191,u+2193,u+2212,u+2215,u+feff,u+fffd}@font-face{font-family:__Inter_Fallback_86ef86;src:local("Arial");ascent-override:90.20%;descent-override:22.48%;line-gap-override:0.00%;size-adjust:107.40%}.__className_86ef86{font-family:__Inter_86ef86,__Inter_Fallback_86ef86;font-style:normal}
@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/ec159349637c90ad-s.woff2) format("woff2");unicode-range:u+0460-052f,u+1c80-1c88,u+20b4,u+2de0-2dff,u+a640-a69f,u+fe2e-fe2f}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/513657b02c5c193f-s.woff2) format("woff2");unicode-range:u+0301,u+0400-045f,u+0490-0491,u+04b0-04b1,u+2116}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/fd4db3eb5472fc27-s.woff2) format("woff2");unicode-range:u+1f??}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/51ed15f9841b9f9d-s.woff2) format("woff2");unicode-range:u+0370-0377,u+037a-037f,u+0384-038a,u+038c,u+038e-03a1,u+03a3-03ff}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/05a31a2ca4975f99-s.woff2) format("woff2");unicode-range:u+0102-0103,u+0110-0111,u+0128-0129,u+0168-0169,u+01a0-01a1,u+01af-01b0,u+0300-0301,u+0303-0304,u+0308-0309,u+0323,u+0329,u+1ea0-1ef9,u+20ab}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/d6b16ce4a6175f26-s.woff2) format("woff2");unicode-range:u+0100-02af,u+0304,u+0308,u+0329,u+1e00-1e9f,u+1ef2-1eff,u+2020,u+20a0-20ab,u+20ad-20c0,u+2113,u+2c60-2c7f,u+a720-a7ff}@font-face{font-family:__Inter_12bbc4;font-style:normal;font-weight:100 900;font-display:swap;src:url(/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2) format("woff2");unicode-range:u+00??,u+0131,u+0152-0153,u+02bb-02bc,u+02c6,u+02da,u+02dc,u+0304,u+0308,u+0329,u+2000-206f,u+2074,u+20ac,u+2122,u+2191,u+2193,u+2212,u+2215,u+feff,u+fffd}@font-face{font-family:__Inter_Fallback_12bbc4;src:local("Arial");ascent-override:90.20%;descent-override:22.48%;line-gap-override:0.00%;size-adjust:107.40%}.__className_12bbc4{font-family:__Inter_12bbc4,__Inter_Fallback_12bbc4;font-style:normal}
/*
! tailwindcss v3.4.1 | MIT License | https://tailwindcss.com
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e8ad0a25b0c46e0b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e8ad0a25b0c46e0b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/00256a1984d35914.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[82989,[\"665\",\"static/chunks/3014691f-b24e8254c7593934.js\",\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"902\",\"static/chunks/902-58bf23027703b2e8.js\",\"131\",\"static/chunks/131-3d2257b0ff5aadb2.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"626\",\"static/chunks/626-4e8df4039ecf4386.js\",\"777\",\"static/chunks/777-9d9df0b75010dbf9.js\",\"931\",\"static/chunks/app/page-68b04cd7217f38ce.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/00256a1984d35914.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"WeMIGILYzOYN-R9DXbvCD\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_86ef86\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-b9c71b6f9761a436.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-b9c71b6f9761a436.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/ea3759ed931c00b2.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[82989,[\"665\",\"static/chunks/3014691f-b24e8254c7593934.js\",\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"902\",\"static/chunks/902-292bb6a83427dbc7.js\",\"131\",\"static/chunks/131-4ee1d633e8928742.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"626\",\"static/chunks/626-0c564a21577c9c53.js\",\"777\",\"static/chunks/777-9d9df0b75010dbf9.js\",\"931\",\"static/chunks/app/page-a952da77e0730c7c.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/ea3759ed931c00b2.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"pDx3dChtj-paUmJExuV6u\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[82989,["665","static/chunks/3014691f-b24e8254c7593934.js","936","static/chunks/2f6dbc85-cac2949a76539886.js","902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-3d2257b0ff5aadb2.js","684","static/chunks/684-16b194c83a169f6d.js","626","static/chunks/626-4e8df4039ecf4386.js","777","static/chunks/777-9d9df0b75010dbf9.js","931","static/chunks/app/page-68b04cd7217f38ce.js"],""]
3:I[82989,["665","static/chunks/3014691f-b24e8254c7593934.js","936","static/chunks/2f6dbc85-cac2949a76539886.js","902","static/chunks/902-292bb6a83427dbc7.js","131","static/chunks/131-4ee1d633e8928742.js","684","static/chunks/684-16b194c83a169f6d.js","626","static/chunks/626-0c564a21577c9c53.js","777","static/chunks/777-9d9df0b75010dbf9.js","931","static/chunks/app/page-a952da77e0730c7c.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["WeMIGILYzOYN-R9DXbvCD",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["pDx3dChtj-paUmJExuV6u",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/ea3759ed931c00b2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[87494,["902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-3d2257b0ff5aadb2.js","777","static/chunks/777-9d9df0b75010dbf9.js","418","static/chunks/app/model_hub/page-104cada6b5e5b14c.js"],""]
3:I[87494,["902","static/chunks/902-292bb6a83427dbc7.js","131","static/chunks/131-4ee1d633e8928742.js","777","static/chunks/777-9d9df0b75010dbf9.js","418","static/chunks/app/model_hub/page-748a83a8e772a56b.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["WeMIGILYzOYN-R9DXbvCD",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["pDx3dChtj-paUmJExuV6u",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/ea3759ed931c00b2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","902","static/chunks/902-58bf23027703b2e8.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-9d9df0b75010dbf9.js","461","static/chunks/app/onboarding/page-bad6cfbe58b9d19c.js"],""]
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","902","static/chunks/902-292bb6a83427dbc7.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-9d9df0b75010dbf9.js","461","static/chunks/app/onboarding/page-884a15d08f8be397.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["WeMIGILYzOYN-R9DXbvCD",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["pDx3dChtj-paUmJExuV6u",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/ea3759ed931c00b2.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -11,7 +11,27 @@ model_list:
       model: vertex_ai/claude-3-5-sonnet-v2
       vertex_ai_project: "adroit-crow-413218"
       vertex_ai_location: "us-east5"
+  - model_name: openai-gpt-4o-realtime-audio
+    litellm_params:
+      model: openai/gpt-4o-realtime-preview-2024-10-01
+      api_key: os.environ/OPENAI_API_KEY
+  - model_name: openai/*
+    litellm_params:
+      model: openai/*
+      api_key: os.environ/OPENAI_API_KEY
+  - model_name: openai/*
+    litellm_params:
+      model: openai/*
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      access_groups: ["public-openai-models"]
+  - model_name: openai/gpt-4o
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      access_groups: ["private-openai-models"]

 router_settings:
   routing_strategy: usage-based-routing-v2
   #redis_url: "os.environ/REDIS_URL"
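Note on the added entries: the two `openai/*` wildcard deployments differ only in `model_info.access_groups`, which is what the auth changes further down key off. Below is a minimal, standalone sketch (not LiteLLM code) of how `access_groups` declared in a config like the one above group deployments; the inline YAML is a trimmed illustrative subset, and PyYAML is assumed to be installed.

```python
# Standalone sketch: group model_list entries by their model_info.access_groups.
# Requires PyYAML (`pip install pyyaml`); the YAML below is a trimmed illustration
# of the config hunk above, not the full proxy_config.yaml.
from collections import defaultdict

import yaml

CONFIG = """
model_list:
  - model_name: openai/*
    litellm_params:
      model: openai/*
    model_info:
      access_groups: ["public-openai-models"]
  - model_name: openai/gpt-4o
    litellm_params:
      model: openai/gpt-4o
    model_info:
      access_groups: ["private-openai-models"]
"""

config = yaml.safe_load(CONFIG)
groups: dict = defaultdict(list)
for entry in config["model_list"]:
    for group in entry.get("model_info", {}).get("access_groups", []):
        groups[group].append(entry["model_name"])

print(dict(groups))
# {'public-openai-models': ['openai/*'], 'private-openai-models': ['openai/gpt-4o']}
```

A key or team whose `models` list contains one of these group names is treated as having access to every deployment in that group, as the auth hunks below show.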
@@ -2183,3 +2183,11 @@ PassThroughEndpointLoggingResultValues = Union[
 class PassThroughEndpointLoggingTypedDict(TypedDict):
     result: Optional[PassThroughEndpointLoggingResultValues]
     kwargs: dict
+
+
+LiteLLM_ManagementEndpoint_MetadataFields = [
+    "model_rpm_limit",
+    "model_tpm_limit",
+    "guardrails",
+    "tags",
+]
@@ -60,6 +60,7 @@ def common_checks( # noqa: PLR0915
     global_proxy_spend: Optional[float],
     general_settings: dict,
     route: str,
+    llm_router: Optional[litellm.Router],
 ) -> bool:
     """
     Common checks across jwt + key-based auth.
@@ -97,7 +98,12 @@ def common_checks( # noqa: PLR0915
             # this means the team has access to all models on the proxy
             pass
         # check if the team model is an access_group
-        elif model_in_access_group(_model, team_object.models) is True:
+        elif (
+            model_in_access_group(
+                model=_model, team_models=team_object.models, llm_router=llm_router
+            )
+            is True
+        ):
             pass
         elif _model and "*" in _model:
             pass
@@ -373,36 +379,33 @@ async def get_end_user_object(
     return None


-def model_in_access_group(model: str, team_models: Optional[List[str]]) -> bool:
+def model_in_access_group(
+    model: str, team_models: Optional[List[str]], llm_router: Optional[litellm.Router]
+) -> bool:
     from collections import defaultdict

-    from litellm.proxy.proxy_server import llm_router
-
     if team_models is None:
         return True
     if model in team_models:
         return True

-    access_groups = defaultdict(list)
+    access_groups: dict[str, list[str]] = defaultdict(list)
     if llm_router:
-        access_groups = llm_router.get_model_access_groups()
+        access_groups = llm_router.get_model_access_groups(model_name=model)

     models_in_current_access_groups = []
     if len(access_groups) > 0:  # check if token contains any model access groups
         for idx, m in enumerate(
             team_models
         ):  # loop token models, if any of them are an access group add the access group
             if m in access_groups:
-                # if it is an access group we need to remove it from valid_token.models
-                models_in_group = access_groups[m]
-                models_in_current_access_groups.extend(models_in_group)
+                return True

     # Filter out models that are access_groups
     filtered_models = [m for m in team_models if m not in access_groups]
-    filtered_models += models_in_current_access_groups

     if model in filtered_models:
         return True

     return False
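A standalone sketch of the membership rule the rewritten `model_in_access_group` implements, with a stubbed mapping standing in for `llm_router.get_model_access_groups(model_name=...)`; the group and model names are illustrative.

```python
# Sketch (assumptions noted): `access_groups` stands in for the router's answer to
# "which access groups contain this model?" -- the real code gets it from
# llm_router.get_model_access_groups(model_name=model).
from typing import Dict, List, Optional


def model_in_access_group_sketch(
    model: str,
    team_models: Optional[List[str]],
    access_groups: Dict[str, List[str]],
) -> bool:
    if team_models is None:
        return True
    if model in team_models:
        return True
    # if any team model is the *name* of a group that contains `model`, allow it
    for m in team_models:
        if m in access_groups:
            return True
    # otherwise fall back to exact-match against the non-group entries
    filtered_models = [m for m in team_models if m not in access_groups]
    return model in filtered_models


# groups the router would report for "openai/gpt-4o" under the config above
stub_groups = {"private-openai-models": ["openai/gpt-4o"]}
print(model_in_access_group_sketch("openai/gpt-4o", ["private-openai-models"], stub_groups))  # True
print(model_in_access_group_sketch("openai/gpt-4o", ["gpt-3.5-turbo"], stub_groups))          # False
```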
@@ -523,10 +526,6 @@ async def _cache_management_object(
     proxy_logging_obj: Optional[ProxyLogging],
 ):
     await user_api_key_cache.async_set_cache(key=key, value=value)
-    if proxy_logging_obj is not None:
-        await proxy_logging_obj.internal_usage_cache.dual_cache.async_set_cache(
-            key=key, value=value
-        )


 async def _cache_team_object(
@@ -878,7 +877,10 @@ async def get_org_object(


 async def can_key_call_model(
-    model: str, llm_model_list: Optional[list], valid_token: UserAPIKeyAuth
+    model: str,
+    llm_model_list: Optional[list],
+    valid_token: UserAPIKeyAuth,
+    llm_router: Optional[litellm.Router],
 ) -> Literal[True]:
     """
     Checks if token can call a given model
@@ -898,35 +900,29 @@ async def can_key_call_model(
     )
     from collections import defaultdict

-    from litellm.proxy.proxy_server import llm_router
-
     access_groups = defaultdict(list)
     if llm_router:
-        access_groups = llm_router.get_model_access_groups()
+        access_groups = llm_router.get_model_access_groups(model_name=model)

-    models_in_current_access_groups = []
-    if len(access_groups) > 0:  # check if token contains any model access groups
+    if (
+        len(access_groups) > 0 and llm_router is not None
+    ):  # check if token contains any model access groups
         for idx, m in enumerate(
             valid_token.models
         ):  # loop token models, if any of them are an access group add the access group
             if m in access_groups:
-                # if it is an access group we need to remove it from valid_token.models
-                models_in_group = access_groups[m]
-                models_in_current_access_groups.extend(models_in_group)
+                return True

     # Filter out models that are access_groups
     filtered_models = [m for m in valid_token.models if m not in access_groups]

-    filtered_models += models_in_current_access_groups
     verbose_proxy_logger.debug(f"model: {model}; allowed_models: {filtered_models}")

     all_model_access: bool = False

     if (
-        len(filtered_models) == 0
-        or "*" in filtered_models
-        or "openai/*" in filtered_models
-    ):
+        len(filtered_models) == 0 and len(valid_token.models) == 0
+    ) or "*" in filtered_models:
         all_model_access = True

     if model is not None and model not in filtered_models and all_model_access is False:
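The wildcard check also changed: previously an empty filtered list, a `*` entry, or an `openai/*` entry switched on `all_model_access`; now, as far as this condition is concerned, blanket access requires either a literal `*` entry or a key with no models configured at all. A small standalone sketch of just this condition, on illustrative inputs:

```python
# Sketch of the old vs. new "all model access" condition from can_key_call_model,
# evaluated on a few illustrative key configurations (not a full auth check).
def old_all_model_access(filtered_models: list) -> bool:
    return (
        len(filtered_models) == 0
        or "*" in filtered_models
        or "openai/*" in filtered_models
    )


def new_all_model_access(filtered_models: list, token_models: list) -> bool:
    return (
        len(filtered_models) == 0 and len(token_models) == 0
    ) or "*" in filtered_models


cases = [
    ([], []),                      # key with no model restrictions at all
    (["openai/*"], ["openai/*"]),  # wildcard-prefixed entry
    (["*"], ["*"]),                # explicit catch-all
]
for filtered, token in cases:
    print(filtered, old_all_model_access(filtered), new_all_model_access(filtered, token))
```

Under the new condition an `openai/*` entry no longer flips `all_model_access`; provider-prefixed wildcards are handled by the later model-matching logic instead.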
@@ -28,6 +28,8 @@ from fastapi import (
     Request,
     Response,
     UploadFile,
+    WebSocket,
+    WebSocketDisconnect,
     status,
 )
 from fastapi.middleware.cors import CORSMiddleware
@@ -195,6 +197,52 @@ def _is_allowed_route(
     )


+async def user_api_key_auth_websocket(websocket: WebSocket):
+    # Accept the WebSocket connection
+
+    request = Request(scope={"type": "http"})
+    request._url = websocket.url
+
+    query_params = websocket.query_params
+
+    model = query_params.get("model")
+
+    async def return_body():
+        return_string = f'{{"model": "{model}"}}'
+        # return string as bytes
+        return return_string.encode()
+
+    request.body = return_body  # type: ignore
+
+    # Extract the Authorization header
+    authorization = websocket.headers.get("authorization")
+
+    # If no Authorization header, try the api-key header
+    if not authorization:
+        api_key = websocket.headers.get("api-key")
+        if not api_key:
+            await websocket.close(code=status.WS_1008_POLICY_VIOLATION)
+            raise HTTPException(status_code=403, detail="No API key provided")
+    else:
+        # Extract the API key from the Bearer token
+        if not authorization.startswith("Bearer "):
+            await websocket.close(code=status.WS_1008_POLICY_VIOLATION)
+            raise HTTPException(
+                status_code=403, detail="Invalid Authorization header format"
+            )
+
+        api_key = authorization[len("Bearer ") :].strip()
+
+    # Call user_api_key_auth with the extracted API key
+    # Note: You'll need to modify this to work with WebSocket context if needed
+    try:
+        return await user_api_key_auth(request=request, api_key=f"Bearer {api_key}")
+    except Exception as e:
+        verbose_proxy_logger.exception(e)
+        await websocket.close(code=status.WS_1008_POLICY_VIOLATION)
+        raise HTTPException(status_code=403, detail=str(e))
+
+
 async def user_api_key_auth( # noqa: PLR0915
     request: Request,
     api_key: str = fastapi.Security(api_key_header),
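A minimal client-side sketch of connecting with the headers this function accepts. Assumptions: the proxy runs on `localhost:4000`, its realtime route is mounted at `/v1/realtime`, `sk-1234` is a placeholder key, and the third-party `websockets` package is installed; adjust all of these for your deployment.

```python
# Client sketch (assumptions: proxy URL/path and placeholder key as noted above).
# Requires `pip install websockets`.
import asyncio

import websockets


async def main():
    # model name taken from the config hunk earlier in this diff
    url = "ws://localhost:4000/v1/realtime?model=openai-gpt-4o-realtime-audio"
    headers = {"api-key": "sk-1234"}  # or {"Authorization": "Bearer sk-1234"}
    # websockets < 14 uses `extra_headers`; >= 14 renamed the argument to `additional_headers`
    async with websockets.connect(url, extra_headers=headers) as ws:
        # print the first event the server sends after the handshake
        print(await ws.recv())


asyncio.run(main())
```

If the key is missing or malformed, the handler above closes the socket with code 1008 before any event is delivered.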
@@ -211,6 +259,7 @@ async def user_api_key_auth( # noqa: PLR0915
         jwt_handler,
         litellm_proxy_admin_name,
         llm_model_list,
+        llm_router,
         master_key,
         open_telemetry_logger,
         prisma_client,
@@ -494,6 +543,7 @@ async def user_api_key_auth( # noqa: PLR0915
                 general_settings=general_settings,
                 global_proxy_spend=global_proxy_spend,
                 route=route,
+                llm_router=llm_router,
             )

             # return UserAPIKeyAuth object
@@ -857,6 +907,7 @@ async def user_api_key_auth( # noqa: PLR0915
                     model=model,
                     llm_model_list=llm_model_list,
                     valid_token=valid_token,
+                    llm_router=llm_router,
                 )

             if fallback_models is not None:
@@ -865,6 +916,7 @@ async def user_api_key_auth( # noqa: PLR0915
                         model=m,
                         llm_model_list=llm_model_list,
                         valid_token=valid_token,
+                        llm_router=llm_router,
                     )

             # Check 2. If user_id for this token is in budget - done in common_checks()
@@ -1125,6 +1177,7 @@ async def user_api_key_auth( # noqa: PLR0915
             general_settings=general_settings,
             global_proxy_spend=global_proxy_spend,
             route=route,
+            llm_router=llm_router,
         )
         # Token passed all checks
         if valid_token is None:
@@ -1,6 +1,6 @@
 import ast
 import json
-from typing import List, Optional
+from typing import Dict, List, Optional

 from fastapi import Request, UploadFile, status

@@ -8,31 +8,43 @@ from litellm._logging import verbose_proxy_logger
 from litellm.types.router import Deployment


-async def _read_request_body(request: Optional[Request]) -> dict:
+async def _read_request_body(request: Optional[Request]) -> Dict:
     """
-    Asynchronous function to read the request body and parse it as JSON or literal data.
+    Safely read the request body and parse it as JSON.

     Parameters:
     - request: The request object to read the body from

     Returns:
-    - dict: Parsed request data as a dictionary
+    - dict: Parsed request data as a dictionary or an empty dictionary if parsing fails
     """
     try:
-        request_data: dict = {}
         if request is None:
-            return request_data
+            return {}
+
+        # Read the request body
         body = await request.body()

-        if body == b"" or body is None:
-            return request_data
+        # Return empty dict if body is empty or None
+        if not body:
+            return {}
+
+        # Decode the body to a string
         body_str = body.decode()
-        try:
-            request_data = ast.literal_eval(body_str)
-        except Exception:
-            request_data = json.loads(body_str)
-        return request_data
-    except Exception:
+
+        # Attempt JSON parsing (safe for untrusted input)
+        return json.loads(body_str)
+
+    except json.JSONDecodeError:
+        # Log detailed information for debugging
+        verbose_proxy_logger.exception("Invalid JSON payload received.")
+        return {}
+
+    except Exception as e:
+        # Catch unexpected errors to avoid crashes
+        verbose_proxy_logger.exception(
+            "Unexpected error reading request body - {}".format(e)
+        )
         return {}
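Behavioral note: the old parser fell back to `ast.literal_eval`, so Python-literal bodies (single quotes, bare `True`/`False`) were accepted; the new parser only accepts strict JSON and returns `{}` on failure. A quick standalone illustration:

```python
# The old path accepted Python-literal bodies via ast.literal_eval; the new path
# only accepts strict JSON and logs + returns {} when parsing fails.
import ast
import json

body_str = "{'model': 'gpt-4o', 'stream': True}"  # Python literal, not valid JSON

print(ast.literal_eval(body_str))  # old behavior: {'model': 'gpt-4o', 'stream': True}

try:
    json.loads(body_str)
except json.JSONDecodeError as e:
    print("new behavior rejects it:", e)  # the handler logs this and returns {}
```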
@@ -214,10 +214,10 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
             prepared_request.url,
             prepared_request.headers,
         )
         _json_data = json.dumps(request_data)  # type: ignore

         response = await self.async_handler.post(
             url=prepared_request.url,
-            json=request_data,  # type: ignore
+            data=prepared_request.body,  # type: ignore
             headers=prepared_request.headers,  # type: ignore
         )
         verbose_proxy_logger.debug("Bedrock AI response: %s", response.text)
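The diff does not state the motivation, but a plausible one is that SigV4 signing covers the exact body bytes, so the already-signed `prepared_request.body` has to be sent verbatim instead of letting the HTTP client re-serialize `request_data`. The standalone sketch below only shows that two serializations of the same payload are not byte-identical, which is the kind of mismatch that would break a signature:

```python
# Two serializations of the same dict are not byte-identical, so a signature
# computed over one would not validate against the other.
import hashlib
import json

payload = {"source": "INPUT", "content": [{"text": {"text": "hi"}}]}

body_a = json.dumps(payload).encode()                         # default separators ", " and ": "
body_b = json.dumps(payload, separators=(",", ":")).encode()  # compact form

print(hashlib.sha256(body_a).hexdigest())
print(hashlib.sha256(body_b).hexdigest())
print(body_a == body_b)  # False
```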
87 litellm/proxy/hooks/proxy_failure_handler.py Normal file

@@ -0,0 +1,87 @@
"""
Runs when LLM Exceptions occur on LiteLLM Proxy
"""

import copy
import json
import uuid

import litellm
from litellm.proxy._types import LiteLLM_ErrorLogs


async def _PROXY_failure_handler(
    kwargs,  # kwargs to completion
    completion_response: litellm.ModelResponse,  # response from completion
    start_time=None,
    end_time=None,  # start/end time for completion
):
    """
    Async Failure Handler - runs when LLM Exceptions occur on LiteLLM Proxy.
    This function logs the errors to the Prisma DB

    Can be disabled by setting the following on proxy_config.yaml:
    ```yaml
    general_settings:
      disable_error_logs: True
    ```

    """
    from litellm._logging import verbose_proxy_logger
    from litellm.proxy.proxy_server import general_settings, prisma_client

    if general_settings.get("disable_error_logs") is True:
        return

    if prisma_client is not None:
        verbose_proxy_logger.debug(
            "inside _PROXY_failure_handler kwargs=", extra=kwargs
        )

        _exception = kwargs.get("exception")
        _exception_type = _exception.__class__.__name__
        _model = kwargs.get("model", None)

        _optional_params = kwargs.get("optional_params", {})
        _optional_params = copy.deepcopy(_optional_params)

        for k, v in _optional_params.items():
            v = str(v)
            v = v[:100]

        _status_code = "500"
        try:
            _status_code = str(_exception.status_code)
        except Exception:
            # Don't let this fail logging the exception to the dB
            pass

        _litellm_params = kwargs.get("litellm_params", {}) or {}
        _metadata = _litellm_params.get("metadata", {}) or {}
        _model_id = _metadata.get("model_info", {}).get("id", "")
        _model_group = _metadata.get("model_group", "")
        api_base = litellm.get_api_base(model=_model, optional_params=_litellm_params)
        _exception_string = str(_exception)

        error_log = LiteLLM_ErrorLogs(
            request_id=str(uuid.uuid4()),
            model_group=_model_group,
            model_id=_model_id,
            litellm_model_name=kwargs.get("model"),
            request_kwargs=_optional_params,
            api_base=api_base,
            exception_type=_exception_type,
            status_code=_status_code,
            exception_string=_exception_string,
            startTime=kwargs.get("start_time"),
            endTime=kwargs.get("end_time"),
        )

        error_log_dict = error_log.model_dump()
        error_log_dict["request_kwargs"] = json.dumps(error_log_dict["request_kwargs"])

        await prisma_client.db.litellm_errorlogs.create(
            data=error_log_dict  # type: ignore
        )

    pass
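A standalone sketch of the fields this handler extracts from the failure-callback `kwargs` before writing a `LiteLLM_ErrorLogs` row; the sample payload is illustrative, not a real proxy callback.

```python
# Illustrative only: mirrors the field extraction in _PROXY_failure_handler
# without touching the proxy's Prisma client or globals.
import uuid

kwargs = {
    "exception": ValueError("rate limit exceeded"),      # hypothetical failure
    "model": "azure/gpt-4o",
    "optional_params": {"temperature": 0.2},
    "litellm_params": {
        "metadata": {
            "model_info": {"id": "model-id-123"},
            "model_group": "gpt-4o",
        }
    },
}

_exception = kwargs.get("exception")
_litellm_params = kwargs.get("litellm_params", {}) or {}
_metadata = _litellm_params.get("metadata", {}) or {}

error_log = {
    "request_id": str(uuid.uuid4()),
    "model_group": _metadata.get("model_group", ""),
    "model_id": _metadata.get("model_info", {}).get("id", ""),
    "litellm_model_name": kwargs.get("model"),
    "exception_type": _exception.__class__.__name__,
    "exception_string": str(_exception),
    "status_code": str(getattr(_exception, "status_code", 500)),
}
print(error_log)
```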
@@ -288,12 +288,12 @@ class LiteLLMProxyRequestSetup:

         ## KEY-LEVEL SPEND LOGS / TAGS
         if "tags" in key_metadata and key_metadata["tags"] is not None:
-            if "tags" in data[_metadata_variable_name] and isinstance(
-                data[_metadata_variable_name]["tags"], list
-            ):
-                data[_metadata_variable_name]["tags"].extend(key_metadata["tags"])
-            else:
-                data[_metadata_variable_name]["tags"] = key_metadata["tags"]
+            data[_metadata_variable_name]["tags"] = (
+                LiteLLMProxyRequestSetup._merge_tags(
+                    request_tags=data[_metadata_variable_name].get("tags"),
+                    tags_to_add=key_metadata["tags"],
+                )
+            )
         if "spend_logs_metadata" in key_metadata and isinstance(
             key_metadata["spend_logs_metadata"], dict
         ):
@ -319,6 +319,30 @@ class LiteLLMProxyRequestSetup:
|
|||
data["disable_fallbacks"] = key_metadata["disable_fallbacks"]
|
||||
return data
|
||||
|
||||
    @staticmethod
    def _merge_tags(request_tags: Optional[list], tags_to_add: Optional[list]) -> list:
        """
        Helper function to merge two lists of tags, ensuring no duplicates.

        Args:
            request_tags (Optional[list]): List of tags from the original request
            tags_to_add (Optional[list]): List of tags to add

        Returns:
            list: Combined list of unique tags
        """
        final_tags = []

        if request_tags and isinstance(request_tags, list):
            final_tags.extend(request_tags)

        if tags_to_add and isinstance(tags_to_add, list):
            for tag in tags_to_add:
                if tag not in final_tags:
                    final_tags.append(tag)

        return final_tags

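A quick illustration of the merge semantics (request tags keep their order, key/team tags are appended without duplicates) — a standalone sketch that simply mirrors the helper above:

```python
from typing import Optional


def merge_tags(request_tags: Optional[list], tags_to_add: Optional[list]) -> list:
    # mirrors LiteLLMProxyRequestSetup._merge_tags shown above
    final_tags = []
    if request_tags and isinstance(request_tags, list):
        final_tags.extend(request_tags)
    if tags_to_add and isinstance(tags_to_add, list):
        for tag in tags_to_add:
            if tag not in final_tags:
                final_tags.append(tag)
    return final_tags


print(merge_tags(["app:chatbot", "env:prod"], ["env:prod", "team:ml"]))
# -> ['app:chatbot', 'env:prod', 'team:ml']
```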
async def add_litellm_data_to_request( # noqa: PLR0915
|
||||
data: dict,
|
||||
|
@ -442,12 +466,10 @@ async def add_litellm_data_to_request( # noqa: PLR0915
|
|||
## TEAM-LEVEL SPEND LOGS/TAGS
|
||||
team_metadata = user_api_key_dict.team_metadata or {}
|
||||
if "tags" in team_metadata and team_metadata["tags"] is not None:
|
||||
if "tags" in data[_metadata_variable_name] and isinstance(
|
||||
data[_metadata_variable_name]["tags"], list
|
||||
):
|
||||
data[_metadata_variable_name]["tags"].extend(team_metadata["tags"])
|
||||
else:
|
||||
data[_metadata_variable_name]["tags"] = team_metadata["tags"]
|
||||
data[_metadata_variable_name]["tags"] = LiteLLMProxyRequestSetup._merge_tags(
|
||||
request_tags=data[_metadata_variable_name].get("tags"),
|
||||
tags_to_add=team_metadata["tags"],
|
||||
)
|
||||
if "spend_logs_metadata" in team_metadata and isinstance(
|
||||
team_metadata["spend_logs_metadata"], dict
|
||||
):
|
||||
|
|
|
@ -32,6 +32,7 @@ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
|||
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
||||
duration_in_seconds,
|
||||
generate_key_helper_fn,
|
||||
prepare_metadata_fields,
|
||||
)
|
||||
from litellm.proxy.management_helpers.utils import (
|
||||
add_new_member,
|
||||
|
@ -42,7 +43,7 @@ from litellm.proxy.utils import handle_exception_on_proxy
|
|||
router = APIRouter()
|
||||
|
||||
|
||||
def _update_internal_user_params(data_json: dict, data: NewUserRequest) -> dict:
|
||||
def _update_internal_new_user_params(data_json: dict, data: NewUserRequest) -> dict:
|
||||
if "user_id" in data_json and data_json["user_id"] is None:
|
||||
data_json["user_id"] = str(uuid.uuid4())
|
||||
auto_create_key = data_json.pop("auto_create_key", True)
|
||||
|
@ -145,7 +146,7 @@ async def new_user(
|
|||
from litellm.proxy.proxy_server import general_settings, proxy_logging_obj
|
||||
|
||||
data_json = data.json() # type: ignore
|
||||
data_json = _update_internal_user_params(data_json, data)
|
||||
data_json = _update_internal_new_user_params(data_json, data)
|
||||
response = await generate_key_helper_fn(request_type="user", **data_json)
|
||||
|
||||
# Admin UI Logic
|
||||
|
@ -438,6 +439,52 @@ async def user_info( # noqa: PLR0915
|
|||
raise handle_exception_on_proxy(e)
|
||||
|
||||
|
||||
def _update_internal_user_params(data_json: dict, data: UpdateUserRequest) -> dict:
|
||||
non_default_values = {}
|
||||
for k, v in data_json.items():
|
||||
if (
|
||||
v is not None
|
||||
and v
|
||||
not in (
|
||||
[],
|
||||
{},
|
||||
0,
|
||||
)
|
||||
and k not in LiteLLM_ManagementEndpoint_MetadataFields
|
||||
): # models default to [], spend defaults to 0, we should not reset these values
|
||||
non_default_values[k] = v
|
||||
|
||||
is_internal_user = False
|
||||
if data.user_role == LitellmUserRoles.INTERNAL_USER:
|
||||
is_internal_user = True
|
||||
|
||||
if "budget_duration" in non_default_values:
|
||||
duration_s = duration_in_seconds(duration=non_default_values["budget_duration"])
|
||||
user_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||
non_default_values["budget_reset_at"] = user_reset_at
|
||||
|
||||
if "max_budget" not in non_default_values:
|
||||
if (
|
||||
is_internal_user and litellm.max_internal_user_budget is not None
|
||||
): # applies internal user limits, if user role updated
|
||||
non_default_values["max_budget"] = litellm.max_internal_user_budget
|
||||
|
||||
if (
|
||||
"budget_duration" not in non_default_values
|
||||
): # applies internal user limits, if user role updated
|
||||
if is_internal_user and litellm.internal_user_budget_duration is not None:
|
||||
non_default_values["budget_duration"] = (
|
||||
litellm.internal_user_budget_duration
|
||||
)
|
||||
duration_s = duration_in_seconds(
|
||||
duration=non_default_values["budget_duration"]
|
||||
)
|
||||
user_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||
non_default_values["budget_reset_at"] = user_reset_at
|
||||
|
||||
return non_default_values
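The `budget_reset_at` calculation above relies on litellm's `duration_in_seconds` helper. A rough standalone equivalent — the duration parser here is an assumption, supporting only the `Ns`/`Nm`/`Nh`/`Nd` shorthand — looks like:

```python
from datetime import datetime, timedelta, timezone


def duration_in_seconds(duration: str) -> int:
    # simplified assumption: "30s", "15m", "12h", "30d"
    units = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    return int(duration[:-1]) * units[duration[-1]]


budget_duration = "30d"
reset_at = datetime.now(timezone.utc) + timedelta(
    seconds=duration_in_seconds(budget_duration)
)
print(reset_at.isoformat())
```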
|
||||
|
||||
|
||||
@router.post(
|
||||
"/user/update",
|
||||
tags=["Internal User management"],
|
||||
|
@ -459,7 +506,8 @@ async def user_update(
|
|||
"user_id": "test-litellm-user-4",
|
||||
"user_role": "proxy_admin_viewer"
|
||||
}'
|
||||
|
||||
```
|
||||
|
||||
Parameters:
|
||||
- user_id: Optional[str] - Specify a user id. If not set, a unique id will be generated.
|
||||
- user_email: Optional[str] - Specify a user email.
|
||||
|
@ -491,7 +539,7 @@ async def user_update(
|
|||
- duration: Optional[str] - [NOT IMPLEMENTED].
|
||||
- key_alias: Optional[str] - [NOT IMPLEMENTED].
|
||||
|
||||
```
|
||||
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client
|
||||
|
||||
|
@ -502,46 +550,21 @@ async def user_update(
|
|||
raise Exception("Not connected to DB!")
|
||||
|
||||
# get non default values for key
|
||||
non_default_values = {}
|
||||
for k, v in data_json.items():
|
||||
if v is not None and v not in (
|
||||
[],
|
||||
{},
|
||||
0,
|
||||
): # models default to [], spend defaults to 0, we should not reset these values
|
||||
non_default_values[k] = v
|
||||
non_default_values = _update_internal_user_params(
|
||||
data_json=data_json, data=data
|
||||
)
|
||||
|
||||
is_internal_user = False
|
||||
if data.user_role == LitellmUserRoles.INTERNAL_USER:
|
||||
is_internal_user = True
|
||||
existing_user_row = await prisma_client.get_data(
|
||||
user_id=data.user_id, table_name="user", query_type="find_unique"
|
||||
)
|
||||
|
||||
if "budget_duration" in non_default_values:
|
||||
duration_s = duration_in_seconds(
|
||||
duration=non_default_values["budget_duration"]
|
||||
)
|
||||
user_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||
non_default_values["budget_reset_at"] = user_reset_at
|
||||
existing_metadata = existing_user_row.metadata if existing_user_row else {}
|
||||
|
||||
if "max_budget" not in non_default_values:
|
||||
if (
|
||||
is_internal_user and litellm.max_internal_user_budget is not None
|
||||
): # applies internal user limits, if user role updated
|
||||
non_default_values["max_budget"] = litellm.max_internal_user_budget
|
||||
|
||||
if (
|
||||
"budget_duration" not in non_default_values
|
||||
): # applies internal user limits, if user role updated
|
||||
if is_internal_user and litellm.internal_user_budget_duration is not None:
|
||||
non_default_values["budget_duration"] = (
|
||||
litellm.internal_user_budget_duration
|
||||
)
|
||||
duration_s = duration_in_seconds(
|
||||
duration=non_default_values["budget_duration"]
|
||||
)
|
||||
user_reset_at = datetime.now(timezone.utc) + timedelta(
|
||||
seconds=duration_s
|
||||
)
|
||||
non_default_values["budget_reset_at"] = user_reset_at
|
||||
non_default_values = prepare_metadata_fields(
|
||||
data=data,
|
||||
non_default_values=non_default_values,
|
||||
existing_metadata=existing_metadata or {},
|
||||
)
|
||||
|
||||
## ADD USER, IF NEW ##
|
||||
verbose_proxy_logger.debug("/user/update: Received data = %s", data)
|
||||
|
|
|
@ -17,7 +17,7 @@ import secrets
|
|||
import traceback
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import List, Optional, Tuple
|
||||
from typing import List, Optional, Tuple, cast
|
||||
|
||||
import fastapi
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, Query, Request, status
|
||||
|
@ -394,7 +394,8 @@ async def generate_key_fn( # noqa: PLR0915
|
|||
}
|
||||
)
|
||||
_budget_id = getattr(_budget, "budget_id", None)
|
||||
data_json = data.json() # type: ignore
|
||||
data_json = data.model_dump(exclude_unset=True, exclude_none=True) # type: ignore
|
||||
|
||||
# if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
|
||||
if "max_budget" in data_json:
|
||||
data_json["key_max_budget"] = data_json.pop("max_budget", None)
|
||||
|
@ -420,6 +421,11 @@ async def generate_key_fn( # noqa: PLR0915
|
|||
|
||||
data_json.pop("tags")
|
||||
|
||||
await _enforce_unique_key_alias(
|
||||
key_alias=data_json.get("key_alias", None),
|
||||
prisma_client=prisma_client,
|
||||
)
|
||||
|
||||
response = await generate_key_helper_fn(
|
||||
request_type="key", **data_json, table_name="key"
|
||||
)
|
||||
|
@ -447,12 +453,52 @@ async def generate_key_fn( # noqa: PLR0915
|
|||
raise handle_exception_on_proxy(e)
|
||||
|
||||
|
||||
def prepare_metadata_fields(
    data: BaseModel, non_default_values: dict, existing_metadata: dict
) -> dict:
    """
    Check LiteLLM_ManagementEndpoint_MetadataFields (proxy/_types.py) for fields that are allowed to be updated
    """
    if "metadata" not in non_default_values:  # allow user to set metadata to none
        non_default_values["metadata"] = existing_metadata.copy()

    casted_metadata = cast(dict, non_default_values["metadata"])

    data_json = data.model_dump(exclude_unset=True, exclude_none=True)

    try:
        for k, v in data_json.items():
            if k == "model_tpm_limit" or k == "model_rpm_limit":
                if k not in casted_metadata or casted_metadata[k] is None:
                    casted_metadata[k] = {}
                casted_metadata[k].update(v)

            if k == "tags" or k == "guardrails":
                if k not in casted_metadata or casted_metadata[k] is None:
                    casted_metadata[k] = []
                seen = set(casted_metadata[k])
                casted_metadata[k].extend(
                    x for x in v if x not in seen and not seen.add(x)  # type: ignore
                )  # prevent duplicates from being added + maintain initial order

    except Exception as e:
        verbose_proxy_logger.exception(
            "litellm.proxy.proxy_server.prepare_metadata_fields(): Exception occured - {}".format(
                str(e)
            )
        )

    non_default_values["metadata"] = casted_metadata
    return non_default_values

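The effect on an existing metadata blob is easiest to see with plain dicts. The sketch below reproduces the merge rules above: dict fields like `model_tpm_limit` are updated in place, list fields like `tags` are appended without duplicates.

```python
existing_metadata = {
    "model_tpm_limit": {"gpt-4o": 100},
    "tags": ["env:prod"],
}
update = {
    "model_tpm_limit": {"gpt-4o-mini": 500},
    "tags": ["env:prod", "team:ml"],
}

metadata = existing_metadata.copy()
for k, v in update.items():
    if k in ("model_tpm_limit", "model_rpm_limit"):
        metadata.setdefault(k, {}).update(v)
    if k in ("tags", "guardrails"):
        seen = set(metadata.setdefault(k, []))
        metadata[k].extend(x for x in v if x not in seen and not seen.add(x))

print(metadata)
# {'model_tpm_limit': {'gpt-4o': 100, 'gpt-4o-mini': 500}, 'tags': ['env:prod', 'team:ml']}
```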
def prepare_key_update_data(
|
||||
data: Union[UpdateKeyRequest, RegenerateKeyRequest], existing_key_row
|
||||
):
|
||||
data_json: dict = data.model_dump(exclude_unset=True)
|
||||
data_json.pop("key", None)
|
||||
_metadata_fields = ["model_rpm_limit", "model_tpm_limit", "guardrails"]
|
||||
_metadata_fields = ["model_rpm_limit", "model_tpm_limit", "guardrails", "tags"]
|
||||
non_default_values = {}
|
||||
for k, v in data_json.items():
|
||||
if k in _metadata_fields:
|
||||
|
@ -476,24 +522,13 @@ def prepare_key_update_data(
|
|||
duration_s = duration_in_seconds(duration=budget_duration)
|
||||
key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||
non_default_values["budget_reset_at"] = key_reset_at
|
||||
non_default_values["budget_duration"] = budget_duration
|
||||
|
||||
_metadata = existing_key_row.metadata or {}
|
||||
|
||||
if data.model_tpm_limit:
|
||||
if "model_tpm_limit" not in _metadata:
|
||||
_metadata["model_tpm_limit"] = {}
|
||||
_metadata["model_tpm_limit"].update(data.model_tpm_limit)
|
||||
non_default_values["metadata"] = _metadata
|
||||
|
||||
if data.model_rpm_limit:
|
||||
if "model_rpm_limit" not in _metadata:
|
||||
_metadata["model_rpm_limit"] = {}
|
||||
_metadata["model_rpm_limit"].update(data.model_rpm_limit)
|
||||
non_default_values["metadata"] = _metadata
|
||||
|
||||
if data.guardrails:
|
||||
_metadata["guardrails"] = data.guardrails
|
||||
non_default_values["metadata"] = _metadata
|
||||
non_default_values = prepare_metadata_fields(
|
||||
data=data, non_default_values=non_default_values, existing_metadata=_metadata
|
||||
)
|
||||
|
||||
return non_default_values
|
||||
|
||||
|
@ -585,6 +620,12 @@ async def update_key_fn(
|
|||
data=data, existing_key_row=existing_key_row
|
||||
)
|
||||
|
||||
await _enforce_unique_key_alias(
|
||||
key_alias=non_default_values.get("key_alias", None),
|
||||
prisma_client=prisma_client,
|
||||
existing_key_token=existing_key_row.token,
|
||||
)
|
||||
|
||||
response = await prisma_client.update_data(
|
||||
token=key, data={**non_default_values, "token": key}
|
||||
)
|
||||
|
@ -912,11 +953,11 @@ async def generate_key_helper_fn( # noqa: PLR0915
|
|||
request_type: Literal[
|
||||
"user", "key"
|
||||
], # identifies if this request is from /user/new or /key/generate
|
||||
duration: Optional[str],
|
||||
models: list,
|
||||
aliases: dict,
|
||||
config: dict,
|
||||
spend: float,
|
||||
duration: Optional[str] = None,
|
||||
models: list = [],
|
||||
aliases: dict = {},
|
||||
config: dict = {},
|
||||
spend: float = 0.0,
|
||||
key_max_budget: Optional[float] = None, # key_max_budget is used to Budget Per key
|
||||
key_budget_duration: Optional[str] = None,
|
||||
budget_id: Optional[float] = None, # budget id <-> LiteLLM_BudgetTable
|
||||
|
@ -945,8 +986,8 @@ async def generate_key_helper_fn( # noqa: PLR0915
|
|||
allowed_cache_controls: Optional[list] = [],
|
||||
permissions: Optional[dict] = {},
|
||||
model_max_budget: Optional[dict] = {},
|
||||
model_rpm_limit: Optional[dict] = {},
|
||||
model_tpm_limit: Optional[dict] = {},
|
||||
model_rpm_limit: Optional[dict] = None,
|
||||
model_tpm_limit: Optional[dict] = None,
|
||||
guardrails: Optional[list] = None,
|
||||
teams: Optional[list] = None,
|
||||
organization_id: Optional[str] = None,
|
||||
|
@ -1883,3 +1924,38 @@ async def test_key_logging(
|
|||
status="healthy",
|
||||
details=f"No logger exceptions triggered, system is healthy. Manually check if logs were sent to {logging_callbacks} ",
|
||||
)
|
||||
|
||||
|
||||
async def _enforce_unique_key_alias(
|
||||
key_alias: Optional[str],
|
||||
prisma_client: Any,
|
||||
existing_key_token: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Helper to enforce unique key aliases across all keys.
|
||||
|
||||
Args:
|
||||
key_alias (Optional[str]): The key alias to check
|
||||
prisma_client (Any): Prisma client instance
|
||||
existing_key_token (Optional[str]): ID of existing key being updated, to exclude from uniqueness check
|
||||
(The Admin UI passes key_alias, in all Edit key requests. So we need to be sure that if we find a key with the same alias, it's not the same key we're updating)
|
||||
|
||||
Raises:
|
||||
ProxyException: If key alias already exists on a different key
|
||||
"""
|
||||
if key_alias is not None and prisma_client is not None:
|
||||
where_clause: dict[str, Any] = {"key_alias": key_alias}
|
||||
if existing_key_token:
|
||||
# Exclude the current key from the uniqueness check
|
||||
where_clause["NOT"] = {"token": existing_key_token}
|
||||
|
||||
existing_key = await prisma_client.db.litellm_verificationtoken.find_first(
|
||||
where=where_clause
|
||||
)
|
||||
if existing_key is not None:
|
||||
raise ProxyException(
|
||||
message=f"Key with alias '{key_alias}' already exists. Unique key aliases across all keys are required.",
|
||||
type=ProxyErrorTypes.bad_request_error,
|
||||
param="key_alias",
|
||||
code=status.HTTP_400_BAD_REQUEST,
|
||||
)
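The uniqueness check boils down to a Prisma `find_first` with an optional `NOT` filter; building that where clause is plain dict manipulation, sketched here without a database:

```python
from typing import Any, Optional


def build_where_clause(
    key_alias: str, existing_key_token: Optional[str] = None
) -> dict:
    # mirrors the filter built in _enforce_unique_key_alias above
    where_clause: dict[str, Any] = {"key_alias": key_alias}
    if existing_key_token:
        # exclude the key currently being updated from the uniqueness check
        where_clause["NOT"] = {"token": existing_key_token}
    return where_clause


print(build_where_clause("prod-key", existing_key_token="hashed-token-123"))
# {'key_alias': 'prod-key', 'NOT': {'token': 'hashed-token-123'}}
```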
|
||||
|
|
|
@ -1367,6 +1367,7 @@ async def list_team(
|
|||
""".format(
|
||||
team.team_id, team.model_dump(), str(e)
|
||||
)
|
||||
raise HTTPException(status_code=400, detail={"error": team_exception})
|
||||
verbose_proxy_logger.exception(team_exception)
|
||||
continue
|
||||
|
||||
return returned_responses
|
||||
|
|
|
@ -2,4 +2,4 @@ include:
|
|||
- model_config.yaml
|
||||
|
||||
litellm_settings:
|
||||
callbacks: ["prometheus"]
|
||||
callbacks: ["datadog"]
|
||||
|
|
|
@ -134,7 +134,10 @@ from litellm.proxy.auth.model_checks import (
|
|||
get_key_models,
|
||||
get_team_models,
|
||||
)
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.auth.user_api_key_auth import (
|
||||
user_api_key_auth,
|
||||
user_api_key_auth_websocket,
|
||||
)
|
||||
|
||||
## Import All Misc routes here ##
|
||||
from litellm.proxy.caching_routes import router as caching_router
|
||||
|
@ -173,6 +176,7 @@ from litellm.proxy.health_endpoints._health_endpoints import router as health_ro
|
|||
from litellm.proxy.hooks.prompt_injection_detection import (
|
||||
_OPTIONAL_PromptInjectionDetection,
|
||||
)
|
||||
from litellm.proxy.hooks.proxy_failure_handler import _PROXY_failure_handler
|
||||
from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
|
||||
from litellm.proxy.management_endpoints.customer_endpoints import (
|
||||
router as customer_router,
|
||||
|
@ -526,14 +530,6 @@ db_writer_client: Optional[HTTPHandler] = None
|
|||
### logger ###
|
||||
|
||||
|
||||
def _get_pydantic_json_dict(pydantic_obj: BaseModel) -> dict:
|
||||
try:
|
||||
return pydantic_obj.model_dump() # type: ignore
|
||||
except Exception:
|
||||
# if using pydantic v1
|
||||
return pydantic_obj.dict()
|
||||
|
||||
|
||||
def get_custom_headers(
|
||||
*,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
|
@ -687,68 +683,6 @@ def cost_tracking():
|
|||
litellm._async_success_callback.append(_PROXY_track_cost_callback) # type: ignore
|
||||
|
||||
|
||||
async def _PROXY_failure_handler(
|
||||
kwargs, # kwargs to completion
|
||||
completion_response: litellm.ModelResponse, # response from completion
|
||||
start_time=None,
|
||||
end_time=None, # start/end time for completion
|
||||
):
|
||||
global prisma_client
|
||||
if prisma_client is not None:
|
||||
verbose_proxy_logger.debug(
|
||||
"inside _PROXY_failure_handler kwargs=", extra=kwargs
|
||||
)
|
||||
|
||||
_exception = kwargs.get("exception")
|
||||
_exception_type = _exception.__class__.__name__
|
||||
_model = kwargs.get("model", None)
|
||||
|
||||
_optional_params = kwargs.get("optional_params", {})
|
||||
_optional_params = copy.deepcopy(_optional_params)
|
||||
|
||||
for k, v in _optional_params.items():
|
||||
v = str(v)
|
||||
v = v[:100]
|
||||
|
||||
_status_code = "500"
|
||||
try:
|
||||
_status_code = str(_exception.status_code)
|
||||
except Exception:
|
||||
# Don't let this fail logging the exception to the dB
|
||||
pass
|
||||
|
||||
_litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||
_metadata = _litellm_params.get("metadata", {}) or {}
|
||||
_model_id = _metadata.get("model_info", {}).get("id", "")
|
||||
_model_group = _metadata.get("model_group", "")
|
||||
api_base = litellm.get_api_base(model=_model, optional_params=_litellm_params)
|
||||
_exception_string = str(_exception)
|
||||
|
||||
error_log = LiteLLM_ErrorLogs(
|
||||
request_id=str(uuid.uuid4()),
|
||||
model_group=_model_group,
|
||||
model_id=_model_id,
|
||||
litellm_model_name=kwargs.get("model"),
|
||||
request_kwargs=_optional_params,
|
||||
api_base=api_base,
|
||||
exception_type=_exception_type,
|
||||
status_code=_status_code,
|
||||
exception_string=_exception_string,
|
||||
startTime=kwargs.get("start_time"),
|
||||
endTime=kwargs.get("end_time"),
|
||||
)
|
||||
|
||||
# helper function to convert to dict on pydantic v2 & v1
|
||||
error_log_dict = _get_pydantic_json_dict(error_log)
|
||||
error_log_dict["request_kwargs"] = json.dumps(error_log_dict["request_kwargs"])
|
||||
|
||||
await prisma_client.db.litellm_errorlogs.create(
|
||||
data=error_log_dict # type: ignore
|
||||
)
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@log_db_metrics
|
||||
async def _PROXY_track_cost_callback(
|
||||
kwargs, # kwargs to completion
|
||||
|
@ -4394,7 +4328,11 @@ from litellm import _arealtime
|
|||
|
||||
|
||||
@app.websocket("/v1/realtime")
|
||||
async def websocket_endpoint(websocket: WebSocket, model: str):
|
||||
async def websocket_endpoint(
|
||||
websocket: WebSocket,
|
||||
model: str,
|
||||
user_api_key_dict=Depends(user_api_key_auth_websocket),
|
||||
):
|
||||
import websockets
|
||||
|
||||
await websocket.accept()
|
||||
|
|
|
@ -86,7 +86,6 @@ async def route_request(
|
|||
else:
|
||||
models = [model.strip() for model in data.pop("model").split(",")]
|
||||
return llm_router.abatch_completion(models=models, **data)
|
||||
|
||||
elif llm_router is not None:
|
||||
if (
|
||||
data["model"] in router_model_names
|
||||
|
@ -113,6 +112,9 @@ async def route_request(
|
|||
or len(llm_router.pattern_router.patterns) > 0
|
||||
):
|
||||
return getattr(llm_router, f"{route_type}")(**data)
|
||||
elif route_type == "amoderation":
|
||||
# moderation endpoint does not require `model` parameter
|
||||
return getattr(llm_router, f"{route_type}")(**data)
|
||||
|
||||
elif user_model is not None:
|
||||
return getattr(litellm, f"{route_type}")(**data)
|
||||
|
|
|
@ -891,7 +891,7 @@ class ProxyLogging:
|
|||
original_exception: Exception,
|
||||
request: Request,
|
||||
parent_otel_span: Optional[Any],
|
||||
api_key: str,
|
||||
api_key: Optional[str],
|
||||
):
|
||||
"""
|
||||
Handler for Logging Authentication Errors on LiteLLM Proxy
|
||||
|
@ -905,9 +905,13 @@ class ProxyLogging:
|
|||
|
||||
user_api_key_dict = UserAPIKeyAuth(
|
||||
parent_otel_span=parent_otel_span,
|
||||
token=_hash_token_if_needed(token=api_key),
|
||||
token=_hash_token_if_needed(token=api_key or ""),
|
||||
)
|
||||
request_data = await request.json()
|
||||
try:
|
||||
request_data = await request.json()
|
||||
except json.JSONDecodeError:
|
||||
# For GET requests or requests without a JSON body
|
||||
request_data = {}
|
||||
await self._run_post_call_failure_hook_custom_loggers(
|
||||
original_exception=original_exception,
|
||||
request_data=request_data,
|
||||
|
|
|
@ -41,6 +41,7 @@ from typing import (
|
|||
import httpx
|
||||
import openai
|
||||
from openai import AsyncOpenAI
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import overload
|
||||
|
||||
import litellm
|
||||
|
@ -122,6 +123,7 @@ from litellm.types.router import (
|
|||
ModelInfo,
|
||||
ProviderBudgetConfigType,
|
||||
RetryPolicy,
|
||||
RouterCacheEnum,
|
||||
RouterErrors,
|
||||
RouterGeneralSettings,
|
||||
RouterModelGroupAliasItem,
|
||||
|
@ -239,7 +241,6 @@ class Router:
|
|||
] = "simple-shuffle",
|
||||
routing_strategy_args: dict = {}, # just for latency-based
|
||||
provider_budget_config: Optional[ProviderBudgetConfigType] = None,
|
||||
semaphore: Optional[asyncio.Semaphore] = None,
|
||||
alerting_config: Optional[AlertingConfig] = None,
|
||||
router_general_settings: Optional[
|
||||
RouterGeneralSettings
|
||||
|
@ -315,8 +316,6 @@ class Router:
|
|||
|
||||
from litellm._service_logger import ServiceLogging
|
||||
|
||||
if semaphore:
|
||||
self.semaphore = semaphore
|
||||
self.set_verbose = set_verbose
|
||||
self.debug_level = debug_level
|
||||
self.enable_pre_call_checks = enable_pre_call_checks
|
||||
|
@ -506,6 +505,14 @@ class Router:
|
|||
litellm.success_callback.append(self.sync_deployment_callback_on_success)
|
||||
else:
|
||||
litellm.success_callback = [self.sync_deployment_callback_on_success]
|
||||
if isinstance(litellm._async_failure_callback, list):
|
||||
litellm._async_failure_callback.append(
|
||||
self.async_deployment_callback_on_failure
|
||||
)
|
||||
else:
|
||||
litellm._async_failure_callback = [
|
||||
self.async_deployment_callback_on_failure
|
||||
]
|
||||
## COOLDOWNS ##
|
||||
if isinstance(litellm.failure_callback, list):
|
||||
litellm.failure_callback.append(self.deployment_callback_on_failure)
|
||||
|
@ -2556,10 +2563,7 @@ class Router:
|
|||
original_function: Callable,
|
||||
**kwargs,
|
||||
):
|
||||
if (
|
||||
"model" in kwargs
|
||||
and self.get_model_list(model_name=kwargs["model"]) is not None
|
||||
):
|
||||
if kwargs.get("model") and self.get_model_list(model_name=kwargs["model"]):
|
||||
deployment = await self.async_get_available_deployment(
|
||||
model=kwargs["model"]
|
||||
)
|
||||
|
@ -3291,13 +3295,14 @@ class Router:
|
|||
):
|
||||
"""
|
||||
Track remaining tpm/rpm quota for model in model_list
|
||||
|
||||
Currently, only updates TPM usage.
|
||||
"""
|
||||
try:
|
||||
if kwargs["litellm_params"].get("metadata") is None:
|
||||
pass
|
||||
else:
|
||||
deployment_name = kwargs["litellm_params"]["metadata"].get(
|
||||
"deployment", None
|
||||
) # stable name - works for wildcard routes as well
|
||||
model_group = kwargs["litellm_params"]["metadata"].get(
|
||||
"model_group", None
|
||||
)
|
||||
|
@ -3308,6 +3313,8 @@ class Router:
|
|||
elif isinstance(id, int):
|
||||
id = str(id)
|
||||
|
||||
parent_otel_span = _get_parent_otel_span_from_kwargs(kwargs)
|
||||
|
||||
_usage_obj = completion_response.get("usage")
|
||||
total_tokens = _usage_obj.get("total_tokens", 0) if _usage_obj else 0
|
||||
|
||||
|
@ -3319,13 +3326,14 @@ class Router:
|
|||
"%H-%M"
|
||||
) # use the same timezone regardless of system clock
|
||||
|
||||
tpm_key = f"global_router:{id}:tpm:{current_minute}"
|
||||
tpm_key = RouterCacheEnum.TPM.value.format(
|
||||
id=id, current_minute=current_minute, model=deployment_name
|
||||
)
|
||||
# ------------
|
||||
# Update usage
|
||||
# ------------
|
||||
# update cache
|
||||
|
||||
parent_otel_span = _get_parent_otel_span_from_kwargs(kwargs)
|
||||
## TPM
|
||||
await self.cache.async_increment_cache(
|
||||
key=tpm_key,
|
||||
|
@ -3334,6 +3342,17 @@ class Router:
|
|||
ttl=RoutingArgs.ttl.value,
|
||||
)
|
||||
|
||||
## RPM
|
||||
rpm_key = RouterCacheEnum.RPM.value.format(
|
||||
id=id, current_minute=current_minute, model=deployment_name
|
||||
)
|
||||
await self.cache.async_increment_cache(
|
||||
key=rpm_key,
|
||||
value=1,
|
||||
parent_otel_span=parent_otel_span,
|
||||
ttl=RoutingArgs.ttl.value,
|
||||
)
|
||||
|
||||
increment_deployment_successes_for_current_minute(
|
||||
litellm_router_instance=self,
|
||||
deployment_id=id,
|
||||
|
@ -3446,6 +3465,40 @@ class Router:
|
|||
except Exception as e:
|
||||
raise e
|
||||
|
||||
async def async_deployment_callback_on_failure(
|
||||
self, kwargs, completion_response: Optional[Any], start_time, end_time
|
||||
):
|
||||
"""
|
||||
Update RPM usage for a deployment
|
||||
"""
|
||||
deployment_name = kwargs["litellm_params"]["metadata"].get(
|
||||
"deployment", None
|
||||
) # handles wildcard routes - by giving the original name sent to `litellm.completion`
|
||||
model_group = kwargs["litellm_params"]["metadata"].get("model_group", None)
|
||||
model_info = kwargs["litellm_params"].get("model_info", {}) or {}
|
||||
id = model_info.get("id", None)
|
||||
if model_group is None or id is None:
|
||||
return
|
||||
elif isinstance(id, int):
|
||||
id = str(id)
|
||||
parent_otel_span = _get_parent_otel_span_from_kwargs(kwargs)
|
||||
|
||||
dt = get_utc_datetime()
|
||||
current_minute = dt.strftime(
|
||||
"%H-%M"
|
||||
) # use the same timezone regardless of system clock
|
||||
|
||||
## RPM
|
||||
rpm_key = RouterCacheEnum.RPM.value.format(
|
||||
id=id, current_minute=current_minute, model=deployment_name
|
||||
)
|
||||
await self.cache.async_increment_cache(
|
||||
key=rpm_key,
|
||||
value=1,
|
||||
parent_otel_span=parent_otel_span,
|
||||
ttl=RoutingArgs.ttl.value,
|
||||
)
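Functionally, the new failure callback just bumps a per-minute RPM counter keyed on deployment id and the model name, so failed requests also count against a deployment's RPM. A toy in-memory version of that increment, using a plain dict in place of the router's cache:

```python
from collections import defaultdict
from datetime import datetime, timezone

rpm_cache: dict = defaultdict(int)  # stand-in for the router's cache


def increment_rpm(deployment_id: str, model: str) -> None:
    current_minute = datetime.now(timezone.utc).strftime("%H-%M")
    rpm_key = "global_router:{id}:{model}:rpm:{current_minute}".format(
        id=deployment_id, model=model, current_minute=current_minute
    )
    rpm_cache[rpm_key] += 1


increment_rpm("deployment-1", "gemini/gemini-1.5-flash")
print(dict(rpm_cache))
```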
|
||||
|
||||
def log_retry(self, kwargs: dict, e: Exception) -> dict:
|
||||
"""
|
||||
When a retry or fallback happens, log the details of the just failed model call - similar to Sentry breadcrumbing
|
||||
|
@ -4123,7 +4176,24 @@ class Router:
|
|||
raise Exception("Model Name invalid - {}".format(type(model)))
|
||||
return None
|
||||
|
||||
def get_router_model_info(self, deployment: dict) -> ModelMapInfo:
|
||||
@overload
|
||||
def get_router_model_info(
|
||||
self, deployment: dict, received_model_name: str, id: None = None
|
||||
) -> ModelMapInfo:
|
||||
pass
|
||||
|
||||
@overload
|
||||
def get_router_model_info(
|
||||
self, deployment: None, received_model_name: str, id: str
|
||||
) -> ModelMapInfo:
|
||||
pass
|
||||
|
||||
def get_router_model_info(
|
||||
self,
|
||||
deployment: Optional[dict],
|
||||
received_model_name: str,
|
||||
id: Optional[str] = None,
|
||||
) -> ModelMapInfo:
|
||||
"""
|
||||
For a given model id, return the model info (max tokens, input cost, output cost, etc.).
|
||||
|
||||
|
@ -4137,6 +4207,14 @@ class Router:
|
|||
Raises:
|
||||
- ValueError -> If model is not mapped yet
|
||||
"""
|
||||
if id is not None:
|
||||
_deployment = self.get_deployment(model_id=id)
|
||||
if _deployment is not None:
|
||||
deployment = _deployment.model_dump(exclude_none=True)
|
||||
|
||||
if deployment is None:
|
||||
raise ValueError("Deployment not found")
|
||||
|
||||
## GET BASE MODEL
|
||||
base_model = deployment.get("model_info", {}).get("base_model", None)
|
||||
if base_model is None:
|
||||
|
@ -4158,10 +4236,27 @@ class Router:
|
|||
elif custom_llm_provider != "azure":
|
||||
model = _model
|
||||
|
||||
potential_models = self.pattern_router.route(received_model_name)
|
||||
if "*" in model and potential_models is not None: # if wildcard route
|
||||
for potential_model in potential_models:
|
||||
try:
|
||||
if potential_model.get("model_info", {}).get(
|
||||
"id"
|
||||
) == deployment.get("model_info", {}).get("id"):
|
||||
model = potential_model.get("litellm_params", {}).get(
|
||||
"model"
|
||||
)
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
## GET LITELLM MODEL INFO - raises exception, if model is not mapped
|
||||
model_info = litellm.get_model_info(
|
||||
model="{}/{}".format(custom_llm_provider, model)
|
||||
)
|
||||
if not model.startswith(custom_llm_provider):
|
||||
model_info_name = "{}/{}".format(custom_llm_provider, model)
|
||||
else:
|
||||
model_info_name = model
|
||||
|
||||
model_info = litellm.get_model_info(model=model_info_name)
|
||||
|
||||
## CHECK USER SET MODEL INFO
|
||||
user_model_info = deployment.get("model_info", {})
|
||||
|
@ -4211,8 +4306,10 @@ class Router:
|
|||
total_tpm: Optional[int] = None
|
||||
total_rpm: Optional[int] = None
|
||||
configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None
|
||||
|
||||
for model in self.model_list:
|
||||
model_list = self.get_model_list(model_name=model_group)
|
||||
if model_list is None:
|
||||
return None
|
||||
for model in model_list:
|
||||
is_match = False
|
||||
if (
|
||||
"model_name" in model and model["model_name"] == model_group
|
||||
|
@ -4227,7 +4324,7 @@ class Router:
|
|||
if not is_match:
|
||||
continue
|
||||
# model in model group found #
|
||||
litellm_params = LiteLLM_Params(**model["litellm_params"])
|
||||
litellm_params = LiteLLM_Params(**model["litellm_params"]) # type: ignore
|
||||
# get configurable clientside auth params
|
||||
configurable_clientside_auth_params = (
|
||||
litellm_params.configurable_clientside_auth_params
|
||||
|
@ -4235,38 +4332,30 @@ class Router:
|
|||
# get model tpm
|
||||
_deployment_tpm: Optional[int] = None
|
||||
if _deployment_tpm is None:
|
||||
_deployment_tpm = model.get("tpm", None)
|
||||
_deployment_tpm = model.get("tpm", None) # type: ignore
|
||||
if _deployment_tpm is None:
|
||||
_deployment_tpm = model.get("litellm_params", {}).get("tpm", None)
|
||||
_deployment_tpm = model.get("litellm_params", {}).get("tpm", None) # type: ignore
|
||||
if _deployment_tpm is None:
|
||||
_deployment_tpm = model.get("model_info", {}).get("tpm", None)
|
||||
_deployment_tpm = model.get("model_info", {}).get("tpm", None) # type: ignore
|
||||
|
||||
if _deployment_tpm is not None:
|
||||
if total_tpm is None:
|
||||
total_tpm = 0
|
||||
total_tpm += _deployment_tpm # type: ignore
|
||||
# get model rpm
|
||||
_deployment_rpm: Optional[int] = None
|
||||
if _deployment_rpm is None:
|
||||
_deployment_rpm = model.get("rpm", None)
|
||||
_deployment_rpm = model.get("rpm", None) # type: ignore
|
||||
if _deployment_rpm is None:
|
||||
_deployment_rpm = model.get("litellm_params", {}).get("rpm", None)
|
||||
_deployment_rpm = model.get("litellm_params", {}).get("rpm", None) # type: ignore
|
||||
if _deployment_rpm is None:
|
||||
_deployment_rpm = model.get("model_info", {}).get("rpm", None)
|
||||
_deployment_rpm = model.get("model_info", {}).get("rpm", None) # type: ignore
|
||||
|
||||
if _deployment_rpm is not None:
|
||||
if total_rpm is None:
|
||||
total_rpm = 0
|
||||
total_rpm += _deployment_rpm # type: ignore
|
||||
# get model info
|
||||
try:
|
||||
model_info = litellm.get_model_info(model=litellm_params.model)
|
||||
except Exception:
|
||||
model_info = None
|
||||
# get llm provider
|
||||
model, llm_provider = "", ""
|
||||
litellm_model, llm_provider = "", ""
|
||||
try:
|
||||
model, llm_provider, _, _ = litellm.get_llm_provider(
|
||||
litellm_model, llm_provider, _, _ = litellm.get_llm_provider(
|
||||
model=litellm_params.model,
|
||||
custom_llm_provider=litellm_params.custom_llm_provider,
|
||||
)
|
||||
|
@ -4277,7 +4366,7 @@ class Router:
|
|||
|
||||
if model_info is None:
|
||||
supported_openai_params = litellm.get_supported_openai_params(
|
||||
model=model, custom_llm_provider=llm_provider
|
||||
model=litellm_model, custom_llm_provider=llm_provider
|
||||
)
|
||||
if supported_openai_params is None:
|
||||
supported_openai_params = []
|
||||
|
@ -4367,7 +4456,20 @@ class Router:
|
|||
model_group_info.supported_openai_params = model_info[
|
||||
"supported_openai_params"
|
||||
]
|
||||
if model_info.get("tpm", None) is not None and _deployment_tpm is None:
|
||||
_deployment_tpm = model_info.get("tpm")
|
||||
if model_info.get("rpm", None) is not None and _deployment_rpm is None:
|
||||
_deployment_rpm = model_info.get("rpm")
|
||||
|
||||
if _deployment_tpm is not None:
|
||||
if total_tpm is None:
|
||||
total_tpm = 0
|
||||
total_tpm += _deployment_tpm # type: ignore
|
||||
|
||||
if _deployment_rpm is not None:
|
||||
if total_rpm is None:
|
||||
total_rpm = 0
|
||||
total_rpm += _deployment_rpm # type: ignore
|
||||
if model_group_info is not None:
|
||||
## UPDATE WITH TOTAL TPM/RPM FOR MODEL GROUP
|
||||
if total_tpm is not None:
|
||||
|
@ -4419,7 +4521,10 @@ class Router:
|
|||
self, model_group: str
|
||||
) -> Tuple[Optional[int], Optional[int]]:
|
||||
"""
|
||||
Returns remaining tpm/rpm quota for model group
|
||||
Returns current tpm/rpm usage for model group
|
||||
|
||||
Parameters:
|
||||
- model_group: str - the received model name from the user (can be a wildcard route).
|
||||
|
||||
Returns:
|
||||
- usage: Tuple[tpm, rpm]
|
||||
|
@ -4430,20 +4535,37 @@ class Router:
|
|||
) # use the same timezone regardless of system clock
|
||||
tpm_keys: List[str] = []
|
||||
rpm_keys: List[str] = []
|
||||
for model in self.model_list:
|
||||
if "model_name" in model and model["model_name"] == model_group:
|
||||
tpm_keys.append(
|
||||
f"global_router:{model['model_info']['id']}:tpm:{current_minute}"
|
||||
|
||||
model_list = self.get_model_list(model_name=model_group)
|
||||
if model_list is None: # no matching deployments
|
||||
return None, None
|
||||
|
||||
for model in model_list:
|
||||
id: Optional[str] = model.get("model_info", {}).get("id") # type: ignore
|
||||
litellm_model: Optional[str] = model["litellm_params"].get(
|
||||
"model"
|
||||
) # USE THE MODEL SENT TO litellm.completion() - consistent with how global_router cache is written.
|
||||
if id is None or litellm_model is None:
|
||||
continue
|
||||
tpm_keys.append(
|
||||
RouterCacheEnum.TPM.value.format(
|
||||
id=id,
|
||||
model=litellm_model,
|
||||
current_minute=current_minute,
|
||||
)
|
||||
rpm_keys.append(
|
||||
f"global_router:{model['model_info']['id']}:rpm:{current_minute}"
|
||||
)
|
||||
rpm_keys.append(
|
||||
RouterCacheEnum.RPM.value.format(
|
||||
id=id,
|
||||
model=litellm_model,
|
||||
current_minute=current_minute,
|
||||
)
|
||||
)
|
||||
combined_tpm_rpm_keys = tpm_keys + rpm_keys
|
||||
|
||||
combined_tpm_rpm_values = await self.cache.async_batch_get_cache(
|
||||
keys=combined_tpm_rpm_keys
|
||||
)
|
||||
|
||||
if combined_tpm_rpm_values is None:
|
||||
return None, None
|
||||
|
||||
|
@ -4468,6 +4590,32 @@ class Router:
|
|||
rpm_usage += t
|
||||
return tpm_usage, rpm_usage
|
||||
|
||||
    async def get_remaining_model_group_usage(self, model_group: str) -> Dict[str, int]:

        current_tpm, current_rpm = await self.get_model_group_usage(model_group)

        model_group_info = self.get_model_group_info(model_group)

        if model_group_info is not None and model_group_info.tpm is not None:
            tpm_limit = model_group_info.tpm
        else:
            tpm_limit = None

        if model_group_info is not None and model_group_info.rpm is not None:
            rpm_limit = model_group_info.rpm
        else:
            rpm_limit = None

        returned_dict = {}
        if tpm_limit is not None and current_tpm is not None:
            returned_dict["x-ratelimit-remaining-tokens"] = tpm_limit - current_tpm
            returned_dict["x-ratelimit-limit-tokens"] = tpm_limit
        if rpm_limit is not None and current_rpm is not None:
            returned_dict["x-ratelimit-remaining-requests"] = rpm_limit - current_rpm
            returned_dict["x-ratelimit-limit-requests"] = rpm_limit

        return returned_dict

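Given current usage and the model group's configured limits, the returned headers are simple subtractions. A self-contained sketch of the same computation:

```python
from typing import Dict, Optional


def remaining_usage_headers(
    current_tpm: Optional[int],
    current_rpm: Optional[int],
    tpm_limit: Optional[int],
    rpm_limit: Optional[int],
) -> Dict[str, int]:
    headers: Dict[str, int] = {}
    if tpm_limit is not None and current_tpm is not None:
        headers["x-ratelimit-remaining-tokens"] = tpm_limit - current_tpm
        headers["x-ratelimit-limit-tokens"] = tpm_limit
    if rpm_limit is not None and current_rpm is not None:
        headers["x-ratelimit-remaining-requests"] = rpm_limit - current_rpm
        headers["x-ratelimit-limit-requests"] = rpm_limit
    return headers


print(remaining_usage_headers(current_tpm=1200, current_rpm=3, tpm_limit=4_000_000, rpm_limit=1000))
```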
async def set_response_headers(
|
||||
self, response: Any, model_group: Optional[str] = None
|
||||
) -> Any:
|
||||
|
@ -4478,6 +4626,30 @@ class Router:
|
|||
# - if healthy_deployments > 1, return model group rate limit headers
|
||||
# - else return the model's rate limit headers
|
||||
"""
|
||||
if (
|
||||
isinstance(response, BaseModel)
|
||||
and hasattr(response, "_hidden_params")
|
||||
and isinstance(response._hidden_params, dict) # type: ignore
|
||||
):
|
||||
response._hidden_params.setdefault("additional_headers", {}) # type: ignore
|
||||
response._hidden_params["additional_headers"][ # type: ignore
|
||||
"x-litellm-model-group"
|
||||
] = model_group
|
||||
|
||||
additional_headers = response._hidden_params["additional_headers"] # type: ignore
|
||||
|
||||
if (
|
||||
"x-ratelimit-remaining-tokens" not in additional_headers
|
||||
and "x-ratelimit-remaining-requests" not in additional_headers
|
||||
and model_group is not None
|
||||
):
|
||||
remaining_usage = await self.get_remaining_model_group_usage(
|
||||
model_group
|
||||
)
|
||||
|
||||
for header, value in remaining_usage.items():
|
||||
if value is not None:
|
||||
additional_headers[header] = value
|
||||
return response
|
||||
|
||||
def get_model_ids(self, model_name: Optional[str] = None) -> List[str]:
|
||||
|
@ -4540,6 +4712,9 @@ class Router:
|
|||
if hasattr(self, "model_list"):
|
||||
returned_models: List[DeploymentTypedDict] = []
|
||||
|
||||
if model_name is not None:
|
||||
returned_models.extend(self._get_all_deployments(model_name=model_name))
|
||||
|
||||
if hasattr(self, "model_group_alias"):
|
||||
for model_alias, model_value in self.model_group_alias.items():
|
||||
|
||||
|
@ -4560,21 +4735,32 @@ class Router:
|
|||
)
|
||||
)
|
||||
|
||||
if len(returned_models) == 0: # check if wildcard route
|
||||
potential_wildcard_models = self.pattern_router.route(model_name)
|
||||
if potential_wildcard_models is not None:
|
||||
returned_models.extend(
|
||||
[DeploymentTypedDict(**m) for m in potential_wildcard_models] # type: ignore
|
||||
)
|
||||
|
||||
if model_name is None:
|
||||
returned_models += self.model_list
|
||||
|
||||
return returned_models
|
||||
returned_models.extend(self._get_all_deployments(model_name=model_name))
|
||||
|
||||
return returned_models
|
||||
return None
|
||||
|
||||
def get_model_access_groups(self):
|
||||
def get_model_access_groups(self, model_name: Optional[str] = None):
|
||||
"""
|
||||
If model_name is provided, only return access groups for that model.
|
||||
"""
|
||||
from collections import defaultdict
|
||||
|
||||
access_groups = defaultdict(list)
|
||||
|
||||
if self.model_list:
|
||||
for m in self.model_list:
|
||||
model_list = self.get_model_list(model_name=model_name)
|
||||
if model_list:
|
||||
for m in model_list:
|
||||
for group in m.get("model_info", {}).get("access_groups", []):
|
||||
model_name = m["model_name"]
|
||||
access_groups[group].append(model_name)
|
||||
|
@ -4810,10 +4996,12 @@ class Router:
|
|||
base_model = deployment.get("litellm_params", {}).get(
|
||||
"base_model", None
|
||||
)
|
||||
model_info = self.get_router_model_info(
|
||||
deployment=deployment, received_model_name=model
|
||||
)
|
||||
model = base_model or deployment.get("litellm_params", {}).get(
|
||||
"model", None
|
||||
)
|
||||
model_info = self.get_router_model_info(deployment=deployment)
|
||||
|
||||
if (
|
||||
isinstance(model_info, dict)
|
||||
|
|
|
@ -79,7 +79,9 @@ class PatternMatchRouter:
|
|||
|
||||
return new_deployments
|
||||
|
||||
def route(self, request: Optional[str]) -> Optional[List[Dict]]:
|
||||
def route(
|
||||
self, request: Optional[str], filtered_model_names: Optional[List[str]] = None
|
||||
) -> Optional[List[Dict]]:
|
||||
"""
|
||||
Route a requested model to the corresponding llm deployments based on the regex pattern
|
||||
|
||||
|
@ -89,14 +91,26 @@ class PatternMatchRouter:
|
|||
|
||||
Args:
|
||||
request: Optional[str]
|
||||
|
||||
filtered_model_names: Optional[List[str]] - if provided, only return deployments that match the filtered_model_names
|
||||
Returns:
|
||||
Optional[List[Deployment]]: llm deployments
|
||||
"""
|
||||
try:
|
||||
if request is None:
|
||||
return None
|
||||
|
||||
regex_filtered_model_names = (
|
||||
[self._pattern_to_regex(m) for m in filtered_model_names]
|
||||
if filtered_model_names is not None
|
||||
else []
|
||||
)
|
||||
|
||||
for pattern, llm_deployments in self.patterns.items():
|
||||
if (
|
||||
filtered_model_names is not None
|
||||
and pattern not in regex_filtered_model_names
|
||||
):
|
||||
continue
|
||||
pattern_match = re.match(pattern, request)
|
||||
if pattern_match:
|
||||
return self._return_pattern_matched_deployments(
|
||||
|
|
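The new `filtered_model_names` argument lets callers restrict wildcard matching to a subset of patterns. A rough standalone sketch of the same idea, using a hypothetical `pattern_to_regex` helper in place of the router's private `_pattern_to_regex`:

```python
import re
from typing import Dict, List, Optional


def pattern_to_regex(pattern: str) -> str:
    # assumption: "*" in a model name behaves like a regex wildcard
    return "^" + re.escape(pattern).replace(r"\*", "(.*)") + "$"


def route(
    request: str,
    patterns: Dict[str, List[dict]],
    filtered_model_names: Optional[List[str]] = None,
) -> Optional[List[dict]]:
    allowed = (
        {pattern_to_regex(m) for m in filtered_model_names}
        if filtered_model_names is not None
        else None
    )
    for pattern, deployments in patterns.items():
        if allowed is not None and pattern not in allowed:
            continue
        if re.match(pattern, request):
            return deployments
    return None


patterns = {pattern_to_regex("gemini/*"): [{"model_name": "gemini/*"}]}
print(route("gemini/gemini-1.5-flash", patterns, filtered_model_names=["gemini/*"]))
```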
litellm/router_utils/response_headers.py (new, empty file)
|
@ -1,29 +0,0 @@
|
|||
import pytest
|
||||
|
||||
import litellm
|
||||
|
||||
|
||||
def test_mlflow_logging():
|
||||
litellm.success_callback = ["mlflow"]
|
||||
litellm.failure_callback = ["mlflow"]
|
||||
|
||||
litellm.completion(
|
||||
model="gpt-4o-mini",
|
||||
messages=[{"role": "user", "content": "what llm are u"}],
|
||||
max_tokens=10,
|
||||
temperature=0.2,
|
||||
user="test-user",
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio()
|
||||
async def test_async_mlflow_logging():
|
||||
litellm.success_callback = ["mlflow"]
|
||||
litellm.failure_callback = ["mlflow"]
|
||||
|
||||
await litellm.acompletion(
|
||||
model="gpt-4o-mini",
|
||||
messages=[{"role": "user", "content": "hi test from local arize"}],
|
||||
mock_response="hello",
|
||||
temperature=0.1,
|
||||
user="OTEL_USER",
|
||||
)
|
|
@ -9,7 +9,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
|||
|
||||
import httpx
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
from typing_extensions import TypedDict
|
||||
from typing_extensions import Required, TypedDict
|
||||
|
||||
from ..exceptions import RateLimitError
|
||||
from .completion import CompletionRequest
|
||||
|
@ -352,9 +352,10 @@ class LiteLLMParamsTypedDict(TypedDict, total=False):
|
|||
tags: Optional[List[str]]
|
||||
|
||||
|
||||
class DeploymentTypedDict(TypedDict):
|
||||
model_name: str
|
||||
litellm_params: LiteLLMParamsTypedDict
|
||||
class DeploymentTypedDict(TypedDict, total=False):
|
||||
model_name: Required[str]
|
||||
litellm_params: Required[LiteLLMParamsTypedDict]
|
||||
model_info: dict
|
||||
|
||||
|
||||
SPECIAL_MODEL_INFO_PARAMS = [
|
||||
|
@ -640,3 +641,8 @@ class ProviderBudgetInfo(BaseModel):
|
|||
|
||||
|
||||
ProviderBudgetConfigType = Dict[str, ProviderBudgetInfo]


class RouterCacheEnum(enum.Enum):
    TPM = "global_router:{id}:{model}:tpm:{current_minute}"
    RPM = "global_router:{id}:{model}:rpm:{current_minute}"
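These enum values are format strings; the router fills them in with the deployment id, the model sent to `litellm.completion()`, and the current minute. For example:

```python
import enum


class RouterCacheEnum(enum.Enum):
    # copied from the definition above
    TPM = "global_router:{id}:{model}:tpm:{current_minute}"
    RPM = "global_router:{id}:{model}:rpm:{current_minute}"


tpm_key = RouterCacheEnum.TPM.value.format(
    id="deployment-abc123",
    model="gemini/gemini-1.5-flash",
    current_minute="14-32",
)
print(tpm_key)
# global_router:deployment-abc123:gemini/gemini-1.5-flash:tpm:14-32
```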
|
|
|
@ -106,6 +106,8 @@ class ModelInfo(TypedDict, total=False):
|
|||
supports_prompt_caching: Optional[bool]
|
||||
supports_audio_input: Optional[bool]
|
||||
supports_audio_output: Optional[bool]
|
||||
tpm: Optional[int]
|
||||
rpm: Optional[int]
|
||||
|
||||
|
||||
class GenericStreamingChunk(TypedDict, total=False):
|
||||
|
|
|
@ -4656,6 +4656,8 @@ def get_model_info( # noqa: PLR0915
|
|||
),
|
||||
supports_audio_input=_model_info.get("supports_audio_input", False),
|
||||
supports_audio_output=_model_info.get("supports_audio_output", False),
|
||||
tpm=_model_info.get("tpm", None),
|
||||
rpm=_model_info.get("rpm", None),
|
||||
)
|
||||
except Exception as e:
|
||||
if "OllamaError" in str(e):
|
||||
|
|
|
@ -3383,6 +3383,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-001": {
|
||||
|
@ -3406,6 +3408,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash": {
|
||||
|
@ -3428,6 +3432,8 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-latest": {
|
||||
|
@ -3450,6 +3456,32 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-8b": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 4000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-8b-exp-0924": {
|
||||
|
@ -3472,6 +3504,8 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 4000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-exp-1114": {
|
||||
|
@ -3494,7 +3528,12 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing",
|
||||
"metadata": {
|
||||
"notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro."
|
||||
}
|
||||
},
|
||||
"gemini/gemini-1.5-flash-exp-0827": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -3516,6 +3555,8 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 2000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-8b-exp-0827": {
|
||||
|
@ -3537,6 +3578,9 @@
|
|||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 4000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-pro": {
|
||||
|
@ -3550,7 +3594,10 @@
|
|||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
"rpd": 30000,
|
||||
"tpm": 120000,
|
||||
"rpm": 360,
|
||||
"source": "https://ai.google.dev/gemini-api/docs/models/gemini"
|
||||
},
|
||||
"gemini/gemini-1.5-pro": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -3567,6 +3614,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-002": {
|
||||
|
@ -3585,6 +3634,8 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-001": {
|
||||
|
@ -3603,6 +3654,8 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-exp-0801": {
|
||||
|
@ -3620,6 +3673,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-exp-0827": {
|
||||
|
@ -3637,6 +3692,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-pro-latest": {
|
||||
|
@ -3654,6 +3711,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_response_schema": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 1000,
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-pro-vision": {
|
||||
|
@ -3668,6 +3727,9 @@
|
|||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"rpd": 30000,
|
||||
"tpm": 120000,
|
||||
"rpm": 360,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"gemini/gemini-gemma-2-27b-it": {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.53.0"
|
||||
version = "1.53.2"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.53.0"
|
||||
version = "1.53.2"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# LITELLM PROXY DEPENDENCIES #
|
||||
anyio==4.4.0 # openai + http req.
|
||||
openai==1.54.0 # openai req.
|
||||
openai==1.55.3 # openai req.
|
||||
fastapi==0.111.0 # server dep
|
||||
backoff==2.2.1 # server dep
|
||||
pyyaml==6.0.0 # server dep
|
||||
|
|
|
@ -46,17 +46,22 @@ print(env_keys)
|
|||
repo_base = "./"
|
||||
print(os.listdir(repo_base))
|
||||
docs_path = (
|
||||
"../../docs/my-website/docs/proxy/config_settings.md" # Path to the documentation
|
||||
"./docs/my-website/docs/proxy/config_settings.md" # Path to the documentation
|
||||
)
|
||||
documented_keys = set()
|
||||
try:
|
||||
with open(docs_path, "r", encoding="utf-8") as docs_file:
|
||||
content = docs_file.read()
|
||||
|
||||
print(f"content: {content}")
|
||||
|
||||
# Find the section titled "general_settings - Reference"
|
||||
general_settings_section = re.search(
|
||||
r"### environment variables - Reference(.*?)###", content, re.DOTALL
|
||||
r"### environment variables - Reference(.*?)(?=\n###|\Z)",
|
||||
content,
|
||||
re.DOTALL | re.MULTILINE,
|
||||
)
|
||||
print(f"general_settings_section: {general_settings_section}")
|
||||
if general_settings_section:
|
||||
# Extract the table rows, which contain the documented keys
|
||||
table_content = general_settings_section.group(1)
|
||||
|
@ -70,6 +75,7 @@ except Exception as e:
|
|||
)
|
||||
|
||||
|
||||
print(f"documented_keys: {documented_keys}")
|
||||
# Compare and find undocumented keys
|
||||
undocumented_keys = env_keys - documented_keys
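The tightened regex uses a non-greedy capture with a lookahead, so the section body ends at the next `###` heading or at end-of-file instead of requiring a trailing `###`. A quick demonstration on a toy snippet:

```python
import re

content = """### environment variables - Reference
| OPENAI_API_KEY | OpenAI key |
| DATABASE_URL | Postgres connection string |
"""

match = re.search(
    r"### environment variables - Reference(.*?)(?=\n###|\Z)",
    content,
    re.DOTALL | re.MULTILINE,
)
print(match.group(1))
# the table rows are captured even though no later "###" heading follows
```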
|
||||
|
||||
|
|
tests/documentation_tests/test_router_settings.py (new file, 87 lines)
|
@ -0,0 +1,87 @@
|
|||
import os
|
||||
import re
|
||||
import inspect
|
||||
from typing import Type
|
||||
import sys
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import litellm
|
||||
|
||||
|
||||
def get_init_params(cls: Type) -> list[str]:
|
||||
"""
|
||||
Retrieve all parameters supported by the `__init__` method of a given class.
|
||||
|
||||
Args:
|
||||
cls: The class to inspect.
|
||||
|
||||
Returns:
|
||||
A list of parameter names.
|
||||
"""
|
||||
if not hasattr(cls, "__init__"):
|
||||
raise ValueError(
|
||||
f"The provided class {cls.__name__} does not have an __init__ method."
|
||||
)
|
||||
|
||||
init_method = cls.__init__
|
||||
argspec = inspect.getfullargspec(init_method)
|
||||
|
||||
# The first argument is usually 'self', so we exclude it
|
||||
return argspec.args[1:] # Exclude 'self'
|
||||
|
||||
|
||||
router_init_params = set(get_init_params(litellm.router.Router))
|
||||
print(router_init_params)
|
||||
router_init_params.remove("model_list")
|
||||
|
||||
# Parse the documentation to extract documented keys
|
||||
repo_base = "./"
|
||||
print(os.listdir(repo_base))
|
||||
docs_path = (
|
||||
"./docs/my-website/docs/proxy/config_settings.md" # Path to the documentation
|
||||
)
|
||||
# docs_path = (
|
||||
# "../../docs/my-website/docs/proxy/config_settings.md" # Path to the documentation
|
||||
# )
|
||||
documented_keys = set()
|
||||
try:
|
||||
with open(docs_path, "r", encoding="utf-8") as docs_file:
|
||||
content = docs_file.read()
|
||||
|
||||
# Find the section titled "general_settings - Reference"
|
||||
general_settings_section = re.search(
|
||||
r"### router_settings - Reference(.*?)###", content, re.DOTALL
|
||||
)
|
||||
if general_settings_section:
|
||||
# Extract the table rows, which contain the documented keys
|
||||
table_content = general_settings_section.group(1)
|
||||
doc_key_pattern = re.compile(
|
||||
r"\|\s*([^\|]+?)\s*\|"
|
||||
) # Capture the key from each row of the table
|
||||
documented_keys.update(doc_key_pattern.findall(table_content))
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"Error reading documentation: {e}, \n repo base - {os.listdir(repo_base)}"
|
||||
)
|
||||
|
||||
|
||||
# Compare and find undocumented keys
|
||||
undocumented_keys = router_init_params - documented_keys
|
||||
|
||||
# Print results
|
||||
print("Keys expected in 'router settings' (found in code):")
|
||||
for key in sorted(router_init_params):
|
||||
print(key)
|
||||
|
||||
if undocumented_keys:
|
||||
raise Exception(
|
||||
f"\nKeys not documented in 'router settings - Reference': {undocumented_keys}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
"\nAll keys are documented in 'router settings - Reference'. - {}".format(
|
||||
router_init_params
|
||||
)
|
||||
)
|
|
@ -1 +1,3 @@
|
|||
More tests under `litellm/litellm/tests/*`.
|
||||
Unit tests for individual LLM providers.
|
||||
|
||||
Name of the test file is the name of the LLM provider - e.g. `test_openai.py` is for OpenAI.
|
|
@ -62,7 +62,14 @@ class BaseLLMChatTest(ABC):
|
|||
response = litellm.completion(**base_completion_call_args, messages=messages)
|
||||
assert response is not None
|
||||
|
||||
def test_json_response_format(self):
|
||||
@pytest.mark.parametrize(
|
||||
"response_format",
|
||||
[
|
||||
{"type": "json_object"},
|
||||
{"type": "text"},
|
||||
],
|
||||
)
|
||||
def test_json_response_format(self, response_format):
|
||||
"""
|
||||
Test that the JSON response format is supported by the LLM API
|
||||
"""
|
||||
|
@ -83,7 +90,7 @@ class BaseLLMChatTest(ABC):
|
|||
response = litellm.completion(
|
||||
**base_completion_call_args,
|
||||
messages=messages,
|
||||
response_format={"type": "json_object"},
|
||||
response_format=response_format,
|
||||
)
|
||||
|
||||
print(response)
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -45,81 +45,59 @@ def test_map_azure_model_group(model_group_header, expected_model):
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.respx
|
||||
async def test_azure_ai_with_image_url(respx_mock: MockRouter):
|
||||
async def test_azure_ai_with_image_url():
|
||||
"""
|
||||
Important test:
|
||||
|
||||
Test that Azure AI studio can handle image_url passed when content is a list containing both text and image_url
|
||||
"""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
# Mock response based on the actual API response
|
||||
mock_response = {
|
||||
"id": "cmpl-53860ea1efa24d2883555bfec13d2254",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": None,
|
||||
"message": {
|
||||
"content": "The image displays a graphic with the text 'LiteLLM' in black",
|
||||
"role": "assistant",
|
||||
"refusal": None,
|
||||
"audio": None,
|
||||
"function_call": None,
|
||||
"tool_calls": None,
|
||||
},
|
||||
}
|
||||
],
|
||||
"created": 1731801937,
|
||||
"model": "phi35-vision-instruct",
|
||||
"object": "chat.completion",
|
||||
"usage": {
|
||||
"completion_tokens": 69,
|
||||
"prompt_tokens": 617,
|
||||
"total_tokens": 686,
|
||||
"completion_tokens_details": None,
|
||||
"prompt_tokens_details": None,
|
||||
},
|
||||
}
|
||||
|
||||
# Mock the API request
|
||||
mock_request = respx_mock.post(
|
||||
"https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com"
|
||||
).mock(return_value=httpx.Response(200, json=mock_response))
|
||||
|
||||
response = await litellm.acompletion(
|
||||
model="azure_ai/Phi-3-5-vision-instruct-dcvov",
|
||||
api_base="https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What is in this image?",
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
client = AsyncOpenAI(
|
||||
api_key="fake-api-key",
|
||||
base_url="https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com",
|
||||
)
|
||||
|
||||
# Verify the request was made
|
||||
assert mock_request.called
|
||||
with patch.object(
|
||||
client.chat.completions.with_raw_response, "create"
|
||||
) as mock_client:
|
||||
try:
|
||||
await litellm.acompletion(
|
||||
model="azure_ai/Phi-3-5-vision-instruct-dcvov",
|
||||
api_base="https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What is in this image?",
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
api_key="fake-api-key",
|
||||
client=client,
|
||||
)
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
print(f"Error: {e}")
|
||||
|
||||
# Check the request body
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
assert request_body == {
|
||||
"model": "Phi-3-5-vision-instruct-dcvov",
|
||||
"messages": [
|
||||
# Verify the request was made
|
||||
mock_client.assert_called_once()
|
||||
|
||||
# Check the request body
|
||||
request_body = mock_client.call_args.kwargs
|
||||
assert request_body["model"] == "Phi-3-5-vision-instruct-dcvov"
|
||||
assert request_body["messages"] == [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
|
@ -132,7 +110,4 @@ async def test_azure_ai_with_image_url(respx_mock: MockRouter):
|
|||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
print(f"response: {response}")
|
||||
]
|
||||
|
|
|
@ -1243,6 +1243,19 @@ def test_bedrock_cross_region_inference(model):
    )


@pytest.mark.parametrize(
    "model, expected_base_model",
    [
        (
            "apac.anthropic.claude-3-5-sonnet-20240620-v1:0",
            "anthropic.claude-3-5-sonnet-20240620-v1:0",
        ),
    ],
)
def test_bedrock_get_base_model(model, expected_base_model):
    assert litellm.AmazonConverseConfig()._get_base_model(model) == expected_base_model


from litellm.llms.prompt_templates.factory import _bedrock_converse_messages_pt
@ -13,6 +13,7 @@ load_dotenv()
import httpx
import pytest
from respx import MockRouter
from unittest.mock import patch, MagicMock, AsyncMock

import litellm
from litellm import Choices, Message, ModelResponse
@ -41,56 +42,58 @@ def return_mocked_response(model: str):
|
|||
"bedrock/mistral.mistral-large-2407-v1:0",
|
||||
],
|
||||
)
|
||||
@pytest.mark.respx
|
||||
@pytest.mark.asyncio()
|
||||
async def test_bedrock_max_completion_tokens(model: str, respx_mock: MockRouter):
|
||||
async def test_bedrock_max_completion_tokens(model: str):
|
||||
"""
|
||||
Tests that:
|
||||
- max_completion_tokens is passed as max_tokens to bedrock models
|
||||
"""
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
client = AsyncHTTPHandler()
|
||||
|
||||
mock_response = return_mocked_response(model)
|
||||
_model = model.split("/")[1]
|
||||
print("\n\nmock_response: ", mock_response)
|
||||
url = f"https://bedrock-runtime.us-west-2.amazonaws.com/model/{_model}/converse"
|
||||
mock_request = respx_mock.post(url).mock(
|
||||
return_value=httpx.Response(200, json=mock_response)
|
||||
)
|
||||
|
||||
response = await litellm.acompletion(
|
||||
model=model,
|
||||
max_completion_tokens=10,
|
||||
messages=[{"role": "user", "content": "Hello!"}],
|
||||
)
|
||||
with patch.object(client, "post") as mock_client:
|
||||
try:
|
||||
response = await litellm.acompletion(
|
||||
model=model,
|
||||
max_completion_tokens=10,
|
||||
messages=[{"role": "user", "content": "Hello!"}],
|
||||
client=client,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
assert mock_request.called
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
mock_client.assert_called_once()
|
||||
request_body = json.loads(mock_client.call_args.kwargs["data"])
|
||||
|
||||
print("request_body: ", request_body)
|
||||
print("request_body: ", request_body)
|
||||
|
||||
assert request_body == {
|
||||
"messages": [{"role": "user", "content": [{"text": "Hello!"}]}],
|
||||
"additionalModelRequestFields": {},
|
||||
"system": [],
|
||||
"inferenceConfig": {"maxTokens": 10},
|
||||
}
|
||||
print(f"response: {response}")
|
||||
assert isinstance(response, ModelResponse)
|
||||
assert request_body == {
|
||||
"messages": [{"role": "user", "content": [{"text": "Hello!"}]}],
|
||||
"additionalModelRequestFields": {},
|
||||
"system": [],
|
||||
"inferenceConfig": {"maxTokens": 10},
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229,"],
|
||||
["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229"],
|
||||
)
|
||||
@pytest.mark.respx
|
||||
@pytest.mark.asyncio()
|
||||
async def test_anthropic_api_max_completion_tokens(model: str, respx_mock: MockRouter):
|
||||
async def test_anthropic_api_max_completion_tokens(model: str):
|
||||
"""
|
||||
Tests that:
|
||||
- max_completion_tokens is passed as max_tokens to anthropic models
|
||||
"""
|
||||
litellm.set_verbose = True
|
||||
from litellm.llms.custom_httpx.http_handler import HTTPHandler
|
||||
|
||||
mock_response = {
|
||||
"content": [{"text": "Hi! My name is Claude.", "type": "text"}],
|
||||
|
@ -103,30 +106,32 @@ async def test_anthropic_api_max_completion_tokens(model: str, respx_mock: MockR
|
|||
"usage": {"input_tokens": 2095, "output_tokens": 503},
|
||||
}
|
||||
|
||||
client = HTTPHandler()
|
||||
|
||||
print("\n\nmock_response: ", mock_response)
|
||||
url = f"https://api.anthropic.com/v1/messages"
|
||||
mock_request = respx_mock.post(url).mock(
|
||||
return_value=httpx.Response(200, json=mock_response)
|
||||
)
|
||||
|
||||
response = await litellm.acompletion(
|
||||
model=model,
|
||||
max_completion_tokens=10,
|
||||
messages=[{"role": "user", "content": "Hello!"}],
|
||||
)
|
||||
with patch.object(client, "post") as mock_client:
|
||||
try:
|
||||
response = await litellm.acompletion(
|
||||
model=model,
|
||||
max_completion_tokens=10,
|
||||
messages=[{"role": "user", "content": "Hello!"}],
|
||||
client=client,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
mock_client.assert_called_once()
|
||||
request_body = mock_client.call_args.kwargs["json"]
|
||||
|
||||
assert mock_request.called
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
print("request_body: ", request_body)
|
||||
|
||||
print("request_body: ", request_body)
|
||||
|
||||
assert request_body == {
|
||||
"messages": [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}],
|
||||
"max_tokens": 10,
|
||||
"model": model.split("/")[-1],
|
||||
}
|
||||
print(f"response: {response}")
|
||||
assert isinstance(response, ModelResponse)
|
||||
assert request_body == {
|
||||
"messages": [
|
||||
{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}
|
||||
],
|
||||
"max_tokens": 10,
|
||||
"model": model.split("/")[-1],
|
||||
}
|
||||
|
||||
|
||||
def test_all_model_configs():
|
||||
|
|
|
@ -12,95 +12,78 @@ sys.path.insert(
|
|||
import httpx
|
||||
import pytest
|
||||
from respx import MockRouter
|
||||
from unittest.mock import patch, MagicMock, AsyncMock
|
||||
|
||||
import litellm
|
||||
from litellm import Choices, Message, ModelResponse, EmbeddingResponse, Usage
|
||||
from litellm import completion
|
||||
|
||||
|
||||
@pytest.mark.respx
|
||||
def test_completion_nvidia_nim(respx_mock: MockRouter):
|
||||
def test_completion_nvidia_nim():
|
||||
from openai import OpenAI
|
||||
|
||||
litellm.set_verbose = True
|
||||
mock_response = ModelResponse(
|
||||
id="cmpl-mock",
|
||||
choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
|
||||
created=int(datetime.now().timestamp()),
|
||||
model="databricks/dbrx-instruct",
|
||||
)
|
||||
model_name = "nvidia_nim/databricks/dbrx-instruct"
|
||||
client = OpenAI(
|
||||
api_key="fake-api-key",
|
||||
)
|
||||
|
||||
mock_request = respx_mock.post(
|
||||
"https://integrate.api.nvidia.com/v1/chat/completions"
|
||||
).mock(return_value=httpx.Response(200, json=mock_response.dict()))
|
||||
try:
|
||||
response = completion(
|
||||
model=model_name,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like in Boston today in Fahrenheit?",
|
||||
}
|
||||
],
|
||||
presence_penalty=0.5,
|
||||
frequency_penalty=0.1,
|
||||
)
|
||||
with patch.object(
|
||||
client.chat.completions.with_raw_response, "create"
|
||||
) as mock_client:
|
||||
try:
|
||||
completion(
|
||||
model=model_name,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like in Boston today in Fahrenheit?",
|
||||
}
|
||||
],
|
||||
presence_penalty=0.5,
|
||||
frequency_penalty=0.1,
|
||||
client=client,
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
# Add any assertions here to check the response
|
||||
print(response)
|
||||
assert response.choices[0].message.content is not None
|
||||
assert len(response.choices[0].message.content) > 0
|
||||
|
||||
assert mock_request.called
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
mock_client.assert_called_once()
|
||||
request_body = mock_client.call_args.kwargs
|
||||
|
||||
print("request_body: ", request_body)
|
||||
|
||||
assert request_body == {
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like in Boston today in Fahrenheit?",
|
||||
}
|
||||
],
|
||||
"model": "databricks/dbrx-instruct",
|
||||
"frequency_penalty": 0.1,
|
||||
"presence_penalty": 0.5,
|
||||
}
|
||||
except litellm.exceptions.Timeout as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
def test_embedding_nvidia_nim(respx_mock: MockRouter):
|
||||
litellm.set_verbose = True
|
||||
mock_response = EmbeddingResponse(
|
||||
model="nvidia_nim/databricks/dbrx-instruct",
|
||||
data=[
|
||||
assert request_body["messages"] == [
|
||||
{
|
||||
"embedding": [0.1, 0.2, 0.3],
|
||||
"index": 0,
|
||||
}
|
||||
],
|
||||
usage=Usage(
|
||||
prompt_tokens=10,
|
||||
completion_tokens=0,
|
||||
total_tokens=10,
|
||||
),
|
||||
"role": "user",
|
||||
"content": "What's the weather like in Boston today in Fahrenheit?",
|
||||
},
|
||||
]
|
||||
assert request_body["model"] == "databricks/dbrx-instruct"
|
||||
assert request_body["frequency_penalty"] == 0.1
|
||||
assert request_body["presence_penalty"] == 0.5
|
||||
|
||||
|
||||
def test_embedding_nvidia_nim():
|
||||
litellm.set_verbose = True
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI(
|
||||
api_key="fake-api-key",
|
||||
)
|
||||
mock_request = respx_mock.post(
|
||||
"https://integrate.api.nvidia.com/v1/embeddings"
|
||||
).mock(return_value=httpx.Response(200, json=mock_response.dict()))
|
||||
response = litellm.embedding(
|
||||
model="nvidia_nim/nvidia/nv-embedqa-e5-v5",
|
||||
input="What is the meaning of life?",
|
||||
input_type="passage",
|
||||
)
|
||||
assert mock_request.called
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
print("request_body: ", request_body)
|
||||
assert request_body == {
|
||||
"input": "What is the meaning of life?",
|
||||
"model": "nvidia/nv-embedqa-e5-v5",
|
||||
"input_type": "passage",
|
||||
"encoding_format": "base64",
|
||||
}
|
||||
with patch.object(client.embeddings.with_raw_response, "create") as mock_client:
|
||||
try:
|
||||
litellm.embedding(
|
||||
model="nvidia_nim/nvidia/nv-embedqa-e5-v5",
|
||||
input="What is the meaning of life?",
|
||||
input_type="passage",
|
||||
client=client,
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
mock_client.assert_called_once()
|
||||
request_body = mock_client.call_args.kwargs
|
||||
print("request_body: ", request_body)
|
||||
assert request_body["input"] == "What is the meaning of life?"
|
||||
assert request_body["model"] == "nvidia/nv-embedqa-e5-v5"
|
||||
assert request_body["extra_body"]["input_type"] == "passage"
|
||||
|
|
|
@ -2,7 +2,7 @@ import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock
from unittest.mock import AsyncMock, patch

sys.path.insert(
    0, os.path.abspath("../..")
@ -63,8 +63,7 @@ def test_openai_prediction_param():


@pytest.mark.asyncio
@pytest.mark.respx
async def test_openai_prediction_param_mock(respx_mock: MockRouter):
async def test_openai_prediction_param_mock():
    """
    Tests that prediction parameter is correctly passed to the API
    """
@ -92,60 +91,36 @@ async def test_openai_prediction_param_mock(respx_mock: MockRouter):
|
|||
public string Username { get; set; }
|
||||
}
|
||||
"""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
mock_response = ModelResponse(
|
||||
id="chatcmpl-AQ5RmV8GvVSRxEcDxnuXlQnsibiY9",
|
||||
choices=[
|
||||
Choices(
|
||||
message=Message(
|
||||
content=code.replace("Username", "Email").replace(
|
||||
"username", "email"
|
||||
),
|
||||
role="assistant",
|
||||
)
|
||||
client = AsyncOpenAI(api_key="fake-api-key")
|
||||
|
||||
with patch.object(
|
||||
client.chat.completions.with_raw_response, "create"
|
||||
) as mock_client:
|
||||
try:
|
||||
await litellm.acompletion(
|
||||
model="gpt-4o-mini",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
|
||||
},
|
||||
{"role": "user", "content": code},
|
||||
],
|
||||
prediction={"type": "content", "content": code},
|
||||
client=client,
|
||||
)
|
||||
],
|
||||
created=int(datetime.now().timestamp()),
|
||||
model="gpt-4o-mini-2024-07-18",
|
||||
usage={
|
||||
"completion_tokens": 207,
|
||||
"prompt_tokens": 175,
|
||||
"total_tokens": 382,
|
||||
"completion_tokens_details": {
|
||||
"accepted_prediction_tokens": 0,
|
||||
"reasoning_tokens": 0,
|
||||
"rejected_prediction_tokens": 80,
|
||||
},
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
|
||||
return_value=httpx.Response(200, json=mock_response.dict())
|
||||
)
|
||||
mock_client.assert_called_once()
|
||||
request_body = mock_client.call_args.kwargs
|
||||
|
||||
completion = await litellm.acompletion(
|
||||
model="gpt-4o-mini",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
|
||||
},
|
||||
{"role": "user", "content": code},
|
||||
],
|
||||
prediction={"type": "content", "content": code},
|
||||
)
|
||||
|
||||
assert mock_request.called
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
|
||||
# Verify the request contains the prediction parameter
|
||||
assert "prediction" in request_body
|
||||
# verify prediction is correctly sent to the API
|
||||
assert request_body["prediction"] == {"type": "content", "content": code}
|
||||
|
||||
# Verify the completion tokens details
|
||||
assert completion.usage.completion_tokens_details.accepted_prediction_tokens == 0
|
||||
assert completion.usage.completion_tokens_details.rejected_prediction_tokens == 80
|
||||
# Verify the request contains the prediction parameter
|
||||
assert "prediction" in request_body
|
||||
# verify prediction is correctly sent to the API
|
||||
assert request_body["prediction"] == {"type": "content", "content": code}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
@ -223,3 +198,73 @@ async def test_openai_prediction_param_with_caching():
|
|||
)
|
||||
|
||||
assert completion_response_3.id != completion_response_1.id
|
||||
|
||||
|
||||
@pytest.mark.asyncio()
|
||||
async def test_vision_with_custom_model():
|
||||
"""
|
||||
Tests that an OpenAI compatible endpoint when sent an image will receive the image in the request
|
||||
|
||||
"""
|
||||
import base64
|
||||
import requests
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
client = AsyncOpenAI(api_key="fake-api-key")
|
||||
|
||||
litellm.set_verbose = True
|
||||
api_base = "https://my-custom.api.openai.com"
|
||||
|
||||
# Fetch and encode a test image
|
||||
url = "https://dummyimage.com/100/100/fff&text=Test+image"
|
||||
response = requests.get(url)
|
||||
file_data = response.content
|
||||
encoded_file = base64.b64encode(file_data).decode("utf-8")
|
||||
base64_image = f"data:image/png;base64,{encoded_file}"
|
||||
|
||||
with patch.object(
|
||||
client.chat.completions.with_raw_response, "create"
|
||||
) as mock_client:
|
||||
try:
|
||||
response = await litellm.acompletion(
|
||||
model="openai/my-custom-model",
|
||||
max_tokens=10,
|
||||
api_base=api_base, # use the mock api
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": base64_image},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
client=client,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
mock_client.assert_called_once()
|
||||
request_body = mock_client.call_args.kwargs
|
||||
|
||||
print("request_body: ", request_body)
|
||||
|
||||
assert request_body["messages"] == [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABB0lEQVRYhe2SzWrEIBCAh2A0jxEs4j6GLDS9hqWmV5Flt0cJS+lRwv742DXpEjY1kOZW6HwHFZnPmVEBEARBEARB/jd0KYA/bcUYbPrRLh6amXHJ/K+ypMoyUaGthILzw0l+xI0jsO7ZcmCcm4ILd+QuVYgpHOmDmz6jBeJImdcUCmeBqQpuqRIbVmQsLCrAalrGpfoEqEogqbLTWuXCPCo+Ki1XGqgQ+jVVuhB8bOaHkvmYuzm/b0KYLWwoK58oFqi6XfxQ4Uz7d6WeKpna6ytUs5e8betMcqAv5YPC5EZB2Lm9FIn0/VP6R58+/GEY1X1egVoZ/3bt/EqF6malgSAIgiDIH+QL41409QMY0LMAAAAASUVORK5CYII="
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
assert request_body["model"] == "my-custom-model"
|
||||
assert request_body["max_tokens"] == 10
|
|
@ -2,7 +2,7 @@ import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock
from unittest.mock import AsyncMock, patch, MagicMock

sys.path.insert(
    0, os.path.abspath("../..")
@ -18,87 +18,75 @@ from litellm import Choices, Message, ModelResponse
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.respx
|
||||
async def test_o1_handle_system_role(respx_mock: MockRouter):
|
||||
async def test_o1_handle_system_role():
|
||||
"""
|
||||
Tests that:
|
||||
- max_tokens is translated to 'max_completion_tokens'
|
||||
- role 'system' is translated to 'user'
|
||||
"""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
mock_response = ModelResponse(
|
||||
id="cmpl-mock",
|
||||
choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
|
||||
created=int(datetime.now().timestamp()),
|
||||
model="o1-preview",
|
||||
)
|
||||
client = AsyncOpenAI(api_key="fake-api-key")
|
||||
|
||||
mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
|
||||
return_value=httpx.Response(200, json=mock_response.dict())
|
||||
)
|
||||
with patch.object(
|
||||
client.chat.completions.with_raw_response, "create"
|
||||
) as mock_client:
|
||||
try:
|
||||
await litellm.acompletion(
|
||||
model="o1-preview",
|
||||
max_tokens=10,
|
||||
messages=[{"role": "system", "content": "Hello!"}],
|
||||
client=client,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
response = await litellm.acompletion(
|
||||
model="o1-preview",
|
||||
max_tokens=10,
|
||||
messages=[{"role": "system", "content": "Hello!"}],
|
||||
)
|
||||
mock_client.assert_called_once()
|
||||
request_body = mock_client.call_args.kwargs
|
||||
|
||||
assert mock_request.called
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
print("request_body: ", request_body)
|
||||
|
||||
print("request_body: ", request_body)
|
||||
|
||||
assert request_body == {
|
||||
"model": "o1-preview",
|
||||
"max_completion_tokens": 10,
|
||||
"messages": [{"role": "user", "content": "Hello!"}],
|
||||
}
|
||||
|
||||
print(f"response: {response}")
|
||||
assert isinstance(response, ModelResponse)
|
||||
assert request_body["model"] == "o1-preview"
|
||||
assert request_body["max_completion_tokens"] == 10
|
||||
assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.respx
|
||||
@pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"])
|
||||
async def test_o1_max_completion_tokens(respx_mock: MockRouter, model: str):
|
||||
async def test_o1_max_completion_tokens(model: str):
|
||||
"""
|
||||
Tests that:
|
||||
- max_completion_tokens is passed directly to OpenAI chat completion models
|
||||
"""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
mock_response = ModelResponse(
|
||||
id="cmpl-mock",
|
||||
choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
|
||||
created=int(datetime.now().timestamp()),
|
||||
model=model,
|
||||
)
|
||||
client = AsyncOpenAI(api_key="fake-api-key")
|
||||
|
||||
mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock(
|
||||
return_value=httpx.Response(200, json=mock_response.dict())
|
||||
)
|
||||
with patch.object(
|
||||
client.chat.completions.with_raw_response, "create"
|
||||
) as mock_client:
|
||||
try:
|
||||
await litellm.acompletion(
|
||||
model=model,
|
||||
max_completion_tokens=10,
|
||||
messages=[{"role": "user", "content": "Hello!"}],
|
||||
client=client,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
response = await litellm.acompletion(
|
||||
model=model,
|
||||
max_completion_tokens=10,
|
||||
messages=[{"role": "user", "content": "Hello!"}],
|
||||
)
|
||||
mock_client.assert_called_once()
|
||||
request_body = mock_client.call_args.kwargs
|
||||
|
||||
assert mock_request.called
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
print("request_body: ", request_body)
|
||||
|
||||
print("request_body: ", request_body)
|
||||
|
||||
assert request_body == {
|
||||
"model": model,
|
||||
"max_completion_tokens": 10,
|
||||
"messages": [{"role": "user", "content": "Hello!"}],
|
||||
}
|
||||
|
||||
print(f"response: {response}")
|
||||
assert isinstance(response, ModelResponse)
|
||||
assert request_body["model"] == model
|
||||
assert request_body["max_completion_tokens"] == 10
|
||||
assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]
|
||||
|
||||
|
||||
def test_litellm_responses():
|
||||
|
|
|
@ -1,94 +0,0 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from respx import MockRouter
|
||||
|
||||
import litellm
|
||||
from litellm import Choices, Message, ModelResponse
|
||||
|
||||
|
||||
@pytest.mark.asyncio()
|
||||
@pytest.mark.respx
|
||||
async def test_vision_with_custom_model(respx_mock: MockRouter):
|
||||
"""
|
||||
Tests that an OpenAI compatible endpoint when sent an image will receive the image in the request
|
||||
|
||||
"""
|
||||
import base64
|
||||
import requests
|
||||
|
||||
litellm.set_verbose = True
|
||||
api_base = "https://my-custom.api.openai.com"
|
||||
|
||||
# Fetch and encode a test image
|
||||
url = "https://dummyimage.com/100/100/fff&text=Test+image"
|
||||
response = requests.get(url)
|
||||
file_data = response.content
|
||||
encoded_file = base64.b64encode(file_data).decode("utf-8")
|
||||
base64_image = f"data:image/png;base64,{encoded_file}"
|
||||
|
||||
mock_response = ModelResponse(
|
||||
id="cmpl-mock",
|
||||
choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
|
||||
created=int(datetime.now().timestamp()),
|
||||
model="my-custom-model",
|
||||
)
|
||||
|
||||
mock_request = respx_mock.post(f"{api_base}/chat/completions").mock(
|
||||
return_value=httpx.Response(200, json=mock_response.dict())
|
||||
)
|
||||
|
||||
response = await litellm.acompletion(
|
||||
model="openai/my-custom-model",
|
||||
max_tokens=10,
|
||||
api_base=api_base, # use the mock api
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": base64_image},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
assert mock_request.called
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
|
||||
print("request_body: ", request_body)
|
||||
|
||||
assert request_body == {
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABB0lEQVRYhe2SzWrEIBCAh2A0jxEs4j6GLDS9hqWmV5Flt0cJS+lRwv742DXpEjY1kOZW6HwHFZnPmVEBEARBEARB/jd0KYA/bcUYbPrRLh6amXHJ/K+ypMoyUaGthILzw0l+xI0jsO7ZcmCcm4ILd+QuVYgpHOmDmz6jBeJImdcUCmeBqQpuqRIbVmQsLCrAalrGpfoEqEogqbLTWuXCPCo+Ki1XGqgQ+jVVuhB8bOaHkvmYuzm/b0KYLWwoK58oFqi6XfxQ4Uz7d6WeKpna6ytUs5e8betMcqAv5YPC5EZB2Lm9FIn0/VP6R58+/GEY1X1egVoZ/3bt/EqF6malgSAIgiDIH+QL41409QMY0LMAAAAASUVORK5CYII="
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
"model": "my-custom-model",
|
||||
"max_tokens": 10,
|
||||
}
|
||||
|
||||
print(f"response: {response}")
|
||||
assert isinstance(response, ModelResponse)
|
|
@ -6,6 +6,7 @@ from unittest.mock import AsyncMock
import pytest
import httpx
from respx import MockRouter
from unittest.mock import patch, MagicMock, AsyncMock

sys.path.insert(
    0, os.path.abspath("../..")
@ -68,13 +69,16 @@ def test_convert_dict_to_text_completion_response():
    assert response.choices[0].logprobs.top_logprobs == [None, {",": -2.1568563}]


@pytest.mark.skip(
    reason="need to migrate huggingface to support httpx client being passed in"
)
@pytest.mark.asyncio
@pytest.mark.respx
async def test_huggingface_text_completion_logprobs(respx_mock: MockRouter):
async def test_huggingface_text_completion_logprobs():
    """Test text completion with Hugging Face, focusing on logprobs structure"""
    litellm.set_verbose = True
    from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler

    # Mock the raw response from Hugging Face
    mock_response = [
        {
            "generated_text": ",\n\nI have a question...",  # truncated for brevity
@ -91,46 +95,48 @@ async def test_huggingface_text_completion_logprobs(respx_mock: MockRouter):
|
|||
}
|
||||
]
|
||||
|
||||
# Mock the API request
|
||||
mock_request = respx_mock.post(
|
||||
"https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
|
||||
).mock(return_value=httpx.Response(200, json=mock_response))
|
||||
return_val = AsyncMock()
|
||||
|
||||
response = await litellm.atext_completion(
|
||||
model="huggingface/mistralai/Mistral-7B-v0.1",
|
||||
prompt="good morning",
|
||||
)
|
||||
return_val.json.return_value = mock_response
|
||||
|
||||
# Verify the request
|
||||
assert mock_request.called
|
||||
request_body = json.loads(mock_request.calls[0].request.content)
|
||||
assert request_body == {
|
||||
"inputs": "good morning",
|
||||
"parameters": {"details": True, "return_full_text": False},
|
||||
"stream": False,
|
||||
}
|
||||
client = AsyncHTTPHandler()
|
||||
with patch.object(client, "post", return_value=return_val) as mock_post:
|
||||
response = await litellm.atext_completion(
|
||||
model="huggingface/mistralai/Mistral-7B-v0.1",
|
||||
prompt="good morning",
|
||||
client=client,
|
||||
)
|
||||
|
||||
print("response=", response)
|
||||
# Verify the request
|
||||
mock_post.assert_called_once()
|
||||
request_body = json.loads(mock_post.call_args.kwargs["data"])
|
||||
assert request_body == {
|
||||
"inputs": "good morning",
|
||||
"parameters": {"details": True, "return_full_text": False},
|
||||
"stream": False,
|
||||
}
|
||||
|
||||
# Verify response structure
|
||||
assert isinstance(response, TextCompletionResponse)
|
||||
assert response.object == "text_completion"
|
||||
assert response.model == "mistralai/Mistral-7B-v0.1"
|
||||
print("response=", response)
|
||||
|
||||
# Verify logprobs structure
|
||||
choice = response.choices[0]
|
||||
assert choice.finish_reason == "length"
|
||||
assert choice.index == 0
|
||||
assert isinstance(choice.logprobs.tokens, list)
|
||||
assert isinstance(choice.logprobs.token_logprobs, list)
|
||||
assert isinstance(choice.logprobs.text_offset, list)
|
||||
assert isinstance(choice.logprobs.top_logprobs, list)
|
||||
assert choice.logprobs.tokens == [",", "\n"]
|
||||
assert choice.logprobs.token_logprobs == [-1.7626953, -1.7314453]
|
||||
assert choice.logprobs.text_offset == [0, 1]
|
||||
assert choice.logprobs.top_logprobs == [{}, {}]
|
||||
# Verify response structure
|
||||
assert isinstance(response, TextCompletionResponse)
|
||||
assert response.object == "text_completion"
|
||||
assert response.model == "mistralai/Mistral-7B-v0.1"
|
||||
|
||||
# Verify usage
|
||||
assert response.usage["completion_tokens"] > 0
|
||||
assert response.usage["prompt_tokens"] > 0
|
||||
assert response.usage["total_tokens"] > 0
|
||||
# Verify logprobs structure
|
||||
choice = response.choices[0]
|
||||
assert choice.finish_reason == "length"
|
||||
assert choice.index == 0
|
||||
assert isinstance(choice.logprobs.tokens, list)
|
||||
assert isinstance(choice.logprobs.token_logprobs, list)
|
||||
assert isinstance(choice.logprobs.text_offset, list)
|
||||
assert isinstance(choice.logprobs.top_logprobs, list)
|
||||
assert choice.logprobs.tokens == [",", "\n"]
|
||||
assert choice.logprobs.token_logprobs == [-1.7626953, -1.7314453]
|
||||
assert choice.logprobs.text_offset == [0, 1]
|
||||
assert choice.logprobs.top_logprobs == [{}, {}]
|
||||
|
||||
# Verify usage
|
||||
assert response.usage["completion_tokens"] > 0
|
||||
assert response.usage["prompt_tokens"] > 0
|
||||
assert response.usage["total_tokens"] > 0
|
||||
|
|
|
@ -1146,6 +1146,21 @@ def test_process_gemini_image():
        mime_type="image/png", file_uri="https://example.com/image.png"
    )

    # Test HTTPS VIDEO URL
    https_result = _process_gemini_image("https://cloud-samples-data/video/animals.mp4")
    print("https_result PNG", https_result)
    assert https_result["file_data"] == FileDataType(
        mime_type="video/mp4", file_uri="https://cloud-samples-data/video/animals.mp4"
    )

    # Test HTTPS PDF URL
    https_result = _process_gemini_image("https://cloud-samples-data/pdf/animals.pdf")
    print("https_result PDF", https_result)
    assert https_result["file_data"] == FileDataType(
        mime_type="application/pdf",
        file_uri="https://cloud-samples-data/pdf/animals.pdf",
    )

    # Test base64 image
    base64_image = "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
    base64_result = _process_gemini_image(base64_image)
@ -95,3 +95,107 @@ async def test_handle_failed_db_connection():
|
|||
print("_handle_failed_db_connection_for_get_key_object got exception", exc_info)
|
||||
|
||||
assert str(exc_info.value) == "Failed to connect to DB"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model, expect_to_work",
|
||||
[("openai/gpt-4o-mini", True), ("openai/gpt-4o", False)],
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
async def test_can_key_call_model(model, expect_to_work):
|
||||
"""
|
||||
If wildcard model + specific model is used, choose the specific model settings
|
||||
"""
|
||||
from litellm.proxy.auth.auth_checks import can_key_call_model
|
||||
from fastapi import HTTPException
|
||||
|
||||
llm_model_list = [
|
||||
{
|
||||
"model_name": "openai/*",
|
||||
"litellm_params": {
|
||||
"model": "openai/*",
|
||||
"api_key": "test-api-key",
|
||||
},
|
||||
"model_info": {
|
||||
"id": "e6e7006f83029df40ebc02ddd068890253f4cd3092bcb203d3d8e6f6f606f30f",
|
||||
"db_model": False,
|
||||
"access_groups": ["public-openai-models"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_name": "openai/gpt-4o",
|
||||
"litellm_params": {
|
||||
"model": "openai/gpt-4o",
|
||||
"api_key": "test-api-key",
|
||||
},
|
||||
"model_info": {
|
||||
"id": "0cfcd87f2cb12a783a466888d05c6c89df66db23e01cecd75ec0b83aed73c9ad",
|
||||
"db_model": False,
|
||||
"access_groups": ["private-openai-models"],
|
||||
},
|
||||
},
|
||||
]
|
||||
router = litellm.Router(model_list=llm_model_list)
|
||||
args = {
|
||||
"model": model,
|
||||
"llm_model_list": llm_model_list,
|
||||
"valid_token": UserAPIKeyAuth(
|
||||
models=["public-openai-models"],
|
||||
),
|
||||
"llm_router": router,
|
||||
}
|
||||
if expect_to_work:
|
||||
await can_key_call_model(**args)
|
||||
else:
|
||||
with pytest.raises(Exception) as e:
|
||||
await can_key_call_model(**args)
|
||||
|
||||
print(e)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model, expect_to_work",
|
||||
[("openai/gpt-4o", False), ("openai/gpt-4o-mini", True)],
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
async def test_can_team_call_model(model, expect_to_work):
|
||||
from litellm.proxy.auth.auth_checks import model_in_access_group
|
||||
from fastapi import HTTPException
|
||||
|
||||
llm_model_list = [
|
||||
{
|
||||
"model_name": "openai/*",
|
||||
"litellm_params": {
|
||||
"model": "openai/*",
|
||||
"api_key": "test-api-key",
|
||||
},
|
||||
"model_info": {
|
||||
"id": "e6e7006f83029df40ebc02ddd068890253f4cd3092bcb203d3d8e6f6f606f30f",
|
||||
"db_model": False,
|
||||
"access_groups": ["public-openai-models"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_name": "openai/gpt-4o",
|
||||
"litellm_params": {
|
||||
"model": "openai/gpt-4o",
|
||||
"api_key": "test-api-key",
|
||||
},
|
||||
"model_info": {
|
||||
"id": "0cfcd87f2cb12a783a466888d05c6c89df66db23e01cecd75ec0b83aed73c9ad",
|
||||
"db_model": False,
|
||||
"access_groups": ["private-openai-models"],
|
||||
},
|
||||
},
|
||||
]
|
||||
router = litellm.Router(model_list=llm_model_list)
|
||||
|
||||
args = {
|
||||
"model": model,
|
||||
"team_models": ["public-openai-models"],
|
||||
"llm_router": router,
|
||||
}
|
||||
if expect_to_work:
|
||||
assert model_in_access_group(**args)
|
||||
else:
|
||||
assert not model_in_access_group(**args)
|
||||
|
|
|
@ -33,7 +33,7 @@ from litellm.router import Router

@pytest.mark.asyncio()
@pytest.mark.respx()
async def test_azure_tenant_id_auth(respx_mock: MockRouter):
async def test_aaaaazure_tenant_id_auth(respx_mock: MockRouter):
    """

    Tests when we set tenant_id, client_id, client_secret they don't get sent with the request
@ -1,128 +1,128 @@
|
|||
#### What this tests ####
|
||||
# This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk.
|
||||
import sys, os, time, inspect, asyncio, traceback
|
||||
from datetime import datetime
|
||||
import pytest
|
||||
# #### What this tests ####
|
||||
# # This adds perf testing to the router, to ensure it's never > 50ms slower than the azure-openai sdk.
|
||||
# import sys, os, time, inspect, asyncio, traceback
|
||||
# from datetime import datetime
|
||||
# import pytest
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
import openai, litellm, uuid
|
||||
from openai import AsyncAzureOpenAI
|
||||
# sys.path.insert(0, os.path.abspath("../.."))
|
||||
# import openai, litellm, uuid
|
||||
# from openai import AsyncAzureOpenAI
|
||||
|
||||
client = AsyncAzureOpenAI(
|
||||
api_key=os.getenv("AZURE_API_KEY"),
|
||||
azure_endpoint=os.getenv("AZURE_API_BASE"), # type: ignore
|
||||
api_version=os.getenv("AZURE_API_VERSION"),
|
||||
)
|
||||
# client = AsyncAzureOpenAI(
|
||||
# api_key=os.getenv("AZURE_API_KEY"),
|
||||
# azure_endpoint=os.getenv("AZURE_API_BASE"), # type: ignore
|
||||
# api_version=os.getenv("AZURE_API_VERSION"),
|
||||
# )
|
||||
|
||||
model_list = [
|
||||
{
|
||||
"model_name": "azure-test",
|
||||
"litellm_params": {
|
||||
"model": "azure/chatgpt-v-2",
|
||||
"api_key": os.getenv("AZURE_API_KEY"),
|
||||
"api_base": os.getenv("AZURE_API_BASE"),
|
||||
"api_version": os.getenv("AZURE_API_VERSION"),
|
||||
},
|
||||
}
|
||||
]
|
||||
# model_list = [
|
||||
# {
|
||||
# "model_name": "azure-test",
|
||||
# "litellm_params": {
|
||||
# "model": "azure/chatgpt-v-2",
|
||||
# "api_key": os.getenv("AZURE_API_KEY"),
|
||||
# "api_base": os.getenv("AZURE_API_BASE"),
|
||||
# "api_version": os.getenv("AZURE_API_VERSION"),
|
||||
# },
|
||||
# }
|
||||
# ]
|
||||
|
||||
router = litellm.Router(model_list=model_list) # type: ignore
|
||||
# router = litellm.Router(model_list=model_list) # type: ignore
|
||||
|
||||
|
||||
async def _openai_completion():
|
||||
try:
|
||||
start_time = time.time()
|
||||
response = await client.chat.completions.create(
|
||||
model="chatgpt-v-2",
|
||||
messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
|
||||
stream=True,
|
||||
)
|
||||
time_to_first_token = None
|
||||
first_token_ts = None
|
||||
init_chunk = None
|
||||
async for chunk in response:
|
||||
if (
|
||||
time_to_first_token is None
|
||||
and len(chunk.choices) > 0
|
||||
and chunk.choices[0].delta.content is not None
|
||||
):
|
||||
first_token_ts = time.time()
|
||||
time_to_first_token = first_token_ts - start_time
|
||||
init_chunk = chunk
|
||||
end_time = time.time()
|
||||
print(
|
||||
"OpenAI Call: ",
|
||||
init_chunk,
|
||||
start_time,
|
||||
first_token_ts,
|
||||
time_to_first_token,
|
||||
end_time,
|
||||
)
|
||||
return time_to_first_token
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return None
|
||||
# async def _openai_completion():
|
||||
# try:
|
||||
# start_time = time.time()
|
||||
# response = await client.chat.completions.create(
|
||||
# model="chatgpt-v-2",
|
||||
# messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
|
||||
# stream=True,
|
||||
# )
|
||||
# time_to_first_token = None
|
||||
# first_token_ts = None
|
||||
# init_chunk = None
|
||||
# async for chunk in response:
|
||||
# if (
|
||||
# time_to_first_token is None
|
||||
# and len(chunk.choices) > 0
|
||||
# and chunk.choices[0].delta.content is not None
|
||||
# ):
|
||||
# first_token_ts = time.time()
|
||||
# time_to_first_token = first_token_ts - start_time
|
||||
# init_chunk = chunk
|
||||
# end_time = time.time()
|
||||
# print(
|
||||
# "OpenAI Call: ",
|
||||
# init_chunk,
|
||||
# start_time,
|
||||
# first_token_ts,
|
||||
# time_to_first_token,
|
||||
# end_time,
|
||||
# )
|
||||
# return time_to_first_token
|
||||
# except Exception as e:
|
||||
# print(e)
|
||||
# return None
|
||||
|
||||
|
||||
async def _router_completion():
|
||||
try:
|
||||
start_time = time.time()
|
||||
response = await router.acompletion(
|
||||
model="azure-test",
|
||||
messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
|
||||
stream=True,
|
||||
)
|
||||
time_to_first_token = None
|
||||
first_token_ts = None
|
||||
init_chunk = None
|
||||
async for chunk in response:
|
||||
if (
|
||||
time_to_first_token is None
|
||||
and len(chunk.choices) > 0
|
||||
and chunk.choices[0].delta.content is not None
|
||||
):
|
||||
first_token_ts = time.time()
|
||||
time_to_first_token = first_token_ts - start_time
|
||||
init_chunk = chunk
|
||||
end_time = time.time()
|
||||
print(
|
||||
"Router Call: ",
|
||||
init_chunk,
|
||||
start_time,
|
||||
first_token_ts,
|
||||
time_to_first_token,
|
||||
end_time - first_token_ts,
|
||||
)
|
||||
return time_to_first_token
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return None
|
||||
# async def _router_completion():
|
||||
# try:
|
||||
# start_time = time.time()
|
||||
# response = await router.acompletion(
|
||||
# model="azure-test",
|
||||
# messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
|
||||
# stream=True,
|
||||
# )
|
||||
# time_to_first_token = None
|
||||
# first_token_ts = None
|
||||
# init_chunk = None
|
||||
# async for chunk in response:
|
||||
# if (
|
||||
# time_to_first_token is None
|
||||
# and len(chunk.choices) > 0
|
||||
# and chunk.choices[0].delta.content is not None
|
||||
# ):
|
||||
# first_token_ts = time.time()
|
||||
# time_to_first_token = first_token_ts - start_time
|
||||
# init_chunk = chunk
|
||||
# end_time = time.time()
|
||||
# print(
|
||||
# "Router Call: ",
|
||||
# init_chunk,
|
||||
# start_time,
|
||||
# first_token_ts,
|
||||
# time_to_first_token,
|
||||
# end_time - first_token_ts,
|
||||
# )
|
||||
# return time_to_first_token
|
||||
# except Exception as e:
|
||||
# print(e)
|
||||
# return None
|
||||
|
||||
|
||||
async def test_azure_completion_streaming():
|
||||
"""
|
||||
Test azure streaming call - measure on time to first (non-null) token.
|
||||
"""
|
||||
n = 3 # Number of concurrent tasks
|
||||
## OPENAI AVG. TIME
|
||||
tasks = [_openai_completion() for _ in range(n)]
|
||||
chat_completions = await asyncio.gather(*tasks)
|
||||
successful_completions = [c for c in chat_completions if c is not None]
|
||||
total_time = 0
|
||||
for item in successful_completions:
|
||||
total_time += item
|
||||
avg_openai_time = total_time / 3
|
||||
## ROUTER AVG. TIME
|
||||
tasks = [_router_completion() for _ in range(n)]
|
||||
chat_completions = await asyncio.gather(*tasks)
|
||||
successful_completions = [c for c in chat_completions if c is not None]
|
||||
total_time = 0
|
||||
for item in successful_completions:
|
||||
total_time += item
|
||||
avg_router_time = total_time / 3
|
||||
## COMPARE
|
||||
print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}")
|
||||
assert avg_router_time < avg_openai_time + 0.5
|
||||
# async def test_azure_completion_streaming():
|
||||
# """
|
||||
# Test azure streaming call - measure on time to first (non-null) token.
|
||||
# """
|
||||
# n = 3 # Number of concurrent tasks
|
||||
# ## OPENAI AVG. TIME
|
||||
# tasks = [_openai_completion() for _ in range(n)]
|
||||
# chat_completions = await asyncio.gather(*tasks)
|
||||
# successful_completions = [c for c in chat_completions if c is not None]
|
||||
# total_time = 0
|
||||
# for item in successful_completions:
|
||||
# total_time += item
|
||||
# avg_openai_time = total_time / 3
|
||||
# ## ROUTER AVG. TIME
|
||||
# tasks = [_router_completion() for _ in range(n)]
|
||||
# chat_completions = await asyncio.gather(*tasks)
|
||||
# successful_completions = [c for c in chat_completions if c is not None]
|
||||
# total_time = 0
|
||||
# for item in successful_completions:
|
||||
# total_time += item
|
||||
# avg_router_time = total_time / 3
|
||||
# ## COMPARE
|
||||
# print(f"avg_router_time: {avg_router_time}; avg_openai_time: {avg_openai_time}")
|
||||
# assert avg_router_time < avg_openai_time + 0.5
|
||||
|
||||
|
||||
# asyncio.run(test_azure_completion_streaming())
|
||||
# # asyncio.run(test_azure_completion_streaming())
|
||||
|
|
|
@ -1146,7 +1146,9 @@ async def test_exception_with_headers_httpx(

    except litellm.RateLimitError as e:
        exception_raised = True
        assert e.litellm_response_headers is not None
        assert (
            e.litellm_response_headers is not None
        ), "litellm_response_headers is None"
        print("e.litellm_response_headers", e.litellm_response_headers)
        assert int(e.litellm_response_headers["retry-after"]) == cooldown_time
@ -102,3 +102,17 @@ def test_get_model_info_ollama_chat():
        print(mock_client.call_args.kwargs)

        assert mock_client.call_args.kwargs["json"]["name"] == "mistral"


def test_get_model_info_gemini():
    """
    Tests if ALL gemini models have 'tpm' and 'rpm' in the model info
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    model_map = litellm.model_cost
    for model, info in model_map.items():
        if model.startswith("gemini/") and not "gemma" in model:
            assert info.get("tpm") is not None, f"{model} does not have tpm"
            assert info.get("rpm") is not None, f"{model} does not have rpm"
tests/local_testing/test_http_parsing_utils.py (new file, 79 lines added)
@ -0,0 +1,79 @@
import pytest
from fastapi import Request
from fastapi.testclient import TestClient
from starlette.datastructures import Headers
from starlette.requests import HTTPConnection
import os
import sys

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

from litellm.proxy.common_utils.http_parsing_utils import _read_request_body


@pytest.mark.asyncio
async def test_read_request_body_valid_json():
    """Test the function with a valid JSON payload."""

    class MockRequest:
        async def body(self):
            return b'{"key": "value"}'

    request = MockRequest()
    result = await _read_request_body(request)
    assert result == {"key": "value"}


@pytest.mark.asyncio
async def test_read_request_body_empty_body():
    """Test the function with an empty body."""

    class MockRequest:
        async def body(self):
            return b""

    request = MockRequest()
    result = await _read_request_body(request)
    assert result == {}


@pytest.mark.asyncio
async def test_read_request_body_invalid_json():
    """Test the function with an invalid JSON payload."""

    class MockRequest:
        async def body(self):
            return b'{"key": value}'  # Missing quotes around `value`

    request = MockRequest()
    result = await _read_request_body(request)
    assert result == {}  # Should return an empty dict on failure


@pytest.mark.asyncio
async def test_read_request_body_large_payload():
    """Test the function with a very large payload."""
    large_payload = '{"key":' + '"a"' * 10**6 + "}"  # Large payload

    class MockRequest:
        async def body(self):
            return large_payload.encode()

    request = MockRequest()
    result = await _read_request_body(request)
    assert result == {}  # Large payloads could trigger errors, so validate behavior


@pytest.mark.asyncio
async def test_read_request_body_unexpected_error():
    """Test the function when an unexpected error occurs."""

    class MockRequest:
        async def body(self):
            raise ValueError("Unexpected error")

    request = MockRequest()
    result = await _read_request_body(request)
    assert result == {}  # Ensure fallback behavior
Some files were not shown because too many files have changed in this diff.