Merge branch 'main' into fix/health-check-interval

Commit 322c7cd353: 136 changed files with 5845 additions and 3096 deletions.
@@ -119,7 +119,7 @@ jobs:
          paths:
            - local_testing_coverage.xml
            - local_testing_coverage
-  ui_endpoint_testing:
+  auth_ui_unit_tests:
    docker:
      - image: cimg/python:3.11
    auth:
@@ -161,8 +161,8 @@ jobs:
      - run:
          name: Rename the coverage files
          command: |
-            mv coverage.xml ui_endpoint_testing_coverage.xml
-            mv .coverage ui_endpoint_testing_coverage
+            mv coverage.xml auth_ui_unit_tests_coverage.xml
+            mv .coverage auth_ui_unit_tests_coverage

      # Store test results
      - store_test_results:
@@ -171,8 +171,8 @@ jobs:
      - persist_to_workspace:
          root: .
          paths:
-            - ui_endpoint_testing_coverage.xml
-            - ui_endpoint_testing_coverage
+            - auth_ui_unit_tests_coverage.xml
+            - auth_ui_unit_tests_coverage
  litellm_router_testing: # Runs all tests with the "router" keyword
    docker:
      - image: cimg/python:3.11
@@ -416,15 +416,17 @@ jobs:
          command: |
            python -m pip install --upgrade pip
            pip install ruff
            pip install pylint
+            pip install pyright
            pip install .
            curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
      - run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
      - run: ruff check ./litellm
      - run: python ./tests/documentation_tests/test_general_setting_keys.py
      - run: python ./tests/code_coverage_tests/router_code_coverage.py
      - run: python ./tests/documentation_tests/test_env_keys.py

+      - run: helm lint ./deploy/charts/litellm-helm

  db_migration_disable_update_check:
    machine:
      image: ubuntu-2204:2023.10.1
@@ -811,7 +813,7 @@ jobs:
            python -m venv venv
            . venv/bin/activate
            pip install coverage
-            coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage ui_endpoint_testing_coverage
+            coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage
            coverage xml
      - codecov/upload:
          file: ./coverage.xml
@@ -1011,7 +1013,7 @@ workflows:
              only:
                - main
                - /litellm_.*/
-      - ui_endpoint_testing:
+      - auth_ui_unit_tests:
          filters:
            branches:
              only:
@@ -1060,7 +1062,7 @@ workflows:
        - litellm_router_testing
        - local_testing
        - litellm_assistants_api_testing
-        - ui_endpoint_testing
+        - auth_ui_unit_tests
      - db_migration_disable_update_check:
          filters:
            branches:
@@ -1088,7 +1090,7 @@ workflows:
        - logging_testing
        - litellm_router_testing
        - litellm_assistants_api_testing
-        - ui_endpoint_testing
+        - auth_ui_unit_tests
        - db_migration_disable_update_check
        - e2e_ui_testing
        - installing_litellm_on_python
@@ -1099,4 +1101,4 @@ workflows:
          branches:
            only:
              - main
|
||||
|
||||
|
||||
|
|
.github/workflows/ghcr_helm_deploy.yml (vendored, 5 lines changed)

@@ -50,6 +50,9 @@ jobs:
          current-version: ${{ steps.current_version.outputs.current-version || '0.1.0' }}
          version-fragment: 'bug'

+      - name: Lint helm chart
+        run: helm lint deploy/charts/litellm-helm
+
      - uses: ./.github/actions/helm-oci-chart-releaser
        with:
          name: litellm-helm
@@ -61,4 +64,4 @@ jobs:
          registry_username: ${{ github.actor }}
          registry_password: ${{ secrets.GITHUB_TOKEN }}
          update_dependencies: true
|
||||
|
||||
|
||||
|
|
codecov.yaml (13 lines changed)

@@ -18,4 +18,15 @@ component_management:
        paths:
          - "*/proxy/auth/**"
comment:
  layout: "header, diff, flags, components" # show component info in the PR comment
+
+coverage:
+  status:
+    project:
+      default:
+        target: auto
+        threshold: 1% # at maximum allow project coverage to drop by 1%
+    patch:
+      default:
+        target: auto
+        threshold: 0% # patch coverage should be 100%
|
||||
|
|
|
@@ -1,72 +0,0 @@
|
|||
import clickhouse_connect
|
||||
import datetime as datetime
|
||||
import os
|
||||
|
||||
client = clickhouse_connect.get_client(
|
||||
host=os.getenv("CLICKHOUSE_HOST"),
|
||||
port=int(os.getenv("CLICKHOUSE_PORT")),
|
||||
username=os.getenv("CLICKHOUSE_USERNAME"),
|
||||
password=os.getenv("CLICKHOUSE_PASSWORD"),
|
||||
)
|
||||
import clickhouse_connect
|
||||
|
||||
row1 = [
|
||||
"ishaan", # request_id
|
||||
"GET", # call_type
|
||||
"api_key_123", # api_key
|
||||
50.00, # spend
|
||||
1000, # total_tokens
|
||||
800, # prompt_tokens
|
||||
200, # completion_tokens
|
||||
datetime.datetime.now(), # startTime (replace with the actual timestamp)
|
||||
datetime.datetime.now(), # endTime (replace with the actual timestamp)
|
||||
"gpt-3.5", # model
|
||||
"user123", # user
|
||||
'{"key": "value"}', # metadata (replace with valid JSON)
|
||||
"True", # cache_hit
|
||||
"cache_key_123", # cache_key
|
||||
"tag1,tag2", # request_tags
|
||||
]
|
||||
|
||||
row2 = [
|
||||
"jaffer", # request_id
|
||||
"POST", # call_type
|
||||
"api_key_456", # api_key
|
||||
30.50, # spend
|
||||
800, # total_tokens
|
||||
600, # prompt_tokens
|
||||
200, # completion_tokens
|
||||
datetime.datetime.now(), # startTime (replace with the actual timestamp)
|
||||
datetime.datetime.now(), # endTime (replace with the actual timestamp)
|
||||
"gpt-4.0", # model
|
||||
"user456", # user
|
||||
'{"key": "value"}', # metadata (replace with valid JSON)
|
||||
"False", # cache_hit
|
||||
"cache_key_789", # cache_key
|
||||
"tag3,tag4", # request_tags
|
||||
]
|
||||
|
||||
data = [row1, row2]
|
||||
resp = client.insert(
|
||||
"spend_logs",
|
||||
data,
|
||||
column_names=[
|
||||
"request_id",
|
||||
"call_type",
|
||||
"api_key",
|
||||
"spend",
|
||||
"total_tokens",
|
||||
"prompt_tokens",
|
||||
"completion_tokens",
|
||||
"startTime",
|
||||
"endTime",
|
||||
"model",
|
||||
"user",
|
||||
"metadata",
|
||||
"cache_hit",
|
||||
"cache_key",
|
||||
"request_tags",
|
||||
],
|
||||
)
|
||||
|
||||
print(resp)
|
|
@@ -1,39 +0,0 @@
|
|||
# insert data into clickhouse
|
||||
# response = client.command(
|
||||
# """
|
||||
# CREATE TEMPORARY TABLE temp_spend_logs AS (
|
||||
# SELECT
|
||||
# generateUUIDv4() AS request_id,
|
||||
# arrayElement(['TypeA', 'TypeB', 'TypeC'], rand() % 3 + 1) AS call_type,
|
||||
# 'ishaan' as api_key,
|
||||
# rand() * 1000 AS spend,
|
||||
# rand() * 100 AS total_tokens,
|
||||
# rand() * 50 AS prompt_tokens,
|
||||
# rand() * 50 AS completion_tokens,
|
||||
# toDate('2024-02-01') + toIntervalDay(rand()%27) AS startTime,
|
||||
# now() AS endTime,
|
||||
# arrayElement(['azure/gpt-4', 'gpt-3.5', 'vertexai/gemini-pro', 'mistral/mistral-small', 'ollama/llama2'], rand() % 3 + 1) AS model,
|
||||
# 'ishaan-insert-rand' as user,
|
||||
# 'data' as metadata,
|
||||
# 'true'AS cache_hit,
|
||||
# 'ishaan' as cache_key,
|
||||
# '{"tag1": "value1", "tag2": "value2"}' AS request_tags
|
||||
# FROM numbers(1, 1000000)
|
||||
# );
|
||||
# """
|
||||
# )
|
||||
|
||||
# client.command(
|
||||
# """
|
||||
# -- Insert data into spend_logs table
|
||||
# INSERT INTO spend_logs
|
||||
# SELECT * FROM temp_spend_logs;
|
||||
# """
|
||||
# )
|
||||
|
||||
|
||||
# client.command(
|
||||
# """
|
||||
# DROP TABLE IF EXISTS temp_spend_logs;
|
||||
# """
|
||||
# )
|
|
@@ -24,7 +24,7 @@ version: 0.3.0
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
-appVersion: v1.46.6
+appVersion: v1.50.2

dependencies:
  - name: "postgresql"
|
||||
|
|
|
@@ -28,14 +28,13 @@ If `db.useStackgresOperator` is used (not yet implemented):
| `image.repository` | LiteLLM Proxy image repository | `ghcr.io/berriai/litellm` |
| `image.pullPolicy` | LiteLLM Proxy image pull policy | `IfNotPresent` |
| `image.tag` | Overrides the image tag whose default is the latest version of LiteLLM at the time this chart was published. | `""` |
| `image.dbReadyImage` | On Pod startup, an initContainer is used to make sure the Postgres database is available before attempting to start LiteLLM. This field specifies the image to use as that initContainer. | `docker.io/bitnami/postgresql` |
| `image.dbReadyTag` | Tag for the above image. If not specified, "latest" is used. | `""` |
| `imagePullSecrets` | Registry credentials for the LiteLLM and initContainer images. | `[]` |
| `serviceAccount.create` | Whether or not to create a Kubernetes Service Account for this deployment. The default is `false` because LiteLLM has no need to access the Kubernetes API. | `false` |
| `service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` |
| `service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the proxy will listen on. | `4000` |
| `ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A |
| `proxy_config.*` | See [values.yaml](./values.yaml) for default settings. See [example_config_yaml](../../../litellm/proxy/example_config_yaml/) for configuration examples. | N/A |
| `extraContainers[]` | An array of additional containers to be deployed as sidecars alongside the LiteLLM Proxy. | `[]` |

#### Example `environmentSecrets` Secret

@@ -127,4 +126,4 @@ kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.ma
At the time of writing, the Admin UI is unable to add models. This is because
it would need to update the `config.yaml` file which is an exposed ConfigMap, and
therefore, read-only. This is a limitation of this helm chart, not the Admin UI
itself.
|
||||
|
|
|
@@ -31,71 +31,6 @@ spec:
|
|||
serviceAccountName: {{ include "litellm.serviceAccountName" . }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
||||
initContainers:
|
||||
- name: db-ready
|
||||
securityContext:
|
||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
||||
image: "{{ .Values.image.dbReadyImage }}:{{ .Values.image.dbReadyTag | default("16.1.0-debian-11-r20") }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
env:
|
||||
{{- if .Values.db.deployStandalone }}
|
||||
- name: DATABASE_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||
key: username
|
||||
- name: PGPASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||
key: password
|
||||
- name: DATABASE_HOST
|
||||
value: {{ .Release.Name }}-postgresql
|
||||
- name: DATABASE_NAME
|
||||
value: litellm
|
||||
{{- else if .Values.db.useExisting }}
|
||||
- name: DATABASE_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ .Values.db.secret.name }}
|
||||
key: {{ .Values.db.secret.usernameKey }}
|
||||
- name: PGPASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ .Values.db.secret.name }}
|
||||
key: {{ .Values.db.secret.passwordKey }}
|
||||
- name: DATABASE_HOST
|
||||
value: {{ .Values.db.endpoint }}
|
||||
- name: DATABASE_NAME
|
||||
value: {{ .Values.db.database }}
|
||||
{{- end }}
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
# Maximum wait time will be (limit * 2) seconds.
|
||||
limit=60
|
||||
current=0
|
||||
ret=1
|
||||
while [ $current -lt $limit ] && [ $ret -ne 0 ]; do
|
||||
echo "Waiting for database to be ready $current"
|
||||
psql -U $(DATABASE_USERNAME) -h $(DATABASE_HOST) -l
|
||||
ret=$?
|
||||
current=$(( $current + 1 ))
|
||||
sleep 2
|
||||
done
|
||||
if [ $ret -eq 0 ]; then
|
||||
echo "Database is ready"
|
||||
else
|
||||
echo "Database failed to become ready before we gave up waiting."
|
||||
fi
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 12 }}
|
||||
{{ if .Values.securityContext.readOnlyRootFilesystem }}
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
{{ end }}
|
||||
containers:
|
||||
- name: {{ include "litellm.name" . }}
|
||||
securityContext:
|
||||
|
@@ -203,6 +138,9 @@ spec:
|
|||
{{- with .Values.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.extraContainers }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
{{ if .Values.securityContext.readOnlyRootFilesystem }}
|
||||
- name: tmp
|
||||
|
@@ -235,4 +173,4 @@ spec:
|
|||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
|
|
@@ -7,16 +7,11 @@ replicaCount: 1
image:
  # Use "ghcr.io/berriai/litellm-database" for optimized image with database
  repository: ghcr.io/berriai/litellm-database
-  pullPolicy: IfNotPresent
+  pullPolicy: Always
  # Overrides the image tag whose default is the chart appVersion.
  # tag: "main-latest"
  tag: ""

-  # Image and tag used for the init container to check and wait for the
-  # readiness of the postgres database.
-  dbReadyImage: docker.io/bitnami/postgresql
-  dbReadyTag: ""
-
imagePullSecrets: []
nameOverride: "litellm"
fullnameOverride: ""
|
||||
|
|
|
@@ -84,6 +84,60 @@ print(query_result[:5])
</TabItem>
</Tabs>


## Image Embeddings

For models that support image embeddings, you can pass in a base64 encoded image string to the `input` param.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import embedding
import os

# set your api key
os.environ["COHERE_API_KEY"] = ""

response = embedding(model="cohere/embed-english-v3.0", input=["<base64 encoded image>"])
```
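
For reference, a minimal sketch of producing that base64 string from a local image file before passing it to `embedding()` (the `image.png` path is a hypothetical example; depending on the provider, a `data:image/png;base64,` prefix may also be required):

```python
import base64

from litellm import embedding

# read a local image and base64-encode it (hypothetical file path)
with open("image.png", "rb") as f:
    base64_image = base64.b64encode(f.read()).decode("utf-8")

response = embedding(
    model="cohere/embed-english-v3.0",
    input=[base64_image],
)
print(response)
```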

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: cohere-embed
    litellm_params:
      model: cohere/embed-english-v3.0
      api_key: os.environ/COHERE_API_KEY
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

3. Test it!

```bash
curl -X POST 'http://0.0.0.0:4000/v1/embeddings' \
-H 'Authorization: Bearer sk-54d77cd67b9febbb' \
-H 'Content-Type: application/json' \
-d '{
  "model": "cohere/embed-english-v3.0",
  "input": ["<base64 encoded image>"]
}'
```
</TabItem>
</Tabs>

## Input Params for `litellm.embedding()`
|
||||
|
||||
|
||||
|
|
|
@@ -62,7 +62,8 @@ litellm_settings:
environment_variables:
    ARIZE_SPACE_KEY: "d0*****"
    ARIZE_API_KEY: "141a****"
-    ARIZE_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize api endpoint
+    ARIZE_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize GRPC api endpoint
+    ARIZE_HTTP_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize HTTP api endpoint. Set either this or ARIZE_ENDPOINT
```
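
For reference, a minimal sketch of how the gRPC/HTTP endpoint choice is resolved (this mirrors the `ArizeLogger._get_arize_config` / `get_arize_opentelemetry_config` helpers further down in this diff; the function name here is illustrative):

```python
import os
from typing import Tuple


def pick_arize_otel_exporter() -> Tuple[str, str]:
    """Return (exporter, endpoint): HTTP wins if ARIZE_HTTP_ENDPOINT is set,
    otherwise fall back to gRPC, defaulting to Arize's public collector."""
    grpc_endpoint = os.environ.get("ARIZE_ENDPOINT")
    http_endpoint = os.environ.get("ARIZE_HTTP_ENDPOINT")

    if http_endpoint:
        return "otlp_http", http_endpoint
    return "otlp_grpc", grpc_endpoint or "https://otlp.arize.com/v1"
```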
|
||||
|
||||
## Support & Talk to Founders
|
||||
|
|
|
@@ -9,12 +9,11 @@ LiteLLM requires `boto3` to be installed on your system for Bedrock requests
pip install boto3>=1.28.57
```

## Required Environment Variables
```python
os.environ["AWS_ACCESS_KEY_ID"] = ""  # Access key
os.environ["AWS_SECRET_ACCESS_KEY"] = ""  # Secret access key
os.environ["AWS_REGION_NAME"] = ""  # us-east-1, us-east-2, us-west-1, us-west-2
```
:::info

LiteLLM uses boto3 to handle authentication. All these options are supported - https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#credentials.

:::

## Usage

@@ -22,6 +21,7 @@ os.environ["AWS_REGION_NAME"] = "" # us-east-1, us-east-2, us-west-1, us-west-2
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

```python
import os
from litellm import completion

@@ -38,7 +38,7 @@ response = completion(

## LiteLLM Proxy Usage

-Here's how to call Anthropic with the LiteLLM Proxy Server
+Here's how to call Bedrock with the LiteLLM Proxy Server

### 1. Setup config.yaml
|
||||
|
||||
|
|
|
@@ -135,7 +135,7 @@ Cli arguments, --host, --port, --num_workers
```

## --request_timeout
-- **Default:** `600`
+- **Default:** `6000`
- **Type:** `int`
- Set the timeout in seconds for completion calls.
- **Usage:**
|
||||
|
|
|
@@ -625,6 +625,7 @@ litellm_settings:
  redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
  langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging

+  request_timeout: 10 # (int) llm request timeout in seconds. Raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout

  set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
  json_logs: boolean # if true, logs will be in json format
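
Since this option simply sets `litellm.request_timeout`, a minimal sketch of the equivalent setting when using the Python SDK directly (rather than through the proxy config) would be:

```python
import litellm
from litellm import completion

# equivalent of `request_timeout: 10` in litellm_settings:
# raise a Timeout error if an LLM call takes longer than 10 seconds
litellm.request_timeout = 10

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
```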
|
||||
|
@@ -721,6 +722,7 @@ general_settings:
|
|||
| set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION |
|
||||
| json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set the `litellm.json_logs = True`. We currently just log the raw POST request from litellm as a JSON [Further docs](./debugging) |
|
||||
| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) |
|
||||
| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) |
|
||||
| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
|
||||
| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
|
||||
| cache | boolean | If true, enables caching. [Further docs](./caching) |
|
||||
|
@@ -812,6 +814,7 @@ general_settings:
|
|||
| pass_through_endpoints | List[Dict[str, Any]] | Define the pass through endpoints. [Docs](./pass_through) |
|
||||
| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables oauth2.0 authentication |
|
||||
| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI). |
|
||||
| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any `x-` headers) to the backend LLM call |
|
||||
|
||||
### router_settings - Reference
|
||||
|
||||
|
@@ -898,10 +901,6 @@ router_settings:
|
|||
| BRAINTRUST_API_KEY | API key for Braintrust integration
|
||||
| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI
|
||||
| CIRCLE_OIDC_TOKEN_V2 | Version 2 of the OpenID Connect token for CircleCI
|
||||
| CLICKHOUSE_HOST | Host for ClickHouse database
|
||||
| CLICKHOUSE_PASSWORD | Password for ClickHouse authentication
|
||||
| CLICKHOUSE_PORT | Port for ClickHouse database connection
|
||||
| CLICKHOUSE_USERNAME | Username for ClickHouse authentication
|
||||
| CONFIG_FILE_PATH | File path for configuration file
|
||||
| CUSTOM_TIKTOKEN_CACHE_DIR | Custom directory for Tiktoken cache
|
||||
| DATABASE_HOST | Hostname for the database server
|
||||
|
@@ -919,6 +918,7 @@ router_settings:
|
|||
| DD_API_KEY | API key for Datadog integration
|
||||
| DD_SITE | Site URL for Datadog (e.g., datadoghq.com)
|
||||
| DD_SOURCE | Source identifier for Datadog logs
|
||||
| DD_ENV | Environment identifier for Datadog logs. Only supported for `datadog_llm_observability` callback
|
||||
| DEBUG_OTEL | Enable debug mode for OpenTelemetry
|
||||
| DIRECT_URL | Direct URL for service endpoint
|
||||
| DISABLE_ADMIN_UI | Toggle to disable the admin UI
|
||||
|
|
|
@@ -57,4 +57,34 @@ model_list:
      api_version: os.environ/AZURE_API_VERSION
      input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
      output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token
```

### Debugging

If your custom pricing is not being used or you're seeing errors, please check the following:

1. Run the proxy with `LITELLM_LOG="DEBUG"` or the `--detailed_debug` cli flag

```bash
litellm --config /path/to/config.yaml --detailed_debug
```

2. Check logs for this line:

```
LiteLLM:DEBUG: utils.py:263 - litellm.acompletion
```

3. Check if 'input_cost_per_token' and 'output_cost_per_token' are top-level keys in the acompletion function.

```python
acompletion(
    ...,
    input_cost_per_token=my_custom_price,
    output_cost_per_token=my_custom_price,
)
```

If these keys are not present, LiteLLM will not use your custom pricing.

If the problem persists, please file an issue on [GitHub](https://github.com/BerriAI/litellm/issues).
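
For completeness, a minimal SDK-level sketch of passing custom pricing directly on a call (the deployment name and prices here are illustrative):

```python
from litellm import completion

response = completion(
    model="azure/my-deployment",            # hypothetical Azure deployment name
    messages=[{"role": "user", "content": "hello"}],
    input_cost_per_token=0.000421,          # 👈 ONLY to track cost per token
    output_cost_per_token=0.000520,         # 👈 ONLY to track cost per token
)
```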
|
|
@@ -1279,7 +1279,8 @@ litellm_settings:
environment_variables:
    ARIZE_SPACE_KEY: "d0*****"
    ARIZE_API_KEY: "141a****"
-    ARIZE_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize api endpoint
+    ARIZE_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize GRPC api endpoint
+    ARIZE_HTTP_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize HTTP api endpoint. Set either this or ARIZE_ENDPOINT
```

2. Start Proxy
|
||||
|
@@ -1467,6 +1468,13 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \

## Logging Proxy Input/Output - DataDog

LiteLLM supports logging to the following Datadog integrations:
- `datadog` [Datadog Logs](https://docs.datadoghq.com/logs/)
- `datadog_llm_observability` [Datadog LLM Observability](https://www.datadoghq.com/product/llm-observability/)

<Tabs>
<TabItem value="datadog" label="Datadog Logs">

We will use the `--config` to set `litellm.success_callback = ["datadog"]`. This will log all successful LLM calls to DataDog.

**Step 1**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`

@@ -1481,6 +1489,21 @@ litellm_settings:
  service_callback: ["datadog"] # logs redis, postgres failures on datadog
```

</TabItem>
<TabItem value="datadog_llm_observability" label="Datadog LLM Observability">

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
litellm_settings:
  callbacks: ["datadog_llm_observability"] # logs llm success logs on datadog
```
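
If you are using the Python SDK directly instead of the proxy, a minimal sketch of the equivalent setting (assuming `DD_API_KEY` and `DD_SITE` are already exported, as described in Step 2 below):

```python
import litellm
from litellm import completion

# assumes DD_API_KEY / DD_SITE are set in the environment
litellm.callbacks = ["datadog_llm_observability"]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
```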
|
||||
|
||||

</TabItem>
</Tabs>

**Step 2**: Set Required env variables for datadog

```shell
|
||||
|
|
|
@@ -21,6 +21,7 @@ general_settings:
  database_connection_pool_limit: 10 # limit the number of database connections to = MAX Number of DB Connections/Number of instances of litellm proxy (Around 10-20 is good number)

litellm_settings:
+  request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. Default value is 6000 seconds if not set
  set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on
  json_logs: true # Get debug logs in json format
```
|
||||
|
|
|
@@ -83,4 +83,21 @@ ws.on("message", function incoming(message) {
ws.on("error", function handleError(error) {
  console.error("Error: ", error);
});
```
+
+## Logging
+
+To prevent requests from being dropped, by default LiteLLM just logs these event types:
+
+- `session.created`
+- `response.create`
+- `response.done`
+
+You can override this by setting the `logged_real_time_event_types` parameter in the config. For example:
+
+```yaml
+litellm_settings:
+  logged_real_time_event_types: "*" # Log all events
+  ## OR ##
+  logged_real_time_event_types: ["session.created", "response.create", "response.done"] # Log only these event types
+```
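
For reference, a minimal sketch of the equivalent module-level setting when using LiteLLM directly from Python (the attribute is the one added to `litellm/__init__.py` later in this diff):

```python
import litellm

# log every realtime event type, or pass an explicit list instead
litellm.logged_real_time_event_types = "*"
# litellm.logged_real_time_event_types = ["session.created", "response.create", "response.done"]
```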
|
||||
|
|
|
@@ -1312,7 +1312,7 @@ LiteLLM proxy adds **0.00325 seconds** latency as compared to using the Raw Open
```

#### --request_timeout
-- **Default:** `600`
+- **Default:** `6000`
- **Type:** `int`
- Set the timeout in seconds for completion calls.
- **Usage:**
|
||||
|
|
docs/my-website/package-lock.json (generated, 6 lines changed)

@@ -12447,9 +12447,9 @@
      }
    },
    "node_modules/http-proxy-middleware": {
-      "version": "2.0.6",
-      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.6.tgz",
-      "integrity": "sha512-ya/UeJ6HVBYxrgYotAZo1KvPWlgB48kUJLDePFeneHsVujFaW5WNj2NgWCAE//B1Dl02BIfYlpNgBy8Kf8Rjmw==",
+      "version": "2.0.7",
+      "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-2.0.7.tgz",
+      "integrity": "sha512-fgVY8AV7qU7z/MmXJ/rxwbrtQH4jBQ9m7kp3llF0liB7glmFeVZFBepQb32T3y8n8k2+AEYuMPCpinYW+/CuRA==",
      "dependencies": {
        "@types/http-proxy": "^1.17.8",
        "http-proxy": "^1.18.1",
|
||||
|
|
|
@@ -8,6 +8,7 @@ import os
|
|||
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||
from litellm.caching.caching import Cache, DualCache, RedisCache, InMemoryCache
|
||||
from litellm.types.llms.bedrock import COHERE_EMBEDDING_INPUT_TYPES
|
||||
from litellm._logging import (
|
||||
set_verbose,
|
||||
_turn_on_debug,
|
||||
|
@@ -48,6 +49,7 @@ _custom_logger_compatible_callbacks_literal = Literal[
|
|||
"langsmith",
|
||||
"prometheus",
|
||||
"datadog",
|
||||
"datadog_llm_observability",
|
||||
"galileo",
|
||||
"braintrust",
|
||||
"arize",
|
||||
|
@ -56,6 +58,7 @@ _custom_logger_compatible_callbacks_literal = Literal[
|
|||
"opik",
|
||||
"argilla",
|
||||
]
|
||||
logged_real_time_event_types: Optional[Union[List[str], Literal["*"]]] = None
|
||||
_known_custom_logger_compatible_callbacks: List = list(
|
||||
get_args(_custom_logger_compatible_callbacks_literal)
|
||||
)
|
||||
|
@@ -79,6 +82,9 @@ turn_off_message_logging: Optional[bool] = False
|
|||
log_raw_request_response: bool = False
|
||||
redact_messages_in_exceptions: Optional[bool] = False
|
||||
redact_user_api_key_info: Optional[bool] = False
|
||||
add_user_information_to_llm_headers: Optional[bool] = (
|
||||
None # adds user_id, team_id, token hash (params from StandardLoggingMetadata) to request headers
|
||||
)
|
||||
store_audit_logs = False # Enterprise feature, allow users to see audit logs
|
||||
## end of callbacks #############
|
||||
|
||||
|
@@ -132,7 +138,7 @@ enable_azure_ad_token_refresh: Optional[bool] = False
### DEFAULT AZURE API VERSION ###
AZURE_DEFAULT_API_VERSION = "2024-08-01-preview" # this is updated to the latest
### COHERE EMBEDDINGS DEFAULT TYPE ###
-COHERE_DEFAULT_EMBEDDING_INPUT_TYPE = "search_document"
+COHERE_DEFAULT_EMBEDDING_INPUT_TYPE: COHERE_EMBEDDING_INPUT_TYPES = "search_document"
### GUARDRAILS ###
llamaguard_model_name: Optional[str] = None
openai_moderations_model_name: Optional[str] = None
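
Side note on the hunk above: it only adds a type annotation to the module-level Cohere default. For reference, a sketch of reading or overriding it from application code (the "search_query" value is an assumption based on Cohere's documented input types, not something this diff shows):

```python
import litellm

# module-level default shown above; "search_document" unless overridden
print(litellm.COHERE_DEFAULT_EMBEDDING_INPUT_TYPE)

# hypothetical override for query-time embeddings
litellm.COHERE_DEFAULT_EMBEDDING_INPUT_TYPE = "search_query"
```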
|
||||
|
@@ -159,9 +165,6 @@ enable_caching_on_provider_specific_optional_params: bool = (
|
|||
caching: bool = (
|
||||
False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
)
|
||||
always_read_redis: bool = (
|
||||
True # always use redis for rate limiting logic on litellm proxy
|
||||
)
|
||||
caching_with_models: bool = (
|
||||
False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
|
||||
)
|
||||
|
|
|
@@ -69,6 +69,8 @@ def _get_redis_cluster_kwargs(client=None):

    available_args = [x for x in arg_spec.args if x not in exclude_args]
    available_args.append("password")
+    available_args.append("username")
+    available_args.append("ssl")

    return available_args
|
||||
|
||||
|
|
|
@@ -233,7 +233,7 @@ class Cache:
|
|||
if self.namespace is not None and isinstance(self.cache, RedisCache):
|
||||
self.cache.namespace = self.namespace
|
||||
|
||||
def get_cache_key(self, *args, **kwargs) -> str: # noqa: PLR0915
|
||||
def get_cache_key(self, *args, **kwargs) -> str:
|
||||
"""
|
||||
Get the cache key for the given arguments.
|
||||
|
||||
|
|
|
@@ -32,7 +32,6 @@ class DualCache(BaseCache):
        redis_cache: Optional[RedisCache] = None,
        default_in_memory_ttl: Optional[float] = None,
        default_redis_ttl: Optional[float] = None,
-        always_read_redis: Optional[bool] = True,
    ) -> None:
        super().__init__()
        # If in_memory_cache is not provided, use the default InMemoryCache
|
||||
|
@@ -44,7 +43,6 @@ class DualCache(BaseCache):
            default_in_memory_ttl or litellm.default_in_memory_ttl
        )
        self.default_redis_ttl = default_redis_ttl or litellm.default_redis_ttl
-        self.always_read_redis = always_read_redis

    def update_cache_ttl(
        self, default_in_memory_ttl: Optional[float], default_redis_ttl: Optional[float]
|
||||
|
@@ -102,12 +100,8 @@ class DualCache(BaseCache):
            if in_memory_result is not None:
                result = in_memory_result

-            if (
-                (self.always_read_redis is True)
-                and self.redis_cache is not None
-                and local_only is False
-            ):
-                # If not found in in-memory cache or always_read_redis is True, try fetching from Redis
+            if result is None and self.redis_cache is not None and local_only is False:
+                # If not found in in-memory cache, try fetching from Redis
                redis_result = self.redis_cache.get_cache(key, **kwargs)

                if redis_result is not None:
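
In other words, get_cache is now read-through: Redis is consulted only when the in-memory tier misses, instead of on every read. A minimal standalone sketch of that lookup order (a toy class, independent of LiteLLM's actual DualCache):

```python
from typing import Any, Dict, Optional


class ReadThroughCache:
    """Toy two-tier cache: check the in-memory dict first, and only hit the
    slower backend (a stand-in for Redis) on a miss."""

    def __init__(self, backend: Optional[Dict[str, Any]] = None) -> None:
        self.memory: Dict[str, Any] = {}
        self.backend = backend

    def get(self, key: str, local_only: bool = False) -> Optional[Any]:
        result = self.memory.get(key)
        # fall back to the backend only when the in-memory lookup missed
        if result is None and self.backend is not None and not local_only:
            result = self.backend.get(key)
            if result is not None:
                self.memory[key] = result  # populate the in-memory tier
        return result


# usage: backend pre-populated, in-memory tier starts cold
cache = ReadThroughCache(backend={"k": "v"})
print(cache.get("k"))  # "v" (fetched from backend, now cached in memory)
```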
|
||||
|
|
|
@@ -1,167 +0,0 @@
|
|||
#### What this does ####
|
||||
# On success + failure, log events to aispend.io
|
||||
import datetime
|
||||
import os
|
||||
import traceback
|
||||
|
||||
import dotenv
|
||||
|
||||
model_cost = {
|
||||
"gpt-3.5-turbo": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
},
|
||||
"gpt-35-turbo": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
}, # azure model name
|
||||
"gpt-3.5-turbo-0613": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
},
|
||||
"gpt-3.5-turbo-0301": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
},
|
||||
"gpt-3.5-turbo-16k": {
|
||||
"max_tokens": 16000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000004,
|
||||
},
|
||||
"gpt-35-turbo-16k": {
|
||||
"max_tokens": 16000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000004,
|
||||
}, # azure model name
|
||||
"gpt-3.5-turbo-16k-0613": {
|
||||
"max_tokens": 16000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000004,
|
||||
},
|
||||
"gpt-4": {
|
||||
"max_tokens": 8000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.00006,
|
||||
},
|
||||
"gpt-4-0613": {
|
||||
"max_tokens": 8000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.00006,
|
||||
},
|
||||
"gpt-4-32k": {
|
||||
"max_tokens": 8000,
|
||||
"input_cost_per_token": 0.00006,
|
||||
"output_cost_per_token": 0.00012,
|
||||
},
|
||||
"claude-instant-1": {
|
||||
"max_tokens": 100000,
|
||||
"input_cost_per_token": 0.00000163,
|
||||
"output_cost_per_token": 0.00000551,
|
||||
},
|
||||
"claude-2": {
|
||||
"max_tokens": 100000,
|
||||
"input_cost_per_token": 0.00001102,
|
||||
"output_cost_per_token": 0.00003268,
|
||||
},
|
||||
"text-bison-001": {
|
||||
"max_tokens": 8192,
|
||||
"input_cost_per_token": 0.000004,
|
||||
"output_cost_per_token": 0.000004,
|
||||
},
|
||||
"chat-bison-001": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000002,
|
||||
},
|
||||
"command-nightly": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.000015,
|
||||
"output_cost_per_token": 0.000015,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class AISpendLogger:
|
||||
# Class variables or attributes
|
||||
def __init__(self):
|
||||
# Instance variables
|
||||
self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
|
||||
self.api_key = os.getenv("AISPEND_API_KEY")
|
||||
|
||||
def price_calculator(self, model, response_obj, start_time, end_time):
|
||||
# try and find if the model is in the model_cost map
|
||||
# else default to the average of the costs
|
||||
prompt_tokens_cost_usd_dollar = 0
|
||||
completion_tokens_cost_usd_dollar = 0
|
||||
if model in model_cost:
|
||||
prompt_tokens_cost_usd_dollar = (
|
||||
model_cost[model]["input_cost_per_token"]
|
||||
* response_obj["usage"]["prompt_tokens"]
|
||||
)
|
||||
completion_tokens_cost_usd_dollar = (
|
||||
model_cost[model]["output_cost_per_token"]
|
||||
* response_obj["usage"]["completion_tokens"]
|
||||
)
|
||||
elif "replicate" in model:
|
||||
# replicate models are charged based on time
|
||||
# llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
|
||||
model_run_time = end_time - start_time # assuming time in seconds
|
||||
cost_usd_dollar = model_run_time * 0.0032
|
||||
prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
|
||||
completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
|
||||
else:
|
||||
# calculate average input cost
|
||||
input_cost_sum = 0
|
||||
output_cost_sum = 0
|
||||
for model in model_cost:
|
||||
input_cost_sum += model_cost[model]["input_cost_per_token"]
|
||||
output_cost_sum += model_cost[model]["output_cost_per_token"]
|
||||
prompt_tokens_cost_usd_dollar = (
|
||||
model_cost[model]["input_cost_per_token"]
|
||||
* response_obj["usage"]["prompt_tokens"]
|
||||
)
|
||||
completion_tokens_cost_usd_dollar = (
|
||||
model_cost[model]["output_cost_per_token"]
|
||||
* response_obj["usage"]["completion_tokens"]
|
||||
)
|
||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||
|
||||
def log_event(self, model, response_obj, start_time, end_time, print_verbose):
|
||||
# Method definition
|
||||
try:
|
||||
print_verbose(
|
||||
f"AISpend Logging - Enters logging function for model {model}"
|
||||
)
|
||||
|
||||
response_timestamp = datetime.datetime.fromtimestamp(
|
||||
int(response_obj["created"])
|
||||
).strftime("%Y-%m-%d")
|
||||
|
||||
(
|
||||
prompt_tokens_cost_usd_dollar,
|
||||
completion_tokens_cost_usd_dollar,
|
||||
) = self.price_calculator(model, response_obj, start_time, end_time)
|
||||
prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
|
||||
completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
|
||||
data = [
|
||||
{
|
||||
"requests": 1,
|
||||
"requests_context": 1,
|
||||
"context_tokens": response_obj["usage"]["prompt_tokens"],
|
||||
"requests_generated": 1,
|
||||
"generated_tokens": response_obj["usage"]["completion_tokens"],
|
||||
"recorded_date": response_timestamp,
|
||||
"model_id": response_obj["model"],
|
||||
"generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
|
||||
"context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
|
||||
}
|
||||
]
|
||||
|
||||
print_verbose(f"AISpend Logging - final data object: {data}")
|
||||
except Exception:
|
||||
print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
|
||||
pass
|
|
@@ -7,135 +7,208 @@ this file has Arize ai specific helper functions
|
|||
import json
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm._logging import verbose_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from opentelemetry.trace import Span as _Span
|
||||
|
||||
from .opentelemetry import OpenTelemetryConfig as _OpenTelemetryConfig
|
||||
|
||||
Span = _Span
|
||||
OpenTelemetryConfig = _OpenTelemetryConfig
|
||||
else:
|
||||
Span = Any
|
||||
OpenTelemetryConfig = Any
|
||||
|
||||
import os
|
||||
|
||||
from litellm.types.integrations.arize import *
|
||||
|
||||
|
||||
def make_json_serializable(payload: dict) -> dict:
|
||||
for key, value in payload.items():
|
||||
class ArizeLogger:
|
||||
@staticmethod
|
||||
def set_arize_ai_attributes(span: Span, kwargs, response_obj):
|
||||
from litellm.integrations._types.open_inference import (
|
||||
MessageAttributes,
|
||||
MessageContentAttributes,
|
||||
OpenInferenceSpanKindValues,
|
||||
SpanAttributes,
|
||||
)
|
||||
|
||||
try:
|
||||
if isinstance(value, dict):
|
||||
# recursively sanitize dicts
|
||||
payload[key] = make_json_serializable(value.copy())
|
||||
elif not isinstance(value, (str, int, float, bool, type(None))):
|
||||
# everything else becomes a string
|
||||
payload[key] = str(value)
|
||||
except Exception:
|
||||
# non blocking if it can't cast to a str
|
||||
|
||||
optional_params = kwargs.get("optional_params", {})
|
||||
# litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||
|
||||
#############################################
|
||||
############ LLM CALL METADATA ##############
|
||||
#############################################
|
||||
# commented out for now - looks like Arize AI could not log this
|
||||
# metadata = litellm_params.get("metadata", {}) or {}
|
||||
# span.set_attribute(SpanAttributes.METADATA, str(metadata))
|
||||
|
||||
#############################################
|
||||
########## LLM Request Attributes ###########
|
||||
#############################################
|
||||
|
||||
# The name of the LLM a request is being made to
|
||||
if kwargs.get("model"):
|
||||
span.set_attribute(SpanAttributes.LLM_MODEL_NAME, kwargs.get("model"))
|
||||
|
||||
span.set_attribute(
|
||||
SpanAttributes.OPENINFERENCE_SPAN_KIND,
|
||||
OpenInferenceSpanKindValues.LLM.value,
|
||||
)
|
||||
messages = kwargs.get("messages")
|
||||
|
||||
# for /chat/completions
|
||||
# https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
|
||||
if messages:
|
||||
span.set_attribute(
|
||||
SpanAttributes.INPUT_VALUE,
|
||||
messages[-1].get("content", ""), # get the last message for input
|
||||
)
|
||||
|
||||
# LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page
|
||||
for idx, msg in enumerate(messages):
|
||||
# Set the role per message
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_ROLE}",
|
||||
msg["role"],
|
||||
)
|
||||
# Set the content per message
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_CONTENT}",
|
||||
msg.get("content", ""),
|
||||
)
|
||||
|
||||
# The Generative AI Provider: Azure, OpenAI, etc.
|
||||
_optional_params = ArizeLogger.make_json_serializable(optional_params)
|
||||
_json_optional_params = json.dumps(_optional_params)
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_INVOCATION_PARAMETERS, _json_optional_params
|
||||
)
|
||||
|
||||
if optional_params.get("user"):
|
||||
span.set_attribute(SpanAttributes.USER_ID, optional_params.get("user"))
|
||||
|
||||
#############################################
|
||||
########## LLM Response Attributes ##########
|
||||
# https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
|
||||
#############################################
|
||||
for choice in response_obj.get("choices"):
|
||||
response_message = choice.get("message", {})
|
||||
span.set_attribute(
|
||||
SpanAttributes.OUTPUT_VALUE, response_message.get("content", "")
|
||||
)
|
||||
|
||||
# This shows up under `output_messages` tab on the span page
|
||||
# This code assumes a single response
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_ROLE}",
|
||||
response_message["role"],
|
||||
)
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_CONTENT}",
|
||||
response_message.get("content", ""),
|
||||
)
|
||||
|
||||
usage = response_obj.get("usage")
|
||||
if usage:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
|
||||
usage.get("total_tokens"),
|
||||
)
|
||||
|
||||
# The number of tokens used in the LLM response (completion).
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
|
||||
usage.get("completion_tokens"),
|
||||
)
|
||||
|
||||
# The number of tokens used in the LLM prompt.
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
|
||||
usage.get("prompt_tokens"),
|
||||
)
|
||||
pass
|
||||
return payload
|
||||
except Exception as e:
|
||||
verbose_logger.error(f"Error setting arize attributes: {e}")
|
||||
|
||||
###################### Helper functions ######################
|
||||
|
||||
def set_arize_ai_attributes(span: Span, kwargs, response_obj):
|
||||
from litellm.integrations._types.open_inference import (
|
||||
MessageAttributes,
|
||||
MessageContentAttributes,
|
||||
OpenInferenceSpanKindValues,
|
||||
SpanAttributes,
|
||||
)
|
||||
@staticmethod
|
||||
def _get_arize_config() -> ArizeConfig:
|
||||
"""
|
||||
Helper function to get Arize configuration.
|
||||
|
||||
try:
|
||||
Returns:
|
||||
ArizeConfig: A Pydantic model containing Arize configuration.
|
||||
|
||||
optional_params = kwargs.get("optional_params", {})
|
||||
# litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||
Raises:
|
||||
ValueError: If required environment variables are not set.
|
||||
"""
|
||||
space_key = os.environ.get("ARIZE_SPACE_KEY")
|
||||
api_key = os.environ.get("ARIZE_API_KEY")
|
||||
|
||||
#############################################
|
||||
############ LLM CALL METADATA ##############
|
||||
#############################################
|
||||
# commented out for now - looks like Arize AI could not log this
|
||||
# metadata = litellm_params.get("metadata", {}) or {}
|
||||
# span.set_attribute(SpanAttributes.METADATA, str(metadata))
|
||||
if not space_key:
|
||||
raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
|
||||
if not api_key:
|
||||
raise ValueError("ARIZE_API_KEY not found in environment variables")
|
||||
|
||||
#############################################
|
||||
########## LLM Request Attributes ###########
|
||||
#############################################
|
||||
|
||||
# The name of the LLM a request is being made to
|
||||
if kwargs.get("model"):
|
||||
span.set_attribute(SpanAttributes.LLM_MODEL_NAME, kwargs.get("model"))
|
||||
|
||||
span.set_attribute(
|
||||
SpanAttributes.OPENINFERENCE_SPAN_KIND,
|
||||
OpenInferenceSpanKindValues.LLM.value,
|
||||
)
|
||||
messages = kwargs.get("messages")
|
||||
|
||||
# for /chat/completions
|
||||
# https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
|
||||
if messages:
|
||||
span.set_attribute(
|
||||
SpanAttributes.INPUT_VALUE,
|
||||
messages[-1].get("content", ""), # get the last message for input
|
||||
grpc_endpoint = os.environ.get("ARIZE_ENDPOINT")
|
||||
http_endpoint = os.environ.get("ARIZE_HTTP_ENDPOINT")
|
||||
if grpc_endpoint is None and http_endpoint is None:
|
||||
# use default arize grpc endpoint
|
||||
verbose_logger.debug(
|
||||
"No ARIZE_ENDPOINT or ARIZE_HTTP_ENDPOINT found, using default endpoint: https://otlp.arize.com/v1"
|
||||
)
|
||||
grpc_endpoint = "https://otlp.arize.com/v1"
|
||||
|
||||
# LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page
|
||||
for idx, msg in enumerate(messages):
|
||||
# Set the role per message
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_ROLE}",
|
||||
msg["role"],
|
||||
)
|
||||
# Set the content per message
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_CONTENT}",
|
||||
msg.get("content", ""),
|
||||
)
|
||||
|
||||
# The Generative AI Provider: Azure, OpenAI, etc.
|
||||
_optional_params = make_json_serializable(optional_params)
|
||||
_json_optional_params = json.dumps(_optional_params)
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_INVOCATION_PARAMETERS, _json_optional_params
|
||||
return ArizeConfig(
|
||||
space_key=space_key,
|
||||
api_key=api_key,
|
||||
grpc_endpoint=grpc_endpoint,
|
||||
http_endpoint=http_endpoint,
|
||||
)
|
||||
|
||||
if optional_params.get("user"):
|
||||
span.set_attribute(SpanAttributes.USER_ID, optional_params.get("user"))
|
||||
@staticmethod
|
||||
def get_arize_opentelemetry_config() -> Optional[OpenTelemetryConfig]:
|
||||
"""
|
||||
Helper function to get OpenTelemetry configuration for Arize.
|
||||
|
||||
#############################################
|
||||
########## LLM Response Attributes ##########
|
||||
# https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
|
||||
#############################################
|
||||
for choice in response_obj.get("choices"):
|
||||
response_message = choice.get("message", {})
|
||||
span.set_attribute(
|
||||
SpanAttributes.OUTPUT_VALUE, response_message.get("content", "")
|
||||
Args:
|
||||
arize_config (ArizeConfig): Arize configuration object.
|
||||
|
||||
Returns:
|
||||
OpenTelemetryConfig: Configuration for OpenTelemetry.
|
||||
"""
|
||||
from .opentelemetry import OpenTelemetryConfig
|
||||
|
||||
arize_config = ArizeLogger._get_arize_config()
|
||||
if arize_config.http_endpoint:
|
||||
return OpenTelemetryConfig(
|
||||
exporter="otlp_http",
|
||||
endpoint=arize_config.http_endpoint,
|
||||
)
|
||||
|
||||
# This shows up under `output_messages` tab on the span page
|
||||
# This code assumes a single response
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_ROLE}",
|
||||
response_message["role"],
|
||||
)
|
||||
span.set_attribute(
|
||||
f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_CONTENT}",
|
||||
response_message.get("content", ""),
|
||||
)
|
||||
# use default arize grpc endpoint
|
||||
return OpenTelemetryConfig(
|
||||
exporter="otlp_grpc",
|
||||
endpoint=arize_config.grpc_endpoint,
|
||||
)
|
||||
|
||||
usage = response_obj.get("usage")
|
||||
if usage:
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
|
||||
usage.get("total_tokens"),
|
||||
)
|
||||
|
||||
# The number of tokens used in the LLM response (completion).
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
|
||||
usage.get("completion_tokens"),
|
||||
)
|
||||
|
||||
# The number of tokens used in the LLM prompt.
|
||||
span.set_attribute(
|
||||
SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
|
||||
usage.get("prompt_tokens"),
|
||||
)
|
||||
pass
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(f"Error setting arize attributes: {e}")
|
||||
@staticmethod
|
||||
def make_json_serializable(payload: dict) -> dict:
|
||||
for key, value in payload.items():
|
||||
try:
|
||||
if isinstance(value, dict):
|
||||
# recursively sanitize dicts
|
||||
payload[key] = ArizeLogger.make_json_serializable(value.copy())
|
||||
elif not isinstance(value, (str, int, float, bool, type(None))):
|
||||
# everything else becomes a string
|
||||
payload[key] = str(value)
|
||||
except Exception:
|
||||
# non blocking if it can't cast to a str
|
||||
pass
|
||||
return payload
|
||||
|
|
|
@@ -1,104 +0,0 @@
|
|||
#### What this does ####
|
||||
# On success + failure, log events to aispend.io
|
||||
import datetime
|
||||
import os
|
||||
import traceback
|
||||
|
||||
import dotenv
|
||||
import requests # type: ignore
|
||||
|
||||
model_cost = {
|
||||
"gpt-3.5-turbo": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
},
|
||||
"gpt-35-turbo": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
}, # azure model name
|
||||
"gpt-3.5-turbo-0613": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
},
|
||||
"gpt-3.5-turbo-0301": {
|
||||
"max_tokens": 4000,
|
||||
"input_cost_per_token": 0.0000015,
|
||||
"output_cost_per_token": 0.000002,
|
||||
},
|
||||
"gpt-3.5-turbo-16k": {
|
||||
"max_tokens": 16000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000004,
|
||||
},
|
||||
"gpt-35-turbo-16k": {
|
||||
"max_tokens": 16000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000004,
|
||||
}, # azure model name
|
||||
"gpt-3.5-turbo-16k-0613": {
|
||||
"max_tokens": 16000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000004,
|
||||
},
|
||||
"gpt-4": {
|
||||
"max_tokens": 8000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.00006,
|
||||
},
|
||||
"gpt-4-0613": {
|
||||
"max_tokens": 8000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.00006,
|
||||
},
|
||||
"gpt-4-32k": {
|
||||
"max_tokens": 8000,
|
||||
"input_cost_per_token": 0.00006,
|
||||
"output_cost_per_token": 0.00012,
|
||||
},
|
||||
"claude-instant-1": {
|
||||
"max_tokens": 100000,
|
||||
"input_cost_per_token": 0.00000163,
|
||||
"output_cost_per_token": 0.00000551,
|
||||
},
|
||||
"claude-2": {
|
||||
"max_tokens": 100000,
|
||||
"input_cost_per_token": 0.00001102,
|
||||
"output_cost_per_token": 0.00003268,
|
||||
},
|
||||
"text-bison-001": {
|
||||
"max_tokens": 8192,
|
||||
"input_cost_per_token": 0.000004,
|
||||
"output_cost_per_token": 0.000004,
|
||||
},
|
||||
"chat-bison-001": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000002,
|
||||
},
|
||||
"command-nightly": {
|
||||
"max_tokens": 4096,
|
||||
"input_cost_per_token": 0.000015,
|
||||
"output_cost_per_token": 0.000015,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class BerriSpendLogger:
|
||||
# Class variables or attributes
|
||||
def __init__(self):
|
||||
# Instance variables
|
||||
self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")
|
||||
|
||||
def price_calculator(self, model, response_obj, start_time, end_time):
|
||||
return
|
||||
|
||||
def log_event(
|
||||
self, model, messages, response_obj, start_time, end_time, print_verbose
|
||||
):
|
||||
"""
|
||||
This integration is not implemented yet.
|
||||
"""
|
||||
return
|
|
@@ -1,334 +0,0 @@
|
|||
# callback to make a request to an API endpoint
|
||||
|
||||
#### What this does ####
|
||||
# On success, logs events to Promptlayer
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import traceback
|
||||
from typing import Literal, Optional, Union
|
||||
|
||||
import dotenv
|
||||
import requests
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
|
||||
#### What this does ####
|
||||
# On success + failure, log events to Supabase
|
||||
|
||||
|
||||
def create_client():
|
||||
try:
|
||||
import clickhouse_connect
|
||||
|
||||
port = os.getenv("CLICKHOUSE_PORT")
|
||||
clickhouse_host = os.getenv("CLICKHOUSE_HOST")
|
||||
if clickhouse_host is not None:
|
||||
verbose_logger.debug("setting up clickhouse")
|
||||
|
||||
port = os.getenv("CLICKHOUSE_PORT")
|
||||
if port is not None and isinstance(port, str):
|
||||
port = int(port)
|
||||
|
||||
host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
|
||||
if host is None:
|
||||
raise ValueError("CLICKHOUSE_HOST is not set")
|
||||
|
||||
username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
|
||||
if username is None:
|
||||
raise ValueError("CLICKHOUSE_USERNAME is not set")
|
||||
|
||||
password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
|
||||
if password is None:
|
||||
raise ValueError("CLICKHOUSE_PASSWORD is not set")
|
||||
if port is None:
|
||||
raise ValueError("CLICKHOUSE_PORT is not set")
|
||||
|
||||
client = clickhouse_connect.get_client(
|
||||
host=host,
|
||||
port=port,
|
||||
username=username,
|
||||
password=password,
|
||||
)
|
||||
return client
|
||||
else:
|
||||
raise Exception("Clickhouse: Clickhouse host not set")
|
||||
except Exception as e:
|
||||
raise ValueError(f"Clickhouse: {e}")
|
||||
|
||||
|
||||
def build_daily_metrics():
|
||||
click_house_client = create_client()
|
||||
|
||||
# get daily spend
|
||||
daily_spend = click_house_client.query_df(
|
||||
"""
|
||||
SELECT sumMerge(DailySpend) as daily_spend, day FROM daily_aggregated_spend GROUP BY day
|
||||
"""
|
||||
)
|
||||
|
||||
# get daily spend per model
|
||||
daily_spend_per_model = click_house_client.query_df(
|
||||
"""
|
||||
SELECT sumMerge(DailySpend) as daily_spend, day, model FROM daily_aggregated_spend_per_model GROUP BY day, model
|
||||
"""
|
||||
)
|
||||
new_df = daily_spend_per_model.to_dict(orient="records")
|
||||
import pandas as pd
|
||||
|
||||
df = pd.DataFrame(new_df)
|
||||
# Group by 'day' and create a dictionary for each group
|
||||
result_dict = {}
|
||||
for day, group in df.groupby("day"):
|
||||
models = group["model"].tolist()
|
||||
spend = group["daily_spend"].tolist()
|
||||
spend_per_model = {model: spend for model, spend in zip(models, spend)}
|
||||
result_dict[day] = spend_per_model
|
||||
|
||||
# Display the resulting dictionary
|
||||
|
||||
# get daily spend per API key
|
||||
daily_spend_per_api_key = click_house_client.query_df(
|
||||
"""
|
||||
SELECT
|
||||
daily_spend,
|
||||
day,
|
||||
api_key
|
||||
FROM (
|
||||
SELECT
|
||||
sumMerge(DailySpend) as daily_spend,
|
||||
day,
|
||||
api_key,
|
||||
RANK() OVER (PARTITION BY day ORDER BY sumMerge(DailySpend) DESC) as spend_rank
|
||||
FROM
|
||||
daily_aggregated_spend_per_api_key
|
||||
GROUP BY
|
||||
day,
|
||||
api_key
|
||||
) AS ranked_api_keys
|
||||
WHERE
|
||||
spend_rank <= 5
|
||||
AND day IS NOT NULL
|
||||
ORDER BY
|
||||
day,
|
||||
daily_spend DESC
|
||||
"""
|
||||
)
|
||||
new_df = daily_spend_per_api_key.to_dict(orient="records")
|
||||
import pandas as pd
|
||||
|
||||
df = pd.DataFrame(new_df)
|
||||
# Group by 'day' and create a dictionary for each group
|
||||
api_key_result_dict = {}
|
||||
for day, group in df.groupby("day"):
|
||||
api_keys = group["api_key"].tolist()
|
||||
spend = group["daily_spend"].tolist()
|
||||
spend_per_api_key = {api_key: spend for api_key, spend in zip(api_keys, spend)}
|
||||
api_key_result_dict[day] = spend_per_api_key
|
||||
|
||||
# Display the resulting dictionary
|
||||
|
||||
# Calculate total spend across all days
|
||||
total_spend = daily_spend["daily_spend"].sum()
|
||||
|
||||
# Identify top models and top API keys with the highest spend across all days
|
||||
top_models = {}
|
||||
top_api_keys = {}
|
||||
|
||||
for day, spend_per_model in result_dict.items():
|
||||
for model, model_spend in spend_per_model.items():
|
||||
if model not in top_models or model_spend > top_models[model]:
|
||||
top_models[model] = model_spend
|
||||
|
||||
for day, spend_per_api_key in api_key_result_dict.items():
|
||||
for api_key, api_key_spend in spend_per_api_key.items():
|
||||
if api_key not in top_api_keys or api_key_spend > top_api_keys[api_key]:
|
||||
top_api_keys[api_key] = api_key_spend
|
||||
|
||||
# for each day in daily spend, look up the day in result_dict and api_key_result_dict
|
||||
# Assuming daily_spend DataFrame has 'day' column
|
||||
result = []
|
||||
for index, row in daily_spend.iterrows():
|
||||
day = row["day"]
|
||||
data_day = row.to_dict()
|
||||
|
||||
# Look up in result_dict
|
||||
if day in result_dict:
|
||||
spend_per_model = result_dict[day]
|
||||
# Assuming there is a column named 'model' in daily_spend
|
||||
data_day["spend_per_model"] = spend_per_model # Assign 0 if model not found
|
||||
|
||||
# Look up in api_key_result_dict
|
||||
if day in api_key_result_dict:
|
||||
spend_per_api_key = api_key_result_dict[day]
|
||||
# Assuming there is a column named 'api_key' in daily_spend
|
||||
data_day["spend_per_api_key"] = spend_per_api_key
|
||||
|
||||
result.append(data_day)
|
||||
|
||||
data_to_return = {}
|
||||
data_to_return["daily_spend"] = result
|
||||
|
||||
data_to_return["total_spend"] = total_spend
|
||||
data_to_return["top_models"] = top_models
|
||||
data_to_return["top_api_keys"] = top_api_keys
|
||||
return data_to_return
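To make the aggregation above easier to follow, this is roughly the shape of the dictionary the function returns; every value below is invented for illustration:

example_output = {
    "daily_spend": [
        {
            "day": "2024-01-01",
            "daily_spend": 12.5,
            "spend_per_model": {"gpt-4": 10.0, "gpt-3.5-turbo": 2.5},
            "spend_per_api_key": {"hashed-key-1": 9.0, "hashed-key-2": 3.5},
        },
    ],
    "total_spend": 12.5,
    "top_models": {"gpt-4": 10.0, "gpt-3.5-turbo": 2.5},
    "top_api_keys": {"hashed-key-1": 9.0, "hashed-key-2": 3.5},
}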
|
||||
|
||||
|
||||
# build_daily_metrics()
|
||||
|
||||
|
||||
def _start_clickhouse():
|
||||
import clickhouse_connect
|
||||
|
||||
port = os.getenv("CLICKHOUSE_PORT")
|
||||
clickhouse_host = os.getenv("CLICKHOUSE_HOST")
|
||||
if clickhouse_host is not None:
|
||||
verbose_logger.debug("setting up clickhouse")
|
||||
if port is not None and isinstance(port, str):
|
||||
port = int(port)
|
||||
|
||||
host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
|
||||
if host is None:
|
||||
raise ValueError("CLICKHOUSE_HOST is not set")
|
||||
|
||||
username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
|
||||
if username is None:
|
||||
raise ValueError("CLICKHOUSE_USERNAME is not set")
|
||||
|
||||
password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
|
||||
if password is None:
|
||||
raise ValueError("CLICKHOUSE_PASSWORD is not set")
|
||||
if port is None:
|
||||
raise ValueError("CLICKHOUSE_PORT is not set")
|
||||
|
||||
client = clickhouse_connect.get_client(
|
||||
host=host,
|
||||
port=port,
|
||||
username=username,
|
||||
password=password,
|
||||
)
|
||||
# view all tables in DB
|
||||
response = client.query("SHOW TABLES")
|
||||
verbose_logger.debug(
|
||||
f"checking if litellm spend logs exists, all tables={response.result_rows}"
|
||||
)
|
||||
# all tables is returned like this: all tables = [('new_table',), ('spend_logs',)]
|
||||
# check if spend_logs in all tables
|
||||
table_names = [all_tables[0] for all_tables in response.result_rows]
|
||||
|
||||
if "spend_logs" not in table_names:
|
||||
verbose_logger.debug(
|
||||
"Clickhouse: spend logs table does not exist... creating it"
|
||||
)
|
||||
|
||||
response = client.command(
|
||||
"""
|
||||
CREATE TABLE default.spend_logs
|
||||
(
|
||||
`request_id` String,
|
||||
`call_type` String,
|
||||
`api_key` String,
|
||||
`spend` Float64,
|
||||
`total_tokens` Int256,
|
||||
`prompt_tokens` Int256,
|
||||
`completion_tokens` Int256,
|
||||
`startTime` DateTime,
|
||||
`endTime` DateTime,
|
||||
`model` String,
|
||||
`user` String,
|
||||
`metadata` String,
|
||||
`cache_hit` String,
|
||||
`cache_key` String,
|
||||
`request_tags` String
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY tuple();
|
||||
"""
|
||||
)
|
||||
else:
|
||||
# check if spend logs exist, if it does then return the schema
|
||||
response = client.query("DESCRIBE default.spend_logs")
|
||||
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
|
||||
|
||||
|
||||
class ClickhouseLogger:
|
||||
# Class variables or attributes
|
||||
def __init__(self, endpoint=None, headers=None):
|
||||
import clickhouse_connect
|
||||
|
||||
_start_clickhouse()
|
||||
|
||||
verbose_logger.debug(
|
||||
f"ClickhouseLogger init, host {os.getenv('CLICKHOUSE_HOST')}, port {os.getenv('CLICKHOUSE_PORT')}, username {os.getenv('CLICKHOUSE_USERNAME')}"
|
||||
)
|
||||
|
||||
port = os.getenv("CLICKHOUSE_PORT")
|
||||
if port is not None and isinstance(port, str):
|
||||
port = int(port)
|
||||
|
||||
host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
|
||||
if host is None:
|
||||
raise ValueError("CLICKHOUSE_HOST is not set")
|
||||
|
||||
username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
|
||||
if username is None:
|
||||
raise ValueError("CLICKHOUSE_USERNAME is not set")
|
||||
|
||||
password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
|
||||
if password is None:
|
||||
raise ValueError("CLICKHOUSE_PASSWORD is not set")
|
||||
if port is None:
|
||||
raise ValueError("CLICKHOUSE_PORT is not set")
|
||||
|
||||
client = clickhouse_connect.get_client(
|
||||
host=host,
|
||||
port=port,
|
||||
username=username,
|
||||
password=password,
|
||||
)
|
||||
self.client = client
|
||||
|
||||
# This is sync, because we run this in a separate thread. Running in a separate thread ensures it will never block an LLM API call
|
||||
# Experience with s3, Langfuse shows that async logging events are complicated and can block LLM calls
|
||||
def log_event(
|
||||
self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
|
||||
):
|
||||
try:
|
||||
verbose_logger.debug(
|
||||
f"ClickhouseLogger Logging - Enters logging function for model {kwargs}"
|
||||
)
|
||||
# follows the same params as langfuse.py
|
||||
|
||||
payload: Optional[StandardLoggingPayload] = kwargs.get(
|
||||
"standard_logging_object"
|
||||
)
|
||||
if payload is None:
|
||||
return
|
||||
# Build the initial payload
|
||||
|
||||
verbose_logger.debug(f"\nClickhouse Logger - Logging payload = {payload}")
|
||||
|
||||
# just get the payload items in one array and payload keys in 2nd array
|
||||
values = []
|
||||
keys = []
|
||||
for key, value in payload.items():
|
||||
keys.append(key)
|
||||
values.append(value)
|
||||
data = [values]
|
||||
|
||||
response = self.client.insert("default.spend_logs", data, column_names=keys)
|
||||
|
||||
# make request to endpoint with payload
|
||||
verbose_logger.debug(f"Clickhouse Logger - final response = {response}")
|
||||
except Exception as e:
|
||||
verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}")
|
||||
pass
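As a rough illustration of the insert call above: the standard logging payload is flattened into parallel key/value lists and written as a single row, so the payload keys are assumed to line up with the spend_logs columns. The payload fragment below is made up:

payload = {"request_id": "req-123", "model": "gpt-4", "spend": 0.002}
keys = list(payload.keys())
values = list(payload.values())
rows = [values]                      # clickhouse_connect expects a list of rows
# client.insert("default.spend_logs", rows, column_names=keys)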
|
169
litellm/integrations/datadog/datadog_llm_obs.py
Normal file
|
@ -0,0 +1,169 @@
|
|||
"""
|
||||
Implements logging integration with Datadog's LLM Observability Service
|
||||
|
||||
|
||||
API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=example#api-standards
|
||||
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import traceback
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from httpx import Response
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.integrations.custom_batch_logger import CustomBatchLogger
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
get_async_httpx_client,
|
||||
httpxSpecialProvider,
|
||||
)
|
||||
from litellm.types.integrations.datadog_llm_obs import *
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
|
||||
|
||||
class DataDogLLMObsLogger(CustomBatchLogger):
|
||||
def __init__(self, **kwargs):
|
||||
try:
|
||||
verbose_logger.debug("DataDogLLMObs: Initializing logger")
|
||||
if os.getenv("DD_API_KEY", None) is None:
|
||||
raise Exception("DD_API_KEY is not set, set 'DD_API_KEY=<>'")
|
||||
if os.getenv("DD_SITE", None) is None:
|
||||
raise Exception(
|
||||
"DD_SITE is not set, set 'DD_SITE=<>', example sit = `us5.datadoghq.com`"
|
||||
)
|
||||
|
||||
self.async_client = get_async_httpx_client(
|
||||
llm_provider=httpxSpecialProvider.LoggingCallback
|
||||
)
|
||||
self.DD_API_KEY = os.getenv("DD_API_KEY")
|
||||
self.DD_SITE = os.getenv("DD_SITE")
|
||||
self.intake_url = (
|
||||
f"https://api.{self.DD_SITE}/api/intake/llm-obs/v1/trace/spans"
|
||||
)
|
||||
|
||||
# testing base url
|
||||
dd_base_url = os.getenv("DD_BASE_URL")
|
||||
if dd_base_url:
|
||||
self.intake_url = f"{dd_base_url}/api/intake/llm-obs/v1/trace/spans"
|
||||
|
||||
asyncio.create_task(self.periodic_flush())
|
||||
self.flush_lock = asyncio.Lock()
|
||||
self.log_queue: List[LLMObsPayload] = []
|
||||
super().__init__(**kwargs, flush_lock=self.flush_lock)
|
||||
except Exception as e:
|
||||
verbose_logger.exception(f"DataDogLLMObs: Error initializing - {str(e)}")
|
||||
raise e
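A hedged setup sketch for this logger: the API key and site are placeholders, and enabling it by the string name relies on the `datadog_llm_observability` branch added to `_init_custom_logger_compatible_class` later in this commit:

import os
import litellm

os.environ["DD_API_KEY"] = "dd-api-key-placeholder"   # placeholder, not a real key
os.environ["DD_SITE"] = "us5.datadoghq.com"           # example Datadog site

litellm.callbacks = ["datadog_llm_observability"]     # assumed callback-name wiring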
|
||||
|
||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||
try:
|
||||
verbose_logger.debug(
|
||||
f"DataDogLLMObs: Logging success event for model {kwargs.get('model', 'unknown')}"
|
||||
)
|
||||
payload = self.create_llm_obs_payload(
|
||||
kwargs, response_obj, start_time, end_time
|
||||
)
|
||||
verbose_logger.debug(f"DataDogLLMObs: Payload: {payload}")
|
||||
self.log_queue.append(payload)
|
||||
|
||||
if len(self.log_queue) >= self.batch_size:
|
||||
await self.async_send_batch()
|
||||
except Exception as e:
|
||||
verbose_logger.exception(
|
||||
f"DataDogLLMObs: Error logging success event - {str(e)}"
|
||||
)
|
||||
|
||||
async def async_send_batch(self):
|
||||
try:
|
||||
if not self.log_queue:
|
||||
return
|
||||
|
||||
verbose_logger.debug(
|
||||
f"DataDogLLMObs: Flushing {len(self.log_queue)} events"
|
||||
)
|
||||
|
||||
# Prepare the payload
|
||||
payload = {
|
||||
"data": DDIntakePayload(
|
||||
type="span",
|
||||
attributes=DDSpanAttributes(
|
||||
ml_app="litellm",
|
||||
tags=[
|
||||
"service:litellm",
|
||||
f"env:{os.getenv('DD_ENV', 'production')}",
|
||||
],
|
||||
spans=self.log_queue,
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
response = await self.async_client.post(
|
||||
url=self.intake_url,
|
||||
json=payload,
|
||||
headers={
|
||||
"DD-API-KEY": self.DD_API_KEY,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
if response.status_code != 202:
|
||||
raise Exception(
|
||||
f"DataDogLLMObs: Unexpected response - status_code: {response.status_code}, text: {response.text}"
|
||||
)
|
||||
|
||||
verbose_logger.debug(
|
||||
f"DataDogLLMObs: Successfully sent batch - status_code: {response.status_code}"
|
||||
)
|
||||
self.log_queue.clear()
|
||||
except Exception as e:
|
||||
verbose_logger.exception(f"DataDogLLMObs: Error sending batch - {str(e)}")
|
||||
|
||||
def create_llm_obs_payload(
|
||||
self, kwargs: Dict, response_obj: Any, start_time: datetime, end_time: datetime
|
||||
) -> LLMObsPayload:
|
||||
standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
|
||||
"standard_logging_object"
|
||||
)
|
||||
if standard_logging_payload is None:
|
||||
raise Exception("DataDogLLMObs: standard_logging_object is not set")
|
||||
|
||||
messages = standard_logging_payload["messages"]
|
||||
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
|
||||
|
||||
input_meta = InputMeta(messages=messages) # type: ignore
|
||||
output_meta = OutputMeta(messages=self._get_response_messages(response_obj))
|
||||
|
||||
meta = Meta(kind="llm", input=input_meta, output=output_meta)
|
||||
|
||||
# Calculate metrics (you may need to adjust these based on available data)
|
||||
metrics = LLMMetrics(
|
||||
input_tokens=float(standard_logging_payload.get("prompt_tokens", 0)),
|
||||
output_tokens=float(standard_logging_payload.get("completion_tokens", 0)),
|
||||
total_tokens=float(standard_logging_payload.get("total_tokens", 0)),
|
||||
)
|
||||
|
||||
return LLMObsPayload(
|
||||
parent_id=metadata.get("parent_id", "undefined"),
|
||||
trace_id=metadata.get("trace_id", str(uuid.uuid4())),
|
||||
span_id=metadata.get("span_id", str(uuid.uuid4())),
|
||||
name=metadata.get("name", "litellm_llm_call"),
|
||||
meta=meta,
|
||||
start_ns=int(start_time.timestamp() * 1e9),
|
||||
duration=int((end_time - start_time).total_seconds() * 1e9),
|
||||
metrics=metrics,
|
||||
)
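A small worked example of the `start_ns` and `duration` arithmetic above, with invented timestamps:

from datetime import datetime

start_time = datetime(2024, 1, 1, 12, 0, 0)
end_time = datetime(2024, 1, 1, 12, 0, 2)                       # 2 seconds later

start_ns = int(start_time.timestamp() * 1e9)                    # epoch time in nanoseconds
duration = int((end_time - start_time).total_seconds() * 1e9)   # 2_000_000_000 ns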
|
||||
|
||||
def _get_response_messages(self, response_obj: Any) -> List[Any]:
|
||||
"""
|
||||
Get the messages from the response object
|
||||
|
||||
for now this handles logging /chat/completions responses
|
||||
"""
|
||||
if isinstance(response_obj, litellm.ModelResponse):
|
||||
return [response_obj["choices"][0]["message"].json()]
|
||||
return []
|
|
@ -4,7 +4,7 @@ import copy
|
|||
import inspect
|
||||
import os
|
||||
import traceback
|
||||
from typing import Optional
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||
|
||||
from packaging.version import Version
|
||||
from pydantic import BaseModel
|
||||
|
@ -13,7 +13,13 @@ import litellm
|
|||
from litellm._logging import verbose_logger
|
||||
from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info
|
||||
from litellm.secret_managers.main import str_to_bool
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
from litellm.types.integrations.langfuse import *
|
||||
from litellm.types.utils import StandardCallbackDynamicParams, StandardLoggingPayload
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.litellm_core_utils.litellm_logging import DynamicLoggingCache
|
||||
else:
|
||||
DynamicLoggingCache = Any
|
||||
|
||||
|
||||
class LangFuseLogger:
|
168
litellm/integrations/langfuse/langfuse_handler.py
Normal file
|
@ -0,0 +1,168 @@
|
|||
"""
|
||||
This file contains the LangFuseHandler class
|
||||
|
||||
Used to get the LangFuseLogger for a given request
|
||||
|
||||
Handles Key/Team Based Langfuse Logging
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional
|
||||
|
||||
from litellm.litellm_core_utils.litellm_logging import StandardCallbackDynamicParams
|
||||
|
||||
from .langfuse import LangFuseLogger, LangfuseLoggingConfig
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.litellm_core_utils.litellm_logging import DynamicLoggingCache
|
||||
else:
|
||||
DynamicLoggingCache = Any
|
||||
|
||||
|
||||
class LangFuseHandler:
|
||||
|
||||
@staticmethod
|
||||
def get_langfuse_logger_for_request(
|
||||
standard_callback_dynamic_params: StandardCallbackDynamicParams,
|
||||
in_memory_dynamic_logger_cache: DynamicLoggingCache,
|
||||
globalLangfuseLogger: Optional[LangFuseLogger] = None,
|
||||
) -> LangFuseLogger:
|
||||
"""
|
||||
This function is used to get the LangFuseLogger for a given request
|
||||
|
||||
1. If dynamic credentials are passed
|
||||
- check if a LangFuseLogger is cached for the dynamic credentials
|
||||
- if cached LangFuseLogger is not found, create a new LangFuseLogger and cache it
|
||||
|
||||
2. If dynamic credentials are not passed return the globalLangfuseLogger
|
||||
|
||||
"""
|
||||
temp_langfuse_logger: Optional[LangFuseLogger] = globalLangfuseLogger
|
||||
if (
|
||||
LangFuseHandler._dynamic_langfuse_credentials_are_passed(
|
||||
standard_callback_dynamic_params
|
||||
)
|
||||
is False
|
||||
):
|
||||
return LangFuseHandler._return_global_langfuse_logger(
|
||||
globalLangfuseLogger=globalLangfuseLogger,
|
||||
in_memory_dynamic_logger_cache=in_memory_dynamic_logger_cache,
|
||||
)
|
||||
|
||||
# get langfuse logging config to use for this request, based on standard_callback_dynamic_params
|
||||
_credentials = LangFuseHandler.get_dynamic_langfuse_logging_config(
|
||||
globalLangfuseLogger=globalLangfuseLogger,
|
||||
standard_callback_dynamic_params=standard_callback_dynamic_params,
|
||||
)
|
||||
credentials_dict = dict(_credentials)
|
||||
|
||||
# check if langfuse logger is already cached
|
||||
temp_langfuse_logger = in_memory_dynamic_logger_cache.get_cache(
|
||||
credentials=credentials_dict, service_name="langfuse"
|
||||
)
|
||||
|
||||
# if not cached, create a new langfuse logger and cache it
|
||||
if temp_langfuse_logger is None:
|
||||
temp_langfuse_logger = (
|
||||
LangFuseHandler._create_langfuse_logger_from_credentials(
|
||||
credentials=credentials_dict,
|
||||
in_memory_dynamic_logger_cache=in_memory_dynamic_logger_cache,
|
||||
)
|
||||
)
|
||||
|
||||
return temp_langfuse_logger
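A hedged sketch of the lookup flow described in the docstring; the credential values are placeholders and the cache object is assumed to be litellm's in-memory DynamicLoggingCache instance, so the call itself is left commented:

dynamic_params = {
    "langfuse_public_key": "pk-lf-placeholder",
    "langfuse_secret": "sk-lf-placeholder",
    "langfuse_host": "https://cloud.langfuse.com",
}
# logger = LangFuseHandler.get_langfuse_logger_for_request(
#     standard_callback_dynamic_params=dynamic_params,
#     in_memory_dynamic_logger_cache=in_memory_dynamic_logger_cache,
#     globalLangfuseLogger=None,
# )
# A second request with identical credentials would reuse the cached LangFuseLogger
# instead of constructing a new client.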
|
||||
|
||||
@staticmethod
|
||||
def _return_global_langfuse_logger(
|
||||
globalLangfuseLogger: Optional[LangFuseLogger],
|
||||
in_memory_dynamic_logger_cache: DynamicLoggingCache,
|
||||
) -> LangFuseLogger:
|
||||
"""
|
||||
Returns the Global LangfuseLogger set on litellm
|
||||
|
||||
(this is the default langfuse logger - used when no dynamic credentials are passed)
|
||||
|
||||
If no Global LangfuseLogger is set, it will check in_memory_dynamic_logger_cache for a cached LangFuseLogger
|
||||
"""
|
||||
if globalLangfuseLogger is not None:
|
||||
return globalLangfuseLogger
|
||||
|
||||
credentials_dict: Dict[str, Any] = (
|
||||
{}
|
||||
) # the global langfuse logger uses Environment Variables, there are no dynamic credentials
|
||||
globalLangfuseLogger = in_memory_dynamic_logger_cache.get_cache(
|
||||
credentials=credentials_dict,
|
||||
service_name="langfuse",
|
||||
)
|
||||
if globalLangfuseLogger is None:
|
||||
globalLangfuseLogger = (
|
||||
LangFuseHandler._create_langfuse_logger_from_credentials(
|
||||
credentials=credentials_dict,
|
||||
in_memory_dynamic_logger_cache=in_memory_dynamic_logger_cache,
|
||||
)
|
||||
)
|
||||
return globalLangfuseLogger
|
||||
|
||||
@staticmethod
|
||||
def _create_langfuse_logger_from_credentials(
|
||||
credentials: Dict,
|
||||
in_memory_dynamic_logger_cache: DynamicLoggingCache,
|
||||
) -> LangFuseLogger:
|
||||
"""
|
||||
This function is used to
|
||||
1. create a LangFuseLogger from the credentials
|
||||
2. cache the LangFuseLogger to prevent re-creating it for the same credentials
|
||||
"""
|
||||
|
||||
langfuse_logger = LangFuseLogger(
|
||||
langfuse_public_key=credentials.get("langfuse_public_key"),
|
||||
langfuse_secret=credentials.get("langfuse_secret"),
|
||||
langfuse_host=credentials.get("langfuse_host"),
|
||||
)
|
||||
in_memory_dynamic_logger_cache.set_cache(
|
||||
credentials=credentials,
|
||||
service_name="langfuse",
|
||||
logging_obj=langfuse_logger,
|
||||
)
|
||||
return langfuse_logger
|
||||
|
||||
@staticmethod
|
||||
def get_dynamic_langfuse_logging_config(
|
||||
standard_callback_dynamic_params: StandardCallbackDynamicParams,
|
||||
globalLangfuseLogger: Optional[LangFuseLogger] = None,
|
||||
) -> LangfuseLoggingConfig:
|
||||
"""
|
||||
This function is used to get the Langfuse logging config to use for a given request.
|
||||
|
||||
It checks if the dynamic parameters are provided in the standard_callback_dynamic_params and uses them to get the Langfuse logging config.
|
||||
|
||||
If no dynamic parameters are provided, it uses the `globalLangfuseLogger` values
|
||||
"""
|
||||
# only use dynamic params if langfuse credentials are passed dynamically
|
||||
return LangfuseLoggingConfig(
|
||||
langfuse_secret=standard_callback_dynamic_params.get("langfuse_secret")
|
||||
or standard_callback_dynamic_params.get("langfuse_secret_key"),
|
||||
langfuse_public_key=standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
),
|
||||
langfuse_host=standard_callback_dynamic_params.get("langfuse_host"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _dynamic_langfuse_credentials_are_passed(
|
||||
standard_callback_dynamic_params: StandardCallbackDynamicParams,
|
||||
) -> bool:
|
||||
"""
|
||||
This function is used to check if the dynamic langfuse credentials are passed in standard_callback_dynamic_params
|
||||
|
||||
Returns:
|
||||
bool: True if the dynamic langfuse credentials are passed, False otherwise
|
||||
"""
|
||||
if (
|
||||
standard_callback_dynamic_params.get("langfuse_host") is not None
|
||||
or standard_callback_dynamic_params.get("langfuse_public_key") is not None
|
||||
or standard_callback_dynamic_params.get("langfuse_secret") is not None
|
||||
or standard_callback_dynamic_params.get("langfuse_secret_key") is not None
|
||||
):
|
||||
return True
|
||||
return False
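For clarity, any one of the four keys checked above is enough to switch the handler into dynamic-credential mode; a small illustration with placeholder values:

assert LangFuseHandler._dynamic_langfuse_credentials_are_passed(
    {"langfuse_public_key": "pk-lf-placeholder"}
) is True
assert LangFuseHandler._dynamic_langfuse_credentials_are_passed({}) is False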
|
|
@ -1,77 +0,0 @@
|
|||
import json
|
||||
import os
|
||||
import traceback
|
||||
import types
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class LiteDebugger:
|
||||
user_email = None
|
||||
dashboard_url = None
|
||||
|
||||
def __init__(self, email=None):
|
||||
self.api_url = "https://api.litellm.ai/debugger"
|
||||
self.validate_environment(email)
|
||||
pass
|
||||
|
||||
def validate_environment(self, email):
|
||||
try:
|
||||
self.user_email = (
|
||||
email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL")
|
||||
)
|
||||
if (
|
||||
self.user_email is None
|
||||
): # if users are trying to use_client=True but token not set
|
||||
raise ValueError(
|
||||
"litellm.use_client = True but no token or email passed. Please set it in litellm.token"
|
||||
)
|
||||
self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
|
||||
if self.user_email is None:
|
||||
raise ValueError(
|
||||
"[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
|
||||
)
|
||||
except Exception:
|
||||
raise ValueError(
|
||||
"[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
|
||||
)
|
||||
|
||||
def input_log_event(
|
||||
self,
|
||||
model,
|
||||
messages,
|
||||
end_user,
|
||||
litellm_call_id,
|
||||
call_type,
|
||||
print_verbose,
|
||||
litellm_params,
|
||||
optional_params,
|
||||
):
|
||||
"""
|
||||
This integration is not implemented yet.
|
||||
"""
|
||||
return
|
||||
|
||||
def post_call_log_event(
|
||||
self, original_response, litellm_call_id, print_verbose, call_type, stream
|
||||
):
|
||||
"""
|
||||
This integration is not implemented yet.
|
||||
"""
|
||||
return
|
||||
|
||||
def log_event(
|
||||
self,
|
||||
end_user,
|
||||
response_obj,
|
||||
start_time,
|
||||
end_time,
|
||||
litellm_call_id,
|
||||
print_verbose,
|
||||
call_type,
|
||||
stream=False,
|
||||
):
|
||||
"""
|
||||
This integration is not implemented yet.
|
||||
"""
|
||||
return
|
|
@ -171,7 +171,7 @@ class OpenTelemetry(CustomLogger):
|
|||
try:
|
||||
value = str(value)
|
||||
except Exception:
|
||||
value = "litllm logging error - could_not_json_serialize"
|
||||
value = "litellm logging error - could_not_json_serialize"
|
||||
self.safe_set_attribute(
|
||||
span=service_logging_span,
|
||||
key=key,
|
||||
|
@ -396,9 +396,9 @@ class OpenTelemetry(CustomLogger):
|
|||
def set_attributes(self, span: Span, kwargs, response_obj): # noqa: PLR0915
|
||||
try:
|
||||
if self.callback_name == "arize":
|
||||
from litellm.integrations.arize_ai import set_arize_ai_attributes
|
||||
from litellm.integrations.arize_ai import ArizeLogger
|
||||
|
||||
set_arize_ai_attributes(span, kwargs, response_obj)
|
||||
ArizeLogger.set_arize_ai_attributes(span, kwargs, response_obj)
|
||||
return
|
||||
elif self.callback_name == "langtrace":
|
||||
from litellm.integrations.langtrace import LangtraceAttributes
|
||||
|
|
|
@ -6,7 +6,7 @@ import subprocess
|
|||
import sys
|
||||
import traceback
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import date, datetime, timedelta
|
||||
from typing import Optional, TypedDict, Union
|
||||
|
||||
import dotenv
|
||||
|
@ -334,13 +334,8 @@ class PrometheusLogger(CustomLogger):
|
|||
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
||||
raise e
|
||||
|
||||
async def async_log_success_event( # noqa: PLR0915
|
||||
self, kwargs, response_obj, start_time, end_time
|
||||
):
|
||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||
# Define prometheus client
|
||||
from litellm.proxy.common_utils.callback_utils import (
|
||||
get_model_group_from_litellm_kwargs,
|
||||
)
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
|
||||
verbose_logger.debug(
|
||||
|
@ -351,14 +346,19 @@ class PrometheusLogger(CustomLogger):
|
|||
standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
|
||||
"standard_logging_object"
|
||||
)
|
||||
if standard_logging_payload is None:
|
||||
raise ValueError("standard_logging_object is required")
|
||||
|
||||
if standard_logging_payload is None or not isinstance(
|
||||
standard_logging_payload, dict
|
||||
):
|
||||
raise ValueError(
|
||||
f"standard_logging_object is required, got={standard_logging_payload}"
|
||||
)
|
||||
|
||||
model = kwargs.get("model", "")
|
||||
litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||
_metadata = litellm_params.get("metadata", {})
|
||||
proxy_server_request = litellm_params.get("proxy_server_request") or {}
|
||||
end_user_id = proxy_server_request.get("body", {}).get("user", None)
|
||||
model_parameters: dict = standard_logging_payload["model_parameters"]
|
||||
user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
|
||||
user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]
|
||||
user_api_key_alias = standard_logging_payload["metadata"]["user_api_key_alias"]
|
||||
|
@ -369,25 +369,6 @@ class PrometheusLogger(CustomLogger):
|
|||
output_tokens = standard_logging_payload["completion_tokens"]
|
||||
tokens_used = standard_logging_payload["total_tokens"]
|
||||
response_cost = standard_logging_payload["response_cost"]
|
||||
_team_spend = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_spend", None
|
||||
)
|
||||
_team_max_budget = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_max_budget", None
|
||||
)
|
||||
_remaining_team_budget = safe_get_remaining_budget(
|
||||
max_budget=_team_max_budget, spend=_team_spend
|
||||
)
|
||||
|
||||
_api_key_spend = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_spend", None
|
||||
)
|
||||
_api_key_max_budget = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_max_budget", None
|
||||
)
|
||||
_remaining_api_key_budget = safe_get_remaining_budget(
|
||||
max_budget=_api_key_max_budget, spend=_api_key_spend
|
||||
)
|
||||
|
||||
print_verbose(
|
||||
f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
|
||||
|
@ -402,24 +383,82 @@ class PrometheusLogger(CustomLogger):
|
|||
|
||||
user_api_key = hash_token(user_api_key)
|
||||
|
||||
self.litellm_requests_metric.labels(
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc()
|
||||
self.litellm_spend_metric.labels(
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc(response_cost)
|
||||
# increment total LLM requests and spend metric
|
||||
self._increment_top_level_request_and_spend_metrics(
|
||||
end_user_id=end_user_id,
|
||||
user_api_key=user_api_key,
|
||||
user_api_key_alias=user_api_key_alias,
|
||||
model=model,
|
||||
user_api_team=user_api_team,
|
||||
user_api_team_alias=user_api_team_alias,
|
||||
user_id=user_id,
|
||||
response_cost=response_cost,
|
||||
)
|
||||
|
||||
# input, output, total token metrics
|
||||
self._increment_token_metrics(
|
||||
# why type ignore below?
|
||||
# 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
|
||||
# 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
|
||||
standard_logging_payload=standard_logging_payload, # type: ignore
|
||||
end_user_id=end_user_id,
|
||||
user_api_key=user_api_key,
|
||||
user_api_key_alias=user_api_key_alias,
|
||||
model=model,
|
||||
user_api_team=user_api_team,
|
||||
user_api_team_alias=user_api_team_alias,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
# remaining budget metrics
|
||||
self._increment_remaining_budget_metrics(
|
||||
user_api_team=user_api_team,
|
||||
user_api_team_alias=user_api_team_alias,
|
||||
user_api_key=user_api_key,
|
||||
user_api_key_alias=user_api_key_alias,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
# set proxy virtual key rpm/tpm metrics
|
||||
self._set_virtual_key_rate_limit_metrics(
|
||||
user_api_key=user_api_key,
|
||||
user_api_key_alias=user_api_key_alias,
|
||||
kwargs=kwargs,
|
||||
metadata=_metadata,
|
||||
)
|
||||
|
||||
# set latency metrics
|
||||
self._set_latency_metrics(
|
||||
kwargs=kwargs,
|
||||
model=model,
|
||||
user_api_key=user_api_key,
|
||||
user_api_key_alias=user_api_key_alias,
|
||||
user_api_team=user_api_team,
|
||||
user_api_team_alias=user_api_team_alias,
|
||||
# why type ignore below?
|
||||
# 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
|
||||
# 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
|
||||
standard_logging_payload=standard_logging_payload, # type: ignore
|
||||
)
|
||||
|
||||
# set x-ratelimit headers
|
||||
self.set_llm_deployment_success_metrics(
|
||||
kwargs, start_time, end_time, output_tokens
|
||||
)
|
||||
pass
|
||||
|
||||
def _increment_token_metrics(
|
||||
self,
|
||||
standard_logging_payload: StandardLoggingPayload,
|
||||
end_user_id: Optional[str],
|
||||
user_api_key: Optional[str],
|
||||
user_api_key_alias: Optional[str],
|
||||
model: Optional[str],
|
||||
user_api_team: Optional[str],
|
||||
user_api_team_alias: Optional[str],
|
||||
user_id: Optional[str],
|
||||
):
|
||||
# token metrics
|
||||
self.litellm_tokens_metric.labels(
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
|
@ -450,6 +489,34 @@ class PrometheusLogger(CustomLogger):
|
|||
user_id,
|
||||
).inc(standard_logging_payload["completion_tokens"])
|
||||
|
||||
def _increment_remaining_budget_metrics(
|
||||
self,
|
||||
user_api_team: Optional[str],
|
||||
user_api_team_alias: Optional[str],
|
||||
user_api_key: Optional[str],
|
||||
user_api_key_alias: Optional[str],
|
||||
litellm_params: dict,
|
||||
):
|
||||
_team_spend = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_spend", None
|
||||
)
|
||||
_team_max_budget = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_team_max_budget", None
|
||||
)
|
||||
_remaining_team_budget = self._safe_get_remaining_budget(
|
||||
max_budget=_team_max_budget, spend=_team_spend
|
||||
)
|
||||
|
||||
_api_key_spend = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_spend", None
|
||||
)
|
||||
_api_key_max_budget = litellm_params.get("metadata", {}).get(
|
||||
"user_api_key_max_budget", None
|
||||
)
|
||||
_remaining_api_key_budget = self._safe_get_remaining_budget(
|
||||
max_budget=_api_key_max_budget, spend=_api_key_spend
|
||||
)
|
||||
# Remaining Budget Metrics
|
||||
self.litellm_remaining_team_budget_metric.labels(
|
||||
user_api_team, user_api_team_alias
|
||||
).set(_remaining_team_budget)
|
||||
|
@ -458,6 +525,47 @@ class PrometheusLogger(CustomLogger):
|
|||
user_api_key, user_api_key_alias
|
||||
).set(_remaining_api_key_budget)
|
||||
|
||||
def _increment_top_level_request_and_spend_metrics(
|
||||
self,
|
||||
end_user_id: Optional[str],
|
||||
user_api_key: Optional[str],
|
||||
user_api_key_alias: Optional[str],
|
||||
model: Optional[str],
|
||||
user_api_team: Optional[str],
|
||||
user_api_team_alias: Optional[str],
|
||||
user_id: Optional[str],
|
||||
response_cost: float,
|
||||
):
|
||||
self.litellm_requests_metric.labels(
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc()
|
||||
self.litellm_spend_metric.labels(
|
||||
end_user_id,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
model,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
user_id,
|
||||
).inc(response_cost)
|
||||
|
||||
def _set_virtual_key_rate_limit_metrics(
|
||||
self,
|
||||
user_api_key: Optional[str],
|
||||
user_api_key_alias: Optional[str],
|
||||
kwargs: dict,
|
||||
metadata: dict,
|
||||
):
|
||||
from litellm.proxy.common_utils.callback_utils import (
|
||||
get_model_group_from_litellm_kwargs,
|
||||
)
|
||||
|
||||
# Set remaining rpm/tpm for API Key + model
|
||||
# see parallel_request_limiter.py - variables are set there
|
||||
model_group = get_model_group_from_litellm_kwargs(kwargs)
|
||||
|
@ -466,10 +574,8 @@ class PrometheusLogger(CustomLogger):
|
|||
)
|
||||
remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}"
|
||||
|
||||
remaining_requests = _metadata.get(
|
||||
remaining_requests_variable_name, sys.maxsize
|
||||
)
|
||||
remaining_tokens = _metadata.get(remaining_tokens_variable_name, sys.maxsize)
|
||||
remaining_requests = metadata.get(remaining_requests_variable_name, sys.maxsize)
|
||||
remaining_tokens = metadata.get(remaining_tokens_variable_name, sys.maxsize)
|
||||
|
||||
self.litellm_remaining_api_key_requests_for_model.labels(
|
||||
user_api_key, user_api_key_alias, model_group
|
||||
|
@ -479,9 +585,20 @@ class PrometheusLogger(CustomLogger):
|
|||
user_api_key, user_api_key_alias, model_group
|
||||
).set(remaining_tokens)
|
||||
|
||||
def _set_latency_metrics(
|
||||
self,
|
||||
kwargs: dict,
|
||||
model: Optional[str],
|
||||
user_api_key: Optional[str],
|
||||
user_api_key_alias: Optional[str],
|
||||
user_api_team: Optional[str],
|
||||
user_api_team_alias: Optional[str],
|
||||
standard_logging_payload: StandardLoggingPayload,
|
||||
):
|
||||
# latency metrics
|
||||
total_time: timedelta = kwargs.get("end_time") - kwargs.get("start_time")
|
||||
total_time_seconds = total_time.total_seconds()
|
||||
model_parameters: dict = standard_logging_payload["model_parameters"]
|
||||
end_time: datetime = kwargs.get("end_time") or datetime.now()
|
||||
start_time: Optional[datetime] = kwargs.get("start_time")
|
||||
api_call_start_time = kwargs.get("api_call_start_time", None)
|
||||
|
||||
completion_start_time = kwargs.get("completion_start_time", None)
|
||||
|
@ -509,9 +626,7 @@ class PrometheusLogger(CustomLogger):
|
|||
if api_call_start_time is not None and isinstance(
|
||||
api_call_start_time, datetime
|
||||
):
|
||||
api_call_total_time: timedelta = (
|
||||
kwargs.get("end_time") - api_call_start_time
|
||||
)
|
||||
api_call_total_time: timedelta = end_time - api_call_start_time
|
||||
api_call_total_time_seconds = api_call_total_time.total_seconds()
|
||||
self.litellm_llm_api_latency_metric.labels(
|
||||
model,
|
||||
|
@ -521,20 +636,17 @@ class PrometheusLogger(CustomLogger):
|
|||
user_api_team_alias,
|
||||
).observe(api_call_total_time_seconds)
|
||||
|
||||
# log metrics
|
||||
self.litellm_request_total_latency_metric.labels(
|
||||
model,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
).observe(total_time_seconds)
|
||||
|
||||
# set x-ratelimit headers
|
||||
self.set_llm_deployment_success_metrics(
|
||||
kwargs, start_time, end_time, output_tokens
|
||||
)
|
||||
pass
|
||||
# total request latency
|
||||
if start_time is not None and isinstance(start_time, datetime):
|
||||
total_time: timedelta = end_time - start_time
|
||||
total_time_seconds = total_time.total_seconds()
|
||||
self.litellm_request_total_latency_metric.labels(
|
||||
model,
|
||||
user_api_key,
|
||||
user_api_key_alias,
|
||||
user_api_team,
|
||||
user_api_team_alias,
|
||||
).observe(total_time_seconds)
|
||||
|
||||
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
|
@ -651,24 +763,31 @@ class PrometheusLogger(CustomLogger):
|
|||
pass
|
||||
|
||||
def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
|
||||
"""
|
||||
Sets Failure metrics when an LLM API call fails
|
||||
|
||||
- mark the deployment as partial outage
|
||||
- increment deployment failure responses metric
|
||||
- increment deployment total requests metric
|
||||
|
||||
Args:
|
||||
request_kwargs: dict
|
||||
|
||||
"""
|
||||
try:
|
||||
verbose_logger.debug("setting remaining tokens requests metric")
|
||||
standard_logging_payload: StandardLoggingPayload = request_kwargs.get(
|
||||
"standard_logging_object", {}
|
||||
)
|
||||
_response_headers = request_kwargs.get("response_headers")
|
||||
_litellm_params = request_kwargs.get("litellm_params", {}) or {}
|
||||
_metadata = _litellm_params.get("metadata", {})
|
||||
litellm_model_name = request_kwargs.get("model", None)
|
||||
api_base = _metadata.get("api_base", None)
|
||||
model_group = _metadata.get("model_group", None)
|
||||
if api_base is None:
|
||||
api_base = _litellm_params.get("api_base", None)
|
||||
llm_provider = _litellm_params.get("custom_llm_provider", None)
|
||||
_model_info = _metadata.get("model_info") or {}
|
||||
model_id = _model_info.get("id", None)
|
||||
model_group = standard_logging_payload.get("model_group", None)
|
||||
api_base = standard_logging_payload.get("api_base", None)
|
||||
model_id = standard_logging_payload.get("model_id", None)
|
||||
exception: Exception = request_kwargs.get("exception", None)
|
||||
|
||||
llm_provider = _litellm_params.get("custom_llm_provider", None)
|
||||
|
||||
"""
|
||||
log these labels
|
||||
["litellm_model_name", "model_id", "api_base", "api_provider"]
|
||||
|
@ -891,7 +1010,7 @@ class PrometheusLogger(CustomLogger):
|
|||
"""
|
||||
from litellm.litellm_core_utils.litellm_logging import (
|
||||
StandardLoggingMetadata,
|
||||
get_standard_logging_metadata,
|
||||
StandardLoggingPayloadSetup,
|
||||
)
|
||||
|
||||
verbose_logger.debug(
|
||||
|
@ -900,8 +1019,10 @@ class PrometheusLogger(CustomLogger):
|
|||
kwargs,
|
||||
)
|
||||
_metadata = kwargs.get("metadata", {})
|
||||
standard_metadata: StandardLoggingMetadata = get_standard_logging_metadata(
|
||||
metadata=_metadata
|
||||
standard_metadata: StandardLoggingMetadata = (
|
||||
StandardLoggingPayloadSetup.get_standard_logging_metadata(
|
||||
metadata=_metadata
|
||||
)
|
||||
)
|
||||
_new_model = kwargs.get("model")
|
||||
self.litellm_deployment_successful_fallbacks.labels(
|
||||
|
@ -923,7 +1044,7 @@ class PrometheusLogger(CustomLogger):
|
|||
"""
|
||||
from litellm.litellm_core_utils.litellm_logging import (
|
||||
StandardLoggingMetadata,
|
||||
get_standard_logging_metadata,
|
||||
StandardLoggingPayloadSetup,
|
||||
)
|
||||
|
||||
verbose_logger.debug(
|
||||
|
@ -933,8 +1054,10 @@ class PrometheusLogger(CustomLogger):
|
|||
)
|
||||
_new_model = kwargs.get("model")
|
||||
_metadata = kwargs.get("metadata", {})
|
||||
standard_metadata: StandardLoggingMetadata = get_standard_logging_metadata(
|
||||
metadata=_metadata
|
||||
standard_metadata: StandardLoggingMetadata = (
|
||||
StandardLoggingPayloadSetup.get_standard_logging_metadata(
|
||||
metadata=_metadata
|
||||
)
|
||||
)
|
||||
self.litellm_deployment_failed_fallbacks.labels(
|
||||
requested_model=original_model_group,
|
||||
|
@ -951,8 +1074,8 @@ class PrometheusLogger(CustomLogger):
|
|||
self,
|
||||
state: int,
|
||||
litellm_model_name: str,
|
||||
model_id: str,
|
||||
api_base: str,
|
||||
model_id: Optional[str],
|
||||
api_base: Optional[str],
|
||||
api_provider: str,
|
||||
):
|
||||
self.litellm_deployment_state.labels(
|
||||
|
@ -973,8 +1096,8 @@ class PrometheusLogger(CustomLogger):
|
|||
def set_deployment_partial_outage(
|
||||
self,
|
||||
litellm_model_name: str,
|
||||
model_id: str,
|
||||
api_base: str,
|
||||
model_id: Optional[str],
|
||||
api_base: Optional[str],
|
||||
api_provider: str,
|
||||
):
|
||||
self.set_litellm_deployment_state(
|
||||
|
@ -984,8 +1107,8 @@ class PrometheusLogger(CustomLogger):
|
|||
def set_deployment_complete_outage(
|
||||
self,
|
||||
litellm_model_name: str,
|
||||
model_id: str,
|
||||
api_base: str,
|
||||
model_id: Optional[str],
|
||||
api_base: Optional[str],
|
||||
api_provider: str,
|
||||
):
|
||||
self.set_litellm_deployment_state(
|
||||
|
@ -1007,14 +1130,13 @@ class PrometheusLogger(CustomLogger):
|
|||
litellm_model_name, model_id, api_base, api_provider, exception_status
|
||||
).inc()
|
||||
|
||||
def _safe_get_remaining_budget(
|
||||
self, max_budget: Optional[float], spend: Optional[float]
|
||||
) -> float:
|
||||
if max_budget is None:
|
||||
return float("inf")
|
||||
|
||||
def safe_get_remaining_budget(
|
||||
max_budget: Optional[float], spend: Optional[float]
|
||||
) -> float:
|
||||
if max_budget is None:
|
||||
return float("inf")
|
||||
if spend is None:
|
||||
return max_budget
|
||||
|
||||
if spend is None:
|
||||
return max_budget
|
||||
|
||||
return max_budget - spend
|
||||
return max_budget - spend
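The budget helper's semantics, restated as a standalone sketch for readability (the function below mirrors the code above rather than importing it):

from typing import Optional

def remaining_budget(max_budget: Optional[float], spend: Optional[float]) -> float:
    if max_budget is None:      # no budget configured -> unlimited
        return float("inf")
    if spend is None:           # nothing spent yet -> full budget remains
        return max_budget
    return max_budget - spend

assert remaining_budget(None, 10.0) == float("inf")
assert remaining_budget(100.0, None) == 100.0
assert remaining_budget(100.0, 42.5) == 57.5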
|
||||
|
|
|
@ -333,6 +333,14 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915
|
|||
api_key: Optional[str],
|
||||
dynamic_api_key: Optional[str],
|
||||
) -> Tuple[str, str, Optional[str], Optional[str]]:
|
||||
"""
|
||||
Returns:
|
||||
Tuple[str, str, Optional[str], Optional[str]]:
|
||||
model: str
|
||||
custom_llm_provider: str
|
||||
dynamic_api_key: Optional[str]
|
||||
api_base: Optional[str]
|
||||
"""
|
||||
custom_llm_provider = model.split("/", 1)[0]
|
||||
model = model.split("/", 1)[1]
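A quick illustration of the prefix split above; the model string is invented:

model = "groq/llama3-8b-8192"
custom_llm_provider = model.split("/", 1)[0]   # "groq"
model = model.split("/", 1)[1]                 # "llama3-8b-8192"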
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ import time
|
|||
import traceback
|
||||
import uuid
|
||||
from datetime import datetime as dt_object
|
||||
from typing import Any, Callable, Dict, List, Literal, Optional, Union
|
||||
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
@ -51,6 +51,7 @@ from litellm.types.utils import (
|
|||
StandardPassThroughResponseObject,
|
||||
TextCompletionResponse,
|
||||
TranscriptionResponse,
|
||||
Usage,
|
||||
)
|
||||
from litellm.utils import (
|
||||
_get_base_model_from_metadata,
|
||||
|
@ -58,22 +59,21 @@ from litellm.utils import (
|
|||
prompt_token_calculator,
|
||||
)
|
||||
|
||||
from ..integrations.aispend import AISpendLogger
|
||||
from ..integrations.argilla import ArgillaLogger
|
||||
from ..integrations.arize_ai import ArizeLogger
|
||||
from ..integrations.athina import AthinaLogger
|
||||
from ..integrations.berrispend import BerriSpendLogger
|
||||
from ..integrations.braintrust_logging import BraintrustLogger
|
||||
from ..integrations.clickhouse import ClickhouseLogger
|
||||
from ..integrations.datadog.datadog import DataDogLogger
|
||||
from ..integrations.datadog.datadog_llm_obs import DataDogLLMObsLogger
|
||||
from ..integrations.dynamodb import DyanmoDBLogger
|
||||
from ..integrations.galileo import GalileoObserve
|
||||
from ..integrations.gcs_bucket.gcs_bucket import GCSBucketLogger
|
||||
from ..integrations.greenscale import GreenscaleLogger
|
||||
from ..integrations.helicone import HeliconeLogger
|
||||
from ..integrations.lago import LagoLogger
|
||||
from ..integrations.langfuse import LangFuseLogger
|
||||
from ..integrations.langfuse.langfuse import LangFuseLogger
|
||||
from ..integrations.langfuse.langfuse_handler import LangFuseHandler
|
||||
from ..integrations.langsmith import LangsmithLogger
|
||||
from ..integrations.litedebugger import LiteDebugger
|
||||
from ..integrations.literal_ai import LiteralAILogger
|
||||
from ..integrations.logfire_logger import LogfireLevel, LogfireLogger
|
||||
from ..integrations.lunary import LunaryLogger
|
||||
|
@ -122,13 +122,9 @@ prometheusLogger = None
|
|||
dynamoLogger = None
|
||||
s3Logger = None
|
||||
genericAPILogger = None
|
||||
clickHouseLogger = None
|
||||
greenscaleLogger = None
|
||||
lunaryLogger = None
|
||||
aispendLogger = None
|
||||
berrispendLogger = None
|
||||
supabaseClient = None
|
||||
liteDebuggerClient = None
|
||||
callback_list: Optional[List[str]] = []
|
||||
user_logger_fn = None
|
||||
additional_details: Optional[Dict[str, str]] = {}
|
||||
|
@ -191,7 +187,7 @@ in_memory_dynamic_logger_cache = DynamicLoggingCache()
|
|||
|
||||
|
||||
class Logging:
|
||||
global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
|
||||
global supabaseClient, promptLayerLogger, weightsBiasesLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
|
||||
custom_pricing: bool = False
|
||||
stream_options = None
|
||||
|
||||
|
@ -970,22 +966,6 @@ class Logging:
|
|||
):
|
||||
print_verbose("no-log request, skipping logging")
|
||||
continue
|
||||
if callback == "lite_debugger" and liteDebuggerClient is not None:
|
||||
print_verbose("reaches lite_debugger for logging!")
|
||||
print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
|
||||
print_verbose(
|
||||
f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}"
|
||||
)
|
||||
liteDebuggerClient.log_event(
|
||||
end_user=kwargs.get("user", "default"),
|
||||
response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
litellm_call_id=self.litellm_call_id,
|
||||
print_verbose=print_verbose,
|
||||
call_type=self.call_type,
|
||||
stream=self.stream,
|
||||
)
|
||||
if callback == "promptlayer" and promptLayerLogger is not None:
|
||||
print_verbose("reaches promptlayer for logging!")
|
||||
promptLayerLogger.log_event(
|
||||
|
@ -1136,74 +1116,13 @@ class Logging:
|
|||
print_verbose("reaches langfuse for streaming logging!")
|
||||
result = kwargs["complete_streaming_response"]
|
||||
|
||||
temp_langfuse_logger = langFuseLogger
|
||||
if langFuseLogger is None or (
|
||||
(
|
||||
self.standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
)
|
||||
is not None
|
||||
and self.standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
)
|
||||
!= langFuseLogger.public_key
|
||||
)
|
||||
or (
|
||||
self.standard_callback_dynamic_params.get(
|
||||
"langfuse_secret"
|
||||
)
|
||||
is not None
|
||||
and self.standard_callback_dynamic_params.get(
|
||||
"langfuse_secret"
|
||||
)
|
||||
!= langFuseLogger.secret_key
|
||||
)
|
||||
or (
|
||||
self.standard_callback_dynamic_params.get(
|
||||
"langfuse_host"
|
||||
)
|
||||
is not None
|
||||
and self.standard_callback_dynamic_params.get(
|
||||
"langfuse_host"
|
||||
)
|
||||
!= langFuseLogger.langfuse_host
|
||||
)
|
||||
):
|
||||
credentials = {
|
||||
"langfuse_public_key": self.standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
),
|
||||
"langfuse_secret": self.standard_callback_dynamic_params.get(
|
||||
"langfuse_secret"
|
||||
),
|
||||
"langfuse_host": self.standard_callback_dynamic_params.get(
|
||||
"langfuse_host"
|
||||
),
|
||||
}
|
||||
temp_langfuse_logger = (
|
||||
in_memory_dynamic_logger_cache.get_cache(
|
||||
credentials=credentials, service_name="langfuse"
|
||||
)
|
||||
)
|
||||
if temp_langfuse_logger is None:
|
||||
temp_langfuse_logger = LangFuseLogger(
|
||||
langfuse_public_key=self.standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
),
|
||||
langfuse_secret=self.standard_callback_dynamic_params.get(
|
||||
"langfuse_secret"
|
||||
),
|
||||
langfuse_host=self.standard_callback_dynamic_params.get(
|
||||
"langfuse_host"
|
||||
),
|
||||
)
|
||||
in_memory_dynamic_logger_cache.set_cache(
|
||||
credentials=credentials,
|
||||
service_name="langfuse",
|
||||
logging_obj=temp_langfuse_logger,
|
||||
)
|
||||
if temp_langfuse_logger is not None:
|
||||
_response = temp_langfuse_logger.log_event(
|
||||
langfuse_logger_to_use = LangFuseHandler.get_langfuse_logger_for_request(
|
||||
globalLangfuseLogger=langFuseLogger,
|
||||
standard_callback_dynamic_params=self.standard_callback_dynamic_params,
|
||||
in_memory_dynamic_logger_cache=in_memory_dynamic_logger_cache,
|
||||
)
|
||||
if langfuse_logger_to_use is not None:
|
||||
_response = langfuse_logger_to_use.log_event(
|
||||
kwargs=kwargs,
|
||||
response_obj=result,
|
||||
start_time=start_time,
|
||||
|
@ -1248,37 +1167,6 @@ class Logging:
|
|||
user_id=kwargs.get("user", None),
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
if callback == "clickhouse":
|
||||
global clickHouseLogger
|
||||
verbose_logger.debug("reaches clickhouse for success logging!")
|
||||
kwargs = {}
|
||||
for k, v in self.model_call_details.items():
|
||||
if (
|
||||
k != "original_response"
|
||||
): # copy.deepcopy raises errors as this could be a coroutine
|
||||
kwargs[k] = v
|
||||
# this only logs streaming once, complete_streaming_response exists i.e when stream ends
|
||||
if self.stream:
|
||||
verbose_logger.debug(
|
||||
f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
|
||||
)
|
||||
if complete_streaming_response is None:
|
||||
continue
|
||||
else:
|
||||
print_verbose(
|
||||
"reaches clickhouse for streaming logging!"
|
||||
)
|
||||
result = kwargs["complete_streaming_response"]
|
||||
if clickHouseLogger is None:
|
||||
clickHouseLogger = ClickhouseLogger()
|
||||
clickHouseLogger.log_event(
|
||||
kwargs=kwargs,
|
||||
response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
user_id=kwargs.get("user", None),
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
if callback == "greenscale" and greenscaleLogger is not None:
|
||||
kwargs = {}
|
||||
for k, v in self.model_call_details.items():
|
||||
|
@ -1874,9 +1762,7 @@ class Logging:
|
|||
)
|
||||
for callback in callbacks:
|
||||
try:
|
||||
if callback == "lite_debugger" and liteDebuggerClient is not None:
|
||||
pass
|
||||
elif callback == "lunary" and lunaryLogger is not None:
|
||||
if callback == "lunary" and lunaryLogger is not None:
|
||||
print_verbose("reaches lunary for logging error!")
|
||||
|
||||
model = self.model
|
||||
|
@ -1962,50 +1848,12 @@ class Logging:
|
|||
): # copy.deepcopy raises errors as this could be a coroutine
|
||||
kwargs[k] = v
|
||||
# this only logs streaming once, complete_streaming_response exists i.e when stream ends
|
||||
if langFuseLogger is None or (
|
||||
(
|
||||
self.standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
)
|
||||
is not None
|
||||
and self.standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
)
|
||||
!= langFuseLogger.public_key
|
||||
)
|
||||
or (
|
||||
self.standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
)
|
||||
is not None
|
||||
and self.standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
)
|
||||
!= langFuseLogger.public_key
|
||||
)
|
||||
or (
|
||||
self.standard_callback_dynamic_params.get(
|
||||
"langfuse_host"
|
||||
)
|
||||
is not None
|
||||
and self.standard_callback_dynamic_params.get(
|
||||
"langfuse_host"
|
||||
)
|
||||
!= langFuseLogger.langfuse_host
|
||||
)
|
||||
):
|
||||
langFuseLogger = LangFuseLogger(
|
||||
langfuse_public_key=self.standard_callback_dynamic_params.get(
|
||||
"langfuse_public_key"
|
||||
),
|
||||
langfuse_secret=self.standard_callback_dynamic_params.get(
|
||||
"langfuse_secret"
|
||||
),
|
||||
langfuse_host=self.standard_callback_dynamic_params.get(
|
||||
"langfuse_host"
|
||||
),
|
||||
)
|
||||
_response = langFuseLogger.log_event(
|
||||
langfuse_logger_to_use = LangFuseHandler.get_langfuse_logger_for_request(
|
||||
globalLangfuseLogger=langFuseLogger,
|
||||
standard_callback_dynamic_params=self.standard_callback_dynamic_params,
|
||||
in_memory_dynamic_logger_cache=in_memory_dynamic_logger_cache,
|
||||
)
|
||||
_response = langfuse_logger_to_use.log_event(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
response_obj=None,
|
||||
|
@ -2195,7 +2043,7 @@ def set_callbacks(callback_list, function_id=None): # noqa: PLR0915
|
|||
"""
|
||||
Globally sets the callback client
|
||||
"""
|
||||
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
|
||||
    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, supabaseClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger

    try:
        for callback in callback_list:
@@ -2275,26 +2123,12 @@ def set_callbacks(callback_list, function_id=None):  # noqa: PLR0915
                weightsBiasesLogger = WeightsBiasesLogger()
            elif callback == "logfire":
                logfireLogger = LogfireLogger()
            elif callback == "aispend":
                aispendLogger = AISpendLogger()
            elif callback == "berrispend":
                berrispendLogger = BerriSpendLogger()
            elif callback == "supabase":
                print_verbose("instantiating supabase")
                supabaseClient = Supabase()
            elif callback == "greenscale":
                greenscaleLogger = GreenscaleLogger()
                print_verbose("Initialized Greenscale Logger")
            elif callback == "lite_debugger":
                print_verbose("instantiating lite_debugger")
                if function_id:
                    liteDebuggerClient = LiteDebugger(email=function_id)
                elif litellm.token:
                    liteDebuggerClient = LiteDebugger(email=litellm.token)
                elif litellm.email:
                    liteDebuggerClient = LiteDebugger(email=litellm.email)
                else:
                    liteDebuggerClient = LiteDebugger(email=str(uuid.uuid4()))
            elif callable(callback):
                customLogger = CustomLogger()
    except Exception as e:
@@ -2372,6 +2206,10 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
        _datadog_logger = DataDogLogger()
        _in_memory_loggers.append(_datadog_logger)
        return _datadog_logger  # type: ignore
    elif logging_integration == "datadog_llm_observability":
        _datadog_llm_obs_logger = DataDogLLMObsLogger()
        _in_memory_loggers.append(_datadog_llm_obs_logger)
        return _datadog_llm_obs_logger  # type: ignore
    elif logging_integration == "gcs_bucket":
        for callback in _in_memory_loggers:
            if isinstance(callback, GCSBucketLogger):
@@ -2389,22 +2227,16 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
        _in_memory_loggers.append(_opik_logger)
        return _opik_logger  # type: ignore
    elif logging_integration == "arize":
        if "ARIZE_SPACE_KEY" not in os.environ:
            raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
        if "ARIZE_API_KEY" not in os.environ:
            raise ValueError("ARIZE_API_KEY not found in environment variables")
        from litellm.integrations.opentelemetry import (
            OpenTelemetry,
            OpenTelemetryConfig,
        )

        arize_endpoint = (
            os.environ.get("ARIZE_ENDPOINT", None) or "https://otlp.arize.com/v1"
        )
        otel_config = OpenTelemetryConfig(
            exporter="otlp_grpc",
            endpoint=arize_endpoint,
        )
        otel_config = ArizeLogger.get_arize_opentelemetry_config()
        if otel_config is None:
            raise ValueError(
                "No valid endpoint found for Arize, please set 'ARIZE_ENDPOINT' to your GRPC endpoint or 'ARIZE_HTTP_ENDPOINT' to your HTTP endpoint"
            )
        os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
            f"space_key={os.getenv('ARIZE_SPACE_KEY')},api_key={os.getenv('ARIZE_API_KEY')}"
        )
@@ -2417,7 +2249,6 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
        _otel_logger = OpenTelemetry(config=otel_config, callback_name="arize")
        _in_memory_loggers.append(_otel_logger)
        return _otel_logger  # type: ignore

    elif logging_integration == "otel":
        from litellm.integrations.opentelemetry import OpenTelemetry

@@ -2546,6 +2377,10 @@ def get_custom_logger_compatible_class(
        for callback in _in_memory_loggers:
            if isinstance(callback, DataDogLogger):
                return callback
    elif logging_integration == "datadog_llm_observability":
        for callback in _in_memory_loggers:
            if isinstance(callback, DataDogLLMObsLogger):
                return callback
    elif logging_integration == "gcs_bucket":
        for callback in _in_memory_loggers:
            if isinstance(callback, GCSBucketLogger):
@@ -2629,7 +2464,184 @@ def is_valid_sha256_hash(value: str) -> bool:
    return bool(re.fullmatch(r"[a-fA-F0-9]{64}", value))

def get_standard_logging_object_payload(  # noqa: PLR0915
class StandardLoggingPayloadSetup:
    @staticmethod
    def cleanup_timestamps(
        start_time: Union[dt_object, float],
        end_time: Union[dt_object, float],
        completion_start_time: Union[dt_object, float],
    ) -> Tuple[float, float, float]:
        """
        Convert datetime objects to floats
        """

        if isinstance(start_time, datetime.datetime):
            start_time_float = start_time.timestamp()
        elif isinstance(start_time, float):
            start_time_float = start_time
        else:
            raise ValueError(
                f"start_time is required, got={start_time} of type {type(start_time)}"
            )

        if isinstance(end_time, datetime.datetime):
            end_time_float = end_time.timestamp()
        elif isinstance(end_time, float):
            end_time_float = end_time
        else:
            raise ValueError(
                f"end_time is required, got={end_time} of type {type(end_time)}"
            )

        if isinstance(completion_start_time, datetime.datetime):
            completion_start_time_float = completion_start_time.timestamp()
        elif isinstance(completion_start_time, float):
            completion_start_time_float = completion_start_time
        else:
            completion_start_time_float = end_time_float

        return start_time_float, end_time_float, completion_start_time_float

    @staticmethod
    def get_standard_logging_metadata(
        metadata: Optional[Dict[str, Any]]
    ) -> StandardLoggingMetadata:
        """
        Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.

        Args:
            metadata (Optional[Dict[str, Any]]): The original metadata dictionary.

        Returns:
            StandardLoggingMetadata: A StandardLoggingMetadata object containing the cleaned metadata.

        Note:
            - If the input metadata is None or not a dictionary, an empty StandardLoggingMetadata object is returned.
            - If 'user_api_key' is present in metadata and is a valid SHA256 hash, it's stored as 'user_api_key_hash'.
        """
        # Initialize with default values
        clean_metadata = StandardLoggingMetadata(
            user_api_key_hash=None,
            user_api_key_alias=None,
            user_api_key_team_id=None,
            user_api_key_org_id=None,
            user_api_key_user_id=None,
            user_api_key_team_alias=None,
            spend_logs_metadata=None,
            requester_ip_address=None,
            requester_metadata=None,
        )
        if isinstance(metadata, dict):
            # Filter the metadata dictionary to include only the specified keys
            clean_metadata = StandardLoggingMetadata(
                **{  # type: ignore
                    key: metadata[key]
                    for key in StandardLoggingMetadata.__annotations__.keys()
                    if key in metadata
                }
            )

            if metadata.get("user_api_key") is not None:
                if is_valid_sha256_hash(str(metadata.get("user_api_key"))):
                    clean_metadata["user_api_key_hash"] = metadata.get(
                        "user_api_key"
                    )  # this is the hash
        return clean_metadata

    @staticmethod
    def get_usage_from_response_obj(response_obj: Optional[dict]) -> Usage:
        ## BASE CASE ##
        if response_obj is None:
            return Usage(
                prompt_tokens=0,
                completion_tokens=0,
                total_tokens=0,
            )

        usage = response_obj.get("usage", None) or {}
        if usage is None or (
            not isinstance(usage, dict) and not isinstance(usage, Usage)
        ):
            return Usage(
                prompt_tokens=0,
                completion_tokens=0,
                total_tokens=0,
            )
        elif isinstance(usage, Usage):
            return usage
        elif isinstance(usage, dict):
            return Usage(**usage)

        raise ValueError(f"usage is required, got={usage} of type {type(usage)}")

    @staticmethod
    def get_model_cost_information(
        base_model: Optional[str],
        custom_pricing: Optional[bool],
        custom_llm_provider: Optional[str],
        init_response_obj: Union[Any, BaseModel, dict],
    ) -> StandardLoggingModelInformation:

        model_cost_name = _select_model_name_for_cost_calc(
            model=None,
            completion_response=init_response_obj,  # type: ignore
            base_model=base_model,
            custom_pricing=custom_pricing,
        )
        if model_cost_name is None:
            model_cost_information = StandardLoggingModelInformation(
                model_map_key="", model_map_value=None
            )
        else:
            try:
                _model_cost_information = litellm.get_model_info(
                    model=model_cost_name, custom_llm_provider=custom_llm_provider
                )
                model_cost_information = StandardLoggingModelInformation(
                    model_map_key=model_cost_name,
                    model_map_value=_model_cost_information,
                )
            except Exception:
                verbose_logger.debug(  # keep in debug otherwise it will trigger on every call
                    "Model={} is not mapped in model cost map. Defaulting to None model_cost_information for standard_logging_payload".format(
                        model_cost_name
                    )
                )
                model_cost_information = StandardLoggingModelInformation(
                    model_map_key=model_cost_name, model_map_value=None
                )
        return model_cost_information

    @staticmethod
    def get_final_response_obj(
        response_obj: dict, init_response_obj: Union[Any, BaseModel, dict], kwargs: dict
    ) -> Optional[Union[dict, str, list]]:
        """
        Get final response object after redacting the message input/output from logging
        """
        if response_obj is not None:
            final_response_obj: Optional[Union[dict, str, list]] = response_obj
        elif isinstance(init_response_obj, list) or isinstance(init_response_obj, str):
            final_response_obj = init_response_obj
        else:
            final_response_obj = None

        modified_final_response_obj = redact_message_input_output_from_logging(
            model_call_details=kwargs,
            result=final_response_obj,
        )

        if modified_final_response_obj is not None and isinstance(
            modified_final_response_obj, BaseModel
        ):
            final_response_obj = modified_final_response_obj.model_dump()
        else:
            final_response_obj = modified_final_response_obj

        return final_response_obj
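
For reference, a minimal usage sketch of the static helpers above (illustrative only; the import path and sample values are assumptions, not part of this commit):

    import datetime

    # assumed import path for the class shown in this hunk
    from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

    start = datetime.datetime(2024, 1, 1, 12, 0, 0)
    end = datetime.datetime(2024, 1, 1, 12, 0, 5)
    s, e, c = StandardLoggingPayloadSetup.cleanup_timestamps(
        start_time=start, end_time=end, completion_start_time=end
    )
    assert e - s == 5.0 and c == e  # datetimes are converted to float timestamps

    usage = StandardLoggingPayloadSetup.get_usage_from_response_obj(
        response_obj={"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}
    )
    assert usage.total_tokens == 15  # a usage dict is coerced into a litellm Usage object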


def get_standard_logging_object_payload(
    kwargs: Optional[dict],
    init_response_obj: Union[Any, BaseModel, dict],
    start_time: dt_object,
@@ -2677,9 +2689,9 @@ def get_standard_logging_object_payload(  # noqa: PLR0915
    completion_start_time = kwargs.get("completion_start_time", end_time)
    call_type = kwargs.get("call_type")
    cache_hit = kwargs.get("cache_hit", False)
    usage = response_obj.get("usage", None) or {}
    if type(usage) is litellm.Usage:
        usage = dict(usage)
    usage = StandardLoggingPayloadSetup.get_usage_from_response_obj(
        response_obj=response_obj
    )
    id = response_obj.get("id", kwargs.get("litellm_call_id"))

    _model_id = metadata.get("model_info", {}).get("id", "")
@@ -2692,20 +2704,13 @@ def get_standard_logging_object_payload(  # noqa: PLR0915
    )

    # cleanup timestamps
    if isinstance(start_time, datetime.datetime):
        start_time_float = start_time.timestamp()
    elif isinstance(start_time, float):
        start_time_float = start_time
    if isinstance(end_time, datetime.datetime):
        end_time_float = end_time.timestamp()
    elif isinstance(end_time, float):
        end_time_float = end_time
    if isinstance(completion_start_time, datetime.datetime):
        completion_start_time_float = completion_start_time.timestamp()
    elif isinstance(completion_start_time, float):
        completion_start_time_float = completion_start_time
    else:
        completion_start_time_float = end_time_float
    start_time_float, end_time_float, completion_start_time_float = (
        StandardLoggingPayloadSetup.cleanup_timestamps(
            start_time=start_time,
            end_time=end_time,
            completion_start_time=completion_start_time,
        )
    )
    # clean up litellm hidden params
    clean_hidden_params = StandardLoggingHiddenParams(
        model_id=None,
@@ -2723,7 +2728,9 @@ def get_standard_logging_object_payload(  # noqa: PLR0915
        }
    )
    # clean up litellm metadata
    clean_metadata = get_standard_logging_metadata(metadata=metadata)
    clean_metadata = StandardLoggingPayloadSetup.get_standard_logging_metadata(
        metadata=metadata
    )

    if litellm.cache is not None:
        cache_key = litellm.cache.get_cache_key(**kwargs)
@@ -2745,58 +2752,21 @@ def get_standard_logging_object_payload(  # noqa: PLR0915
    ## Get model cost information ##
    base_model = _get_base_model_from_metadata(model_call_details=kwargs)
    custom_pricing = use_custom_pricing_for_model(litellm_params=litellm_params)
    model_cost_name = _select_model_name_for_cost_calc(
        model=None,
        completion_response=init_response_obj,  # type: ignore
    model_cost_information = StandardLoggingPayloadSetup.get_model_cost_information(
        base_model=base_model,
        custom_pricing=custom_pricing,
        custom_llm_provider=kwargs.get("custom_llm_provider"),
        init_response_obj=init_response_obj,
    )
    if model_cost_name is None:
        model_cost_information = StandardLoggingModelInformation(
            model_map_key="", model_map_value=None
        )
    else:
        custom_llm_provider = kwargs.get("custom_llm_provider", None)

        try:
            _model_cost_information = litellm.get_model_info(
                model=model_cost_name, custom_llm_provider=custom_llm_provider
            )
            model_cost_information = StandardLoggingModelInformation(
                model_map_key=model_cost_name,
                model_map_value=_model_cost_information,
            )
        except Exception:
            verbose_logger.debug(  # keep in debug otherwise it will trigger on every call
                "Model={} is not mapped in model cost map. Defaulting to None model_cost_information for standard_logging_payload".format(
                    model_cost_name
                )
            )
            model_cost_information = StandardLoggingModelInformation(
                model_map_key=model_cost_name, model_map_value=None
            )

    response_cost: float = kwargs.get("response_cost", 0) or 0.0

    if response_obj is not None:
        final_response_obj: Optional[Union[dict, str, list]] = response_obj
    elif isinstance(init_response_obj, list) or isinstance(init_response_obj, str):
        final_response_obj = init_response_obj
    else:
        final_response_obj = None

    modified_final_response_obj = redact_message_input_output_from_logging(
        model_call_details=kwargs,
        result=final_response_obj,
    ## get final response object ##
    final_response_obj = StandardLoggingPayloadSetup.get_final_response_obj(
        response_obj=response_obj,
        init_response_obj=init_response_obj,
        kwargs=kwargs,
    )

    if modified_final_response_obj is not None and isinstance(
        modified_final_response_obj, BaseModel
    ):
        final_response_obj = modified_final_response_obj.model_dump()
    else:
        final_response_obj = modified_final_response_obj

    payload: StandardLoggingPayload = StandardLoggingPayload(
        id=str(id),
        call_type=call_type or "",
@@ -2810,9 +2780,9 @@ def get_standard_logging_object_payload(  # noqa: PLR0915
        metadata=clean_metadata,
        cache_key=cache_key,
        response_cost=response_cost,
        total_tokens=usage.get("total_tokens", 0),
        prompt_tokens=usage.get("prompt_tokens", 0),
        completion_tokens=usage.get("completion_tokens", 0),
        total_tokens=usage.total_tokens,
        prompt_tokens=usage.prompt_tokens,
        completion_tokens=usage.completion_tokens,
        request_tags=request_tags,
        end_user=end_user_id or "",
        api_base=litellm_params.get("api_base", ""),
@@ -2859,6 +2829,7 @@ def get_standard_logging_metadata(
        user_api_key_hash=None,
        user_api_key_alias=None,
        user_api_key_team_id=None,
        user_api_key_org_id=None,
        user_api_key_user_id=None,
        user_api_key_team_alias=None,
        spend_logs_metadata=None,
@ -0,0 +1,508 @@
|
|||
import asyncio
|
||||
import json
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
from typing import Dict, Iterable, List, Literal, Optional, Union
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.types.utils import (
|
||||
ChatCompletionDeltaToolCall,
|
||||
ChatCompletionMessageToolCall,
|
||||
Choices,
|
||||
Delta,
|
||||
EmbeddingResponse,
|
||||
Function,
|
||||
ImageResponse,
|
||||
Message,
|
||||
ModelResponse,
|
||||
RerankResponse,
|
||||
StreamingChoices,
|
||||
TranscriptionResponse,
|
||||
Usage,
|
||||
)
|
||||
|
||||
from .get_headers import get_response_headers
|
||||
|
||||
|
||||
async def convert_to_streaming_response_async(response_object: Optional[dict] = None):
|
||||
"""
|
||||
Asynchronously converts a response object to a streaming response.
|
||||
|
||||
Args:
|
||||
response_object (Optional[dict]): The response object to be converted. Defaults to None.
|
||||
|
||||
Raises:
|
||||
Exception: If the response object is None.
|
||||
|
||||
Yields:
|
||||
ModelResponse: The converted streaming response object.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
model_response_object = ModelResponse(stream=True)
|
||||
|
||||
if model_response_object is None:
|
||||
raise Exception("Error in response creating model response object")
|
||||
|
||||
choice_list = []
|
||||
|
||||
for idx, choice in enumerate(response_object["choices"]):
|
||||
if (
|
||||
choice["message"].get("tool_calls", None) is not None
|
||||
and isinstance(choice["message"]["tool_calls"], list)
|
||||
and len(choice["message"]["tool_calls"]) > 0
|
||||
and isinstance(choice["message"]["tool_calls"][0], dict)
|
||||
):
|
||||
pydantic_tool_calls = []
|
||||
for index, t in enumerate(choice["message"]["tool_calls"]):
|
||||
if "index" not in t:
|
||||
t["index"] = index
|
||||
pydantic_tool_calls.append(ChatCompletionDeltaToolCall(**t))
|
||||
choice["message"]["tool_calls"] = pydantic_tool_calls
|
||||
delta = Delta(
|
||||
content=choice["message"].get("content", None),
|
||||
role=choice["message"]["role"],
|
||||
function_call=choice["message"].get("function_call", None),
|
||||
tool_calls=choice["message"].get("tool_calls", None),
|
||||
)
|
||||
finish_reason = choice.get("finish_reason", None)
|
||||
|
||||
if finish_reason is None:
|
||||
finish_reason = choice.get("finish_details")
|
||||
|
||||
logprobs = choice.get("logprobs", None)
|
||||
|
||||
choice = StreamingChoices(
|
||||
finish_reason=finish_reason, index=idx, delta=delta, logprobs=logprobs
|
||||
)
|
||||
choice_list.append(choice)
|
||||
|
||||
model_response_object.choices = choice_list
|
||||
|
||||
if "usage" in response_object and response_object["usage"] is not None:
|
||||
setattr(
|
||||
model_response_object,
|
||||
"usage",
|
||||
Usage(
|
||||
completion_tokens=response_object["usage"].get("completion_tokens", 0),
|
||||
prompt_tokens=response_object["usage"].get("prompt_tokens", 0),
|
||||
total_tokens=response_object["usage"].get("total_tokens", 0),
|
||||
),
|
||||
)
|
||||
|
||||
if "id" in response_object:
|
||||
model_response_object.id = response_object["id"]
|
||||
|
||||
if "created" in response_object:
|
||||
model_response_object.created = response_object["created"]
|
||||
|
||||
if "system_fingerprint" in response_object:
|
||||
model_response_object.system_fingerprint = response_object["system_fingerprint"]
|
||||
|
||||
if "model" in response_object:
|
||||
model_response_object.model = response_object["model"]
|
||||
|
||||
yield model_response_object
|
||||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
def convert_to_streaming_response(response_object: Optional[dict] = None):
|
||||
# used for yielding Cache hits when stream == True
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
model_response_object = ModelResponse(stream=True)
|
||||
choice_list = []
|
||||
for idx, choice in enumerate(response_object["choices"]):
|
||||
delta = Delta(
|
||||
content=choice["message"].get("content", None),
|
||||
role=choice["message"]["role"],
|
||||
function_call=choice["message"].get("function_call", None),
|
||||
tool_calls=choice["message"].get("tool_calls", None),
|
||||
)
|
||||
finish_reason = choice.get("finish_reason", None)
|
||||
if finish_reason is None:
|
||||
# gpt-4 vision can return 'finish_reason' or 'finish_details'
|
||||
finish_reason = choice.get("finish_details")
|
||||
logprobs = choice.get("logprobs", None)
|
||||
enhancements = choice.get("enhancements", None)
|
||||
choice = StreamingChoices(
|
||||
finish_reason=finish_reason,
|
||||
index=idx,
|
||||
delta=delta,
|
||||
logprobs=logprobs,
|
||||
enhancements=enhancements,
|
||||
)
|
||||
|
||||
choice_list.append(choice)
|
||||
model_response_object.choices = choice_list
|
||||
|
||||
if "usage" in response_object and response_object["usage"] is not None:
|
||||
setattr(model_response_object, "usage", Usage())
|
||||
model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0) # type: ignore
|
||||
model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0) # type: ignore
|
||||
model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0) # type: ignore
|
||||
|
||||
if "id" in response_object:
|
||||
model_response_object.id = response_object["id"]
|
||||
|
||||
if "created" in response_object:
|
||||
model_response_object.created = response_object["created"]
|
||||
|
||||
if "system_fingerprint" in response_object:
|
||||
model_response_object.system_fingerprint = response_object["system_fingerprint"]
|
||||
|
||||
if "model" in response_object:
|
||||
model_response_object.model = response_object["model"]
|
||||
yield model_response_object
|
||||
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
def _handle_invalid_parallel_tool_calls(
|
||||
tool_calls: List[ChatCompletionMessageToolCall],
|
||||
):
|
||||
"""
|
||||
Handle hallucinated parallel tool call from openai - https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653
|
||||
|
||||
Code modified from: https://github.com/phdowling/openai_multi_tool_use_parallel_patch/blob/main/openai_multi_tool_use_parallel_patch.py
|
||||
"""
|
||||
|
||||
if tool_calls is None:
|
||||
return
|
||||
try:
|
||||
replacements: Dict[int, List[ChatCompletionMessageToolCall]] = defaultdict(list)
|
||||
for i, tool_call in enumerate(tool_calls):
|
||||
current_function = tool_call.function.name
|
||||
function_args = json.loads(tool_call.function.arguments)
|
||||
if current_function == "multi_tool_use.parallel":
|
||||
verbose_logger.debug(
|
||||
"OpenAI did a weird pseudo-multi-tool-use call, fixing call structure.."
|
||||
)
|
||||
for _fake_i, _fake_tool_use in enumerate(function_args["tool_uses"]):
|
||||
_function_args = _fake_tool_use["parameters"]
|
||||
_current_function = _fake_tool_use["recipient_name"]
|
||||
if _current_function.startswith("functions."):
|
||||
_current_function = _current_function[len("functions.") :]
|
||||
|
||||
fixed_tc = ChatCompletionMessageToolCall(
|
||||
id=f"{tool_call.id}_{_fake_i}",
|
||||
type="function",
|
||||
function=Function(
|
||||
name=_current_function, arguments=json.dumps(_function_args)
|
||||
),
|
||||
)
|
||||
replacements[i].append(fixed_tc)
|
||||
|
||||
shift = 0
|
||||
for i, replacement in replacements.items():
|
||||
tool_calls[:] = (
|
||||
tool_calls[: i + shift] + replacement + tool_calls[i + shift + 1 :]
|
||||
)
|
||||
shift += len(replacement)
|
||||
|
||||
return tool_calls
|
||||
except json.JSONDecodeError:
|
||||
# if there is a JSONDecodeError, return the original tool_calls
|
||||
return tool_calls
|
||||
|
||||
|
||||
class LiteLLMResponseObjectHandler:
|
||||
|
||||
@staticmethod
|
||||
def convert_to_image_response(
|
||||
response_object: dict,
|
||||
model_response_object: Optional[ImageResponse] = None,
|
||||
hidden_params: Optional[dict] = None,
|
||||
) -> ImageResponse:
|
||||
|
||||
response_object.update({"hidden_params": hidden_params})
|
||||
|
||||
if model_response_object is None:
|
||||
model_response_object = ImageResponse(**response_object)
|
||||
return model_response_object
|
||||
else:
|
||||
model_response_dict = model_response_object.model_dump()
|
||||
|
||||
model_response_dict.update(response_object)
|
||||
model_response_object = ImageResponse(**model_response_dict)
|
||||
return model_response_object
|
||||
|
||||
|
||||
def convert_to_model_response_object( # noqa: PLR0915
|
||||
response_object: Optional[dict] = None,
|
||||
model_response_object: Optional[
|
||||
Union[
|
||||
ModelResponse,
|
||||
EmbeddingResponse,
|
||||
ImageResponse,
|
||||
TranscriptionResponse,
|
||||
RerankResponse,
|
||||
]
|
||||
] = None,
|
||||
response_type: Literal[
|
||||
"completion", "embedding", "image_generation", "audio_transcription", "rerank"
|
||||
] = "completion",
|
||||
stream=False,
|
||||
start_time=None,
|
||||
end_time=None,
|
||||
hidden_params: Optional[dict] = None,
|
||||
_response_headers: Optional[dict] = None,
|
||||
convert_tool_call_to_json_mode: Optional[
|
||||
bool
|
||||
] = None, # used for supporting 'json_schema' on older models
|
||||
):
|
||||
received_args = locals()
|
||||
|
||||
additional_headers = get_response_headers(_response_headers)
|
||||
|
||||
if hidden_params is None:
|
||||
hidden_params = {}
|
||||
hidden_params["additional_headers"] = additional_headers
|
||||
|
||||
### CHECK IF ERROR IN RESPONSE ### - openrouter returns these in the dictionary
|
||||
if (
|
||||
response_object is not None
|
||||
and "error" in response_object
|
||||
and response_object["error"] is not None
|
||||
):
|
||||
error_args = {"status_code": 422, "message": "Error in response object"}
|
||||
if isinstance(response_object["error"], dict):
|
||||
if "code" in response_object["error"]:
|
||||
error_args["status_code"] = response_object["error"]["code"]
|
||||
if "message" in response_object["error"]:
|
||||
if isinstance(response_object["error"]["message"], dict):
|
||||
message_str = json.dumps(response_object["error"]["message"])
|
||||
else:
|
||||
message_str = str(response_object["error"]["message"])
|
||||
error_args["message"] = message_str
|
||||
raised_exception = Exception()
|
||||
setattr(raised_exception, "status_code", error_args["status_code"])
|
||||
setattr(raised_exception, "message", error_args["message"])
|
||||
raise raised_exception
|
||||
|
||||
try:
|
||||
if response_type == "completion" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, ModelResponse)
|
||||
):
|
||||
if response_object is None or model_response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
if stream is True:
|
||||
# for returning cached responses, we need to yield a generator
|
||||
return convert_to_streaming_response(response_object=response_object)
|
||||
choice_list = []
|
||||
|
||||
assert response_object["choices"] is not None and isinstance(
|
||||
response_object["choices"], Iterable
|
||||
)
|
||||
|
||||
for idx, choice in enumerate(response_object["choices"]):
|
||||
## HANDLE JSON MODE - anthropic returns single function call]
|
||||
tool_calls = choice["message"].get("tool_calls", None)
|
||||
if tool_calls is not None:
|
||||
_openai_tool_calls = []
|
||||
for _tc in tool_calls:
|
||||
_openai_tc = ChatCompletionMessageToolCall(**_tc)
|
||||
_openai_tool_calls.append(_openai_tc)
|
||||
fixed_tool_calls = _handle_invalid_parallel_tool_calls(
|
||||
_openai_tool_calls
|
||||
)
|
||||
|
||||
if fixed_tool_calls is not None:
|
||||
tool_calls = fixed_tool_calls
|
||||
|
||||
message: Optional[Message] = None
|
||||
finish_reason: Optional[str] = None
|
||||
if (
|
||||
convert_tool_call_to_json_mode
|
||||
and tool_calls is not None
|
||||
and len(tool_calls) == 1
|
||||
):
|
||||
# to support 'json_schema' logic on older models
|
||||
json_mode_content_str: Optional[str] = tool_calls[0][
|
||||
"function"
|
||||
].get("arguments")
|
||||
if json_mode_content_str is not None:
|
||||
message = litellm.Message(content=json_mode_content_str)
|
||||
finish_reason = "stop"
|
||||
if message is None:
|
||||
message = Message(
|
||||
content=choice["message"].get("content", None),
|
||||
role=choice["message"]["role"] or "assistant",
|
||||
function_call=choice["message"].get("function_call", None),
|
||||
tool_calls=tool_calls,
|
||||
audio=choice["message"].get("audio", None),
|
||||
)
|
||||
finish_reason = choice.get("finish_reason", None)
|
||||
if finish_reason is None:
|
||||
# gpt-4 vision can return 'finish_reason' or 'finish_details'
|
||||
finish_reason = choice.get("finish_details") or "stop"
|
||||
logprobs = choice.get("logprobs", None)
|
||||
enhancements = choice.get("enhancements", None)
|
||||
choice = Choices(
|
||||
finish_reason=finish_reason,
|
||||
index=idx,
|
||||
message=message,
|
||||
logprobs=logprobs,
|
||||
enhancements=enhancements,
|
||||
)
|
||||
choice_list.append(choice)
|
||||
model_response_object.choices = choice_list
|
||||
|
||||
if "usage" in response_object and response_object["usage"] is not None:
|
||||
usage_object = litellm.Usage(**response_object["usage"])
|
||||
setattr(model_response_object, "usage", usage_object)
|
||||
if "created" in response_object:
|
||||
model_response_object.created = response_object["created"] or int(
|
||||
time.time()
|
||||
)
|
||||
|
||||
if "id" in response_object:
|
||||
model_response_object.id = response_object["id"] or str(uuid.uuid4())
|
||||
|
||||
if "system_fingerprint" in response_object:
|
||||
model_response_object.system_fingerprint = response_object[
|
||||
"system_fingerprint"
|
||||
]
|
||||
|
||||
if "model" in response_object:
|
||||
if model_response_object.model is None:
|
||||
model_response_object.model = response_object["model"]
|
||||
elif (
|
||||
"/" in model_response_object.model
|
||||
and response_object["model"] is not None
|
||||
):
|
||||
openai_compatible_provider = model_response_object.model.split("/")[
|
||||
0
|
||||
]
|
||||
model_response_object.model = (
|
||||
openai_compatible_provider + "/" + response_object["model"]
|
||||
)
|
||||
|
||||
if start_time is not None and end_time is not None:
|
||||
if isinstance(start_time, type(end_time)):
|
||||
model_response_object._response_ms = ( # type: ignore
|
||||
end_time - start_time
|
||||
).total_seconds() * 1000
|
||||
|
||||
if hidden_params is not None:
|
||||
if model_response_object._hidden_params is None:
|
||||
model_response_object._hidden_params = {}
|
||||
model_response_object._hidden_params.update(hidden_params)
|
||||
|
||||
if _response_headers is not None:
|
||||
model_response_object._response_headers = _response_headers
|
||||
|
||||
special_keys = list(litellm.ModelResponse.model_fields.keys())
|
||||
special_keys.append("usage")
|
||||
for k, v in response_object.items():
|
||||
if k not in special_keys:
|
||||
setattr(model_response_object, k, v)
|
||||
|
||||
return model_response_object
|
||||
elif response_type == "embedding" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, EmbeddingResponse)
|
||||
):
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
if model_response_object is None:
|
||||
model_response_object = EmbeddingResponse()
|
||||
|
||||
if "model" in response_object:
|
||||
model_response_object.model = response_object["model"]
|
||||
|
||||
if "object" in response_object:
|
||||
model_response_object.object = response_object["object"]
|
||||
|
||||
model_response_object.data = response_object["data"]
|
||||
|
||||
if "usage" in response_object and response_object["usage"] is not None:
|
||||
model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0) # type: ignore
|
||||
model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0) # type: ignore
|
||||
model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0) # type: ignore
|
||||
|
||||
if start_time is not None and end_time is not None:
|
||||
model_response_object._response_ms = ( # type: ignore
|
||||
end_time - start_time
|
||||
).total_seconds() * 1000 # return response latency in ms like openai
|
||||
|
||||
if hidden_params is not None:
|
||||
model_response_object._hidden_params = hidden_params
|
||||
|
||||
if _response_headers is not None:
|
||||
model_response_object._response_headers = _response_headers
|
||||
|
||||
return model_response_object
|
||||
elif response_type == "image_generation" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, ImageResponse)
|
||||
):
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
return LiteLLMResponseObjectHandler.convert_to_image_response(
|
||||
response_object=response_object,
|
||||
model_response_object=model_response_object,
|
||||
hidden_params=hidden_params,
|
||||
)
|
||||
|
||||
elif response_type == "audio_transcription" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, TranscriptionResponse)
|
||||
):
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
if model_response_object is None:
|
||||
model_response_object = TranscriptionResponse()
|
||||
|
||||
if "text" in response_object:
|
||||
model_response_object.text = response_object["text"]
|
||||
|
||||
optional_keys = ["language", "task", "duration", "words", "segments"]
|
||||
for key in optional_keys: # not guaranteed to be in response
|
||||
if key in response_object:
|
||||
setattr(model_response_object, key, response_object[key])
|
||||
|
||||
if hidden_params is not None:
|
||||
model_response_object._hidden_params = hidden_params
|
||||
|
||||
if _response_headers is not None:
|
||||
model_response_object._response_headers = _response_headers
|
||||
|
||||
return model_response_object
|
||||
elif response_type == "rerank" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, RerankResponse)
|
||||
):
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
if model_response_object is None:
|
||||
model_response_object = RerankResponse(**response_object)
|
||||
return model_response_object
|
||||
|
||||
if "id" in response_object:
|
||||
model_response_object.id = response_object["id"]
|
||||
|
||||
if "meta" in response_object:
|
||||
model_response_object.meta = response_object["meta"]
|
||||
|
||||
if "results" in response_object:
|
||||
model_response_object.results = response_object["results"]
|
||||
|
||||
return model_response_object
|
||||
except Exception:
|
||||
raise Exception(
|
||||
f"Invalid response object {traceback.format_exc()}\n\nreceived_args={received_args}"
|
||||
)
|
|
@@ -15,27 +15,28 @@ def get_response_headers(_response_headers: Optional[dict] = None) -> dict:
        dict: _response_headers with OpenAI headers and llm_provider-{header}

    """
    if _response_headers is not None:
        openai_headers = {}
        if "x-ratelimit-limit-requests" in _response_headers:
            openai_headers["x-ratelimit-limit-requests"] = _response_headers[
                "x-ratelimit-limit-requests"
            ]
        if "x-ratelimit-remaining-requests" in _response_headers:
            openai_headers["x-ratelimit-remaining-requests"] = _response_headers[
                "x-ratelimit-remaining-requests"
            ]
        if "x-ratelimit-limit-tokens" in _response_headers:
            openai_headers["x-ratelimit-limit-tokens"] = _response_headers[
                "x-ratelimit-limit-tokens"
            ]
        if "x-ratelimit-remaining-tokens" in _response_headers:
            openai_headers["x-ratelimit-remaining-tokens"] = _response_headers[
                "x-ratelimit-remaining-tokens"
            ]
        llm_provider_headers = _get_llm_provider_headers(_response_headers)
        return {**llm_provider_headers, **openai_headers}
    return {}
    if _response_headers is None:
        return {}

    openai_headers = {}
    if "x-ratelimit-limit-requests" in _response_headers:
        openai_headers["x-ratelimit-limit-requests"] = _response_headers[
            "x-ratelimit-limit-requests"
        ]
    if "x-ratelimit-remaining-requests" in _response_headers:
        openai_headers["x-ratelimit-remaining-requests"] = _response_headers[
            "x-ratelimit-remaining-requests"
        ]
    if "x-ratelimit-limit-tokens" in _response_headers:
        openai_headers["x-ratelimit-limit-tokens"] = _response_headers[
            "x-ratelimit-limit-tokens"
        ]
    if "x-ratelimit-remaining-tokens" in _response_headers:
        openai_headers["x-ratelimit-remaining-tokens"] = _response_headers[
            "x-ratelimit-remaining-tokens"
        ]
    llm_provider_headers = _get_llm_provider_headers(_response_headers)
    return {**llm_provider_headers, **openai_headers}


def _get_llm_provider_headers(response_headers: dict) -> dict:
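
A small usage sketch of the refactored helper (illustrative only; the import path is an assumption, and the llm_provider- prefixing is inferred from the docstring above since _get_llm_provider_headers' body is outside this hunk):

    # assumed import path for the helper shown in this hunk
    from litellm.litellm_core_utils.get_headers import get_response_headers

    assert get_response_headers(None) == {}  # early return added in this commit

    headers = get_response_headers({"x-ratelimit-remaining-requests": "99"})
    # per the docstring, expect the recognized OpenAI rate-limit header passed
    # through plus an "llm_provider-"-prefixed copy, roughly:
    # {"llm_provider-x-ratelimit-remaining-requests": "99",
    #  "x-ratelimit-remaining-requests": "99"}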
@ -26,15 +26,24 @@ async with websockets.connect( # type: ignore
|
|||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import json
|
||||
import traceback
|
||||
from asyncio import Task
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import litellm
|
||||
|
||||
from .litellm_logging import Logging as LiteLLMLogging
|
||||
|
||||
# Create a thread pool with a maximum of 10 threads
|
||||
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
|
||||
|
||||
DefaultLoggedRealTimeEventTypes = [
|
||||
"session.created",
|
||||
"response.create",
|
||||
"response.done",
|
||||
]
|
||||
|
||||
|
||||
class RealTimeStreaming:
|
||||
def __init__(
|
||||
|
@ -49,9 +58,27 @@ class RealTimeStreaming:
|
|||
self.messages: List = []
|
||||
self.input_message: Dict = {}
|
||||
|
||||
_logged_real_time_event_types = litellm.logged_real_time_event_types
|
||||
|
||||
if _logged_real_time_event_types is None:
|
||||
_logged_real_time_event_types = DefaultLoggedRealTimeEventTypes
|
||||
self.logged_real_time_event_types = _logged_real_time_event_types
|
||||
|
||||
def _should_store_message(self, message: Union[str, bytes]) -> bool:
|
||||
if isinstance(message, bytes):
|
||||
message = message.decode("utf-8")
|
||||
message_obj = json.loads(message)
|
||||
_msg_type = message_obj["type"]
|
||||
if self.logged_real_time_event_types == "*":
|
||||
return True
|
||||
if _msg_type in self.logged_real_time_event_types:
|
||||
return True
|
||||
return False
|
||||
|
||||
def store_message(self, message: Union[str, bytes]):
|
||||
"""Store message in list"""
|
||||
self.messages.append(message)
|
||||
if self._should_store_message(message):
|
||||
self.messages.append(message)
|
||||
|
||||
def store_input(self, message: dict):
|
||||
"""Store input message"""
|
||||
|
|
|
@ -198,9 +198,6 @@ class AzureOpenAIConfig:
|
|||
optional_params["json_mode"] = True
|
||||
else:
|
||||
optional_params["response_format"] = value
|
||||
elif param == "max_completion_tokens":
|
||||
# TODO - Azure OpenAI will probably add support for this, we should pass it through when Azure adds support
|
||||
optional_params["max_tokens"] = value
|
||||
elif param in supported_openai_params:
|
||||
optional_params[param] = value
|
||||
|
||||
|
|
|
@ -72,5 +72,5 @@ class AzureOpenAIRealtime(AzureChatCompletion):
|
|||
|
||||
except websockets.exceptions.InvalidStatusCode as e: # type: ignore
|
||||
await websocket.close(code=e.status_code, reason=str(e))
|
||||
except Exception as e:
|
||||
await websocket.close(code=1011, reason=f"Internal server error: {str(e)}")
|
||||
except Exception:
|
||||
pass
|
||||
|
|
|
@ -1349,7 +1349,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
if aimg_generation is True:
|
||||
return self.aimage_generation(data=data, prompt=prompt, logging_obj=logging_obj, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries) # type: ignore
|
||||
|
||||
openai_client = self._get_openai_client(
|
||||
openai_client: OpenAI = self._get_openai_client( # type: ignore
|
||||
is_async=False,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
|
@ -1371,8 +1371,9 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
)
|
||||
|
||||
## COMPLETION CALL
|
||||
response = openai_client.images.generate(**data, timeout=timeout) # type: ignore
|
||||
response = response.model_dump() # type: ignore
|
||||
_response = openai_client.images.generate(**data, timeout=timeout) # type: ignore
|
||||
|
||||
response = _response.model_dump()
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=prompt,
|
||||
|
@ -1380,7 +1381,6 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
additional_args={"complete_input_dict": data},
|
||||
original_response=response,
|
||||
)
|
||||
# return response
|
||||
return convert_to_model_response_object(response_object=response, model_response_object=model_response, response_type="image_generation") # type: ignore
|
||||
except OpenAIError as e:
|
||||
|
||||
|
|
|
@ -398,6 +398,8 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
error_response = getattr(e, "response", None)
|
||||
if error_headers is None and error_response:
|
||||
error_headers = getattr(error_response, "headers", None)
|
||||
if error_response and hasattr(error_response, "text"):
|
||||
error_text = getattr(error_response, "text", error_text)
|
||||
raise AnthropicError(
|
||||
message=error_text,
|
||||
status_code=status_code,
|
||||
|
|
|
@ -9,7 +9,7 @@ import httpx
|
|||
from openai import OpenAI
|
||||
|
||||
import litellm
|
||||
from litellm.llms.cohere.embed import embedding as cohere_embedding
|
||||
from litellm.llms.cohere.embed.handler import embedding as cohere_embedding
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
|
|
|
@ -19,6 +19,7 @@ from ..common_utils import BedrockError
|
|||
from .invoke_handler import AWSEventStreamDecoder, MockResponseIterator, make_call
|
||||
|
||||
BEDROCK_CONVERSE_MODELS = [
|
||||
"anthropic.claude-3-5-sonnet-20241022-v2:0",
|
||||
"anthropic.claude-3-5-sonnet-20240620-v1:0",
|
||||
"anthropic.claude-3-opus-20240229-v1:0",
|
||||
"anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
|
|
|
@ -7,6 +7,7 @@ Why separate file? Make it easy to see how transformation works
|
|||
from typing import List
|
||||
|
||||
import litellm
|
||||
from litellm.llms.cohere.embed.transformation import CohereEmbeddingConfig
|
||||
from litellm.types.llms.bedrock import CohereEmbeddingRequest, CohereEmbeddingResponse
|
||||
from litellm.types.utils import Embedding, EmbeddingResponse
|
||||
|
||||
|
@ -26,15 +27,21 @@ class BedrockCohereEmbeddingConfig:
|
|||
optional_params["embedding_types"] = v
|
||||
return optional_params
|
||||
|
||||
def _is_v3_model(self, model: str) -> bool:
|
||||
return "3" in model
|
||||
|
||||
def _transform_request(
|
||||
self, input: List[str], inference_params: dict
|
||||
self, model: str, input: List[str], inference_params: dict
|
||||
) -> CohereEmbeddingRequest:
|
||||
transformed_request = CohereEmbeddingRequest(
|
||||
texts=input,
|
||||
input_type=litellm.COHERE_DEFAULT_EMBEDDING_INPUT_TYPE, # type: ignore
|
||||
transformed_request = CohereEmbeddingConfig()._transform_request(
|
||||
model, input, inference_params
|
||||
)
|
||||
|
||||
for k, v in inference_params.items():
|
||||
transformed_request[k] = v # type: ignore
|
||||
new_transformed_request = CohereEmbeddingRequest(
|
||||
input_type=transformed_request["input_type"],
|
||||
)
|
||||
for k in CohereEmbeddingRequest.__annotations__.keys():
|
||||
if k in transformed_request:
|
||||
new_transformed_request[k] = transformed_request[k] # type: ignore
|
||||
|
||||
return transformed_request
|
||||
return new_transformed_request
|
||||
|
|
|
@ -11,7 +11,7 @@ from typing import Any, Callable, List, Literal, Optional, Tuple, Union
|
|||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm.llms.cohere.embed import embedding as cohere_embedding
|
||||
from litellm.llms.cohere.embed.handler import embedding as cohere_embedding
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
|
@ -369,7 +369,7 @@ class BedrockEmbedding(BaseAWSLLM):
|
|||
batch_data: Optional[List] = None
|
||||
if provider == "cohere":
|
||||
data = BedrockCohereEmbeddingConfig()._transform_request(
|
||||
input=input, inference_params=inference_params
|
||||
model=model, input=input, inference_params=inference_params
|
||||
)
|
||||
elif provider == "amazon" and model in [
|
||||
"amazon.titan-embed-image-v1",
|
||||
|
|
|
@ -12,8 +12,11 @@ import requests # type: ignore
|
|||
import litellm
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||
from litellm.types.llms.bedrock import CohereEmbeddingRequest
|
||||
from litellm.utils import Choices, Message, ModelResponse, Usage
|
||||
|
||||
from .transformation import CohereEmbeddingConfig
|
||||
|
||||
|
||||
def validate_environment(api_key, headers: dict):
|
||||
headers.update(
|
||||
|
@ -41,39 +44,9 @@ class CohereError(Exception):
|
|||
) # Call the base class constructor with the parameters it needs
|
||||
|
||||
|
||||
def _process_embedding_response(
|
||||
embeddings: list,
|
||||
model_response: litellm.EmbeddingResponse,
|
||||
model: str,
|
||||
encoding: Any,
|
||||
input: list,
|
||||
) -> litellm.EmbeddingResponse:
|
||||
output_data = []
|
||||
for idx, embedding in enumerate(embeddings):
|
||||
output_data.append(
|
||||
{"object": "embedding", "index": idx, "embedding": embedding}
|
||||
)
|
||||
model_response.object = "list"
|
||||
model_response.data = output_data
|
||||
model_response.model = model
|
||||
input_tokens = 0
|
||||
for text in input:
|
||||
input_tokens += len(encoding.encode(text))
|
||||
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
Usage(
|
||||
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
|
||||
),
|
||||
)
|
||||
|
||||
return model_response
|
||||
|
||||
|
||||
async def async_embedding(
|
||||
model: str,
|
||||
data: dict,
|
||||
data: Union[dict, CohereEmbeddingRequest],
|
||||
input: list,
|
||||
model_response: litellm.utils.EmbeddingResponse,
|
||||
timeout: Optional[Union[float, httpx.Timeout]],
|
||||
|
@ -121,19 +94,12 @@ async def async_embedding(
|
|||
)
|
||||
raise e
|
||||
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=input,
|
||||
api_key=api_key,
|
||||
additional_args={"complete_input_dict": data},
|
||||
original_response=response.text,
|
||||
)
|
||||
|
||||
embeddings = response.json()["embeddings"]
|
||||
|
||||
## PROCESS RESPONSE ##
|
||||
return _process_embedding_response(
|
||||
embeddings=embeddings,
|
||||
return CohereEmbeddingConfig()._transform_response(
|
||||
response=response,
|
||||
api_key=api_key,
|
||||
logging_obj=logging_obj,
|
||||
data=data,
|
||||
model_response=model_response,
|
||||
model=model,
|
||||
encoding=encoding,
|
||||
|
@ -149,7 +115,7 @@ def embedding(
|
|||
optional_params: dict,
|
||||
headers: dict,
|
||||
encoding: Any,
|
||||
data: Optional[dict] = None,
|
||||
data: Optional[Union[dict, CohereEmbeddingRequest]] = None,
|
||||
complete_api_base: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
aembedding: Optional[bool] = None,
|
||||
|
@ -159,11 +125,10 @@ def embedding(
|
|||
headers = validate_environment(api_key, headers=headers)
|
||||
embed_url = complete_api_base or "https://api.cohere.ai/v1/embed"
|
||||
model = model
|
||||
data = data or {"model": model, "texts": input, **optional_params}
|
||||
|
||||
if "3" in model and "input_type" not in data:
|
||||
# cohere v3 embedding models require input_type, if no input_type is provided, default to "search_document"
|
||||
data["input_type"] = "search_document"
|
||||
data = data or CohereEmbeddingConfig()._transform_request(
|
||||
model=model, input=input, inference_params=optional_params
|
||||
)
|
||||
|
||||
## ROUTING
|
||||
if aembedding is True:
|
||||
|
@ -193,30 +158,12 @@ def embedding(
|
|||
client = HTTPHandler(concurrent_limit=1)
|
||||
|
||||
response = client.post(embed_url, headers=headers, data=json.dumps(data))
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=input,
|
||||
api_key=api_key,
|
||||
additional_args={"complete_input_dict": data},
|
||||
original_response=response,
|
||||
)
|
||||
"""
|
||||
response
|
||||
{
|
||||
'object': "list",
|
||||
'data': [
|
||||
|
||||
]
|
||||
'model',
|
||||
'usage'
|
||||
}
|
||||
"""
|
||||
if response.status_code != 200:
|
||||
raise CohereError(message=response.text, status_code=response.status_code)
|
||||
embeddings = response.json()["embeddings"]
|
||||
|
||||
return _process_embedding_response(
|
||||
embeddings=embeddings,
|
||||
return CohereEmbeddingConfig()._transform_response(
|
||||
response=response,
|
||||
api_key=api_key,
|
||||
logging_obj=logging_obj,
|
||||
data=data,
|
||||
model_response=model_response,
|
||||
model=model,
|
||||
encoding=encoding,
|
160	litellm/llms/cohere/embed/transformation.py	Normal file
@@ -0,0 +1,160 @@
"""
Transformation logic from OpenAI /v1/embeddings format to Cohere's /v1/embed format.

Why separate file? Make it easy to see how transformation works

Covers
- v3 embedding models
- v2 embedding models

Docs - https://docs.cohere.com/v2/reference/embed
"""

import types
from typing import Any, List, Optional, Union

import httpx

from litellm import COHERE_DEFAULT_EMBEDDING_INPUT_TYPE
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.types.llms.bedrock import (
    COHERE_EMBEDDING_INPUT_TYPES,
    CohereEmbeddingRequest,
    CohereEmbeddingRequestWithModel,
)
from litellm.types.utils import (
    Embedding,
    EmbeddingResponse,
    PromptTokensDetailsWrapper,
    Usage,
)
from litellm.utils import is_base64_encoded


class CohereEmbeddingConfig:
    """
    Reference: https://docs.cohere.com/v2/reference/embed
    """

    def __init__(self) -> None:
        pass

    def get_supported_openai_params(self) -> List[str]:
        return ["encoding_format"]

    def map_openai_params(
        self, non_default_params: dict, optional_params: dict
    ) -> dict:
        for k, v in non_default_params.items():
            if k == "encoding_format":
                optional_params["embedding_types"] = v
        return optional_params

    def _is_v3_model(self, model: str) -> bool:
        return "3" in model

    def _transform_request(
        self, model: str, input: List[str], inference_params: dict
    ) -> CohereEmbeddingRequestWithModel:
        is_encoded = False
        for input_str in input:
            is_encoded = is_base64_encoded(input_str)

        if is_encoded:  # check if string is b64 encoded image or not
            transformed_request = CohereEmbeddingRequestWithModel(
                model=model,
                images=input,
                input_type="image",
            )
        else:
            transformed_request = CohereEmbeddingRequestWithModel(
                model=model,
                texts=input,
                input_type=COHERE_DEFAULT_EMBEDDING_INPUT_TYPE,
            )

        for k, v in inference_params.items():
            transformed_request[k] = v  # type: ignore

        return transformed_request

    def _calculate_usage(self, input: List[str], encoding: Any, meta: dict) -> Usage:

        input_tokens = 0

        text_tokens: Optional[int] = meta.get("billed_units", {}).get("input_tokens")

        image_tokens: Optional[int] = meta.get("billed_units", {}).get("images")

        prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
        if image_tokens is None and text_tokens is None:
            for text in input:
                input_tokens += len(encoding.encode(text))
        else:
            prompt_tokens_details = PromptTokensDetailsWrapper(
                image_tokens=image_tokens,
                text_tokens=text_tokens,
            )
            if image_tokens:
                input_tokens += image_tokens
            if text_tokens:
                input_tokens += text_tokens

        return Usage(
            prompt_tokens=input_tokens,
            completion_tokens=0,
            total_tokens=input_tokens,
            prompt_tokens_details=prompt_tokens_details,
        )

    def _transform_response(
        self,
        response: httpx.Response,
        api_key: Optional[str],
        logging_obj: LiteLLMLoggingObj,
        data: Union[dict, CohereEmbeddingRequest],
        model_response: EmbeddingResponse,
        model: str,
        encoding: Any,
        input: list,
    ) -> EmbeddingResponse:

        response_json = response.json()
        ## LOGGING
        logging_obj.post_call(
            input=input,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=response_json,
        )
        """
        response
        {
            'object': "list",
            'data': [

            ]
            'model',
            'usage'
        }
        """
        embeddings = response_json["embeddings"]
        output_data = []
        for idx, embedding in enumerate(embeddings):
            output_data.append(
                {"object": "embedding", "index": idx, "embedding": embedding}
            )
        model_response.object = "list"
        model_response.data = output_data
        model_response.model = model
        input_tokens = 0
        for text in input:
            input_tokens += len(encoding.encode(text))

        setattr(
            model_response,
            "usage",
            self._calculate_usage(input, encoding, response_json.get("meta", {})),
        )

        return model_response
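
A short sketch of the request shape the new config produces (illustrative only; the concrete default input_type comes from litellm's COHERE_DEFAULT_EMBEDDING_INPUT_TYPE constant, and the sample values are made up):

    from litellm.llms.cohere.embed.transformation import CohereEmbeddingConfig

    config = CohereEmbeddingConfig()
    req = config._transform_request(
        model="embed-english-v3.0",
        input=["hello world"],
        inference_params={"truncate": "END"},
    )
    # req is a CohereEmbeddingRequestWithModel, roughly:
    # {"model": "embed-english-v3.0", "texts": ["hello world"],
    #  "input_type": COHERE_DEFAULT_EMBEDDING_INPUT_TYPE, "truncate": "END"}
    # Base64-encoded image strings are sent as "images" with input_type="image" instead.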
@ -1,7 +1,7 @@
|
|||
import asyncio
|
||||
import os
|
||||
import traceback
|
||||
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union
|
||||
|
||||
import httpx
|
||||
from httpx import USE_CLIENT_DEFAULT
|
||||
|
@ -32,15 +32,20 @@ class AsyncHTTPHandler:
|
|||
def __init__(
|
||||
self,
|
||||
timeout: Optional[Union[float, httpx.Timeout]] = None,
|
||||
event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]] = None,
|
||||
concurrent_limit=1000,
|
||||
):
|
||||
self.timeout = timeout
|
||||
self.event_hooks = event_hooks
|
||||
self.client = self.create_client(
|
||||
timeout=timeout, concurrent_limit=concurrent_limit
|
||||
timeout=timeout, concurrent_limit=concurrent_limit, event_hooks=event_hooks
|
||||
)
|
||||
|
||||
def create_client(
|
||||
self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
|
||||
self,
|
||||
timeout: Optional[Union[float, httpx.Timeout]],
|
||||
concurrent_limit: int,
|
||||
event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
|
||||
) -> httpx.AsyncClient:
|
||||
|
||||
# SSL certificates (a.k.a CA bundle) used to verify the identity of requested hosts.
|
||||
|
@ -55,6 +60,7 @@ class AsyncHTTPHandler:
|
|||
# Create a client with a connection pool
|
||||
|
||||
return httpx.AsyncClient(
|
||||
event_hooks=event_hooks,
|
||||
timeout=timeout,
|
||||
limits=httpx.Limits(
|
||||
max_connections=concurrent_limit,
|
||||
|
@ -114,7 +120,9 @@ class AsyncHTTPHandler:
|
|||
return response
|
||||
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
||||
# Retry the request with a new session if there is a connection error
|
||||
new_client = self.create_client(timeout=timeout, concurrent_limit=1)
|
||||
new_client = self.create_client(
|
||||
timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
|
||||
)
|
||||
try:
|
||||
return await self.single_connection_post_request(
|
||||
url=url,
|
||||
|
@ -144,8 +152,10 @@ class AsyncHTTPHandler:
|
|||
setattr(e, "status_code", e.response.status_code)
|
||||
if stream is True:
|
||||
setattr(e, "message", await e.response.aread())
|
||||
setattr(e, "text", await e.response.aread())
|
||||
else:
|
||||
setattr(e, "message", e.response.text)
|
||||
setattr(e, "text", e.response.text)
|
||||
raise e
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
@ -172,7 +182,9 @@ class AsyncHTTPHandler:
|
|||
return response
|
||||
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
||||
# Retry the request with a new session if there is a connection error
|
||||
new_client = self.create_client(timeout=timeout, concurrent_limit=1)
|
||||
new_client = self.create_client(
|
||||
timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
|
||||
)
|
||||
try:
|
||||
return await self.single_connection_post_request(
|
||||
url=url,
|
||||
|
@ -229,7 +241,9 @@ class AsyncHTTPHandler:
|
|||
return response
|
||||
except (httpx.RemoteProtocolError, httpx.ConnectError):
|
||||
# Retry the request with a new session if there is a connection error
|
||||
new_client = self.create_client(timeout=timeout, concurrent_limit=1)
|
||||
new_client = self.create_client(
|
||||
timeout=timeout, concurrent_limit=1, event_hooks=self.event_hooks
|
||||
)
|
||||
try:
|
||||
return await self.single_connection_post_request(
|
||||
url=url,
|
||||
|
|
|
@ -398,6 +398,7 @@ def ollama_completion_stream(url, data, logging_obj):
|
|||
isinstance(content_chunk, StreamingChoices)
|
||||
and hasattr(content_chunk, "delta")
|
||||
and hasattr(content_chunk.delta, "content")
|
||||
and content_chunk.delta.content is not None
|
||||
):
|
||||
content_chunks.append(content_chunk.delta.content)
|
||||
response_content = "".join(content_chunks)
|
||||
|
|
|
@ -2429,6 +2429,15 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
|
|||
contents: List[BedrockMessageBlock] = []
|
||||
msg_i = 0
|
||||
|
||||
## BASE CASE ##
|
||||
if len(messages) == 0:
|
||||
raise litellm.BadRequestError(
|
||||
message=BAD_MESSAGE_ERROR_STR
|
||||
+ "bedrock requires at least one non-system message",
|
||||
model=model,
|
||||
llm_provider=llm_provider,
|
||||
)
|
||||
|
||||
# if initial message is assistant message
|
||||
if messages[0].get("role") is not None and messages[0]["role"] == "assistant":
|
||||
if user_continue_message is not None:
|
||||
|
|
|
@ -177,3 +177,16 @@ class VertexAIAnthropicConfig:
|
|||
optional_params["json_mode"] = True
|
||||
|
||||
return optional_params
|
||||
|
||||
@classmethod
|
||||
def is_supported_model(
|
||||
cls, model: str, custom_llm_provider: Optional[str] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Check if the model is supported by the VertexAI Anthropic API.
|
||||
"""
|
||||
if custom_llm_provider == "vertex_ai" and "claude" in model.lower():
|
||||
return True
|
||||
elif model in litellm.vertex_anthropic_models:
|
||||
return True
|
||||
return False
|
||||
|
|
|
@@ -113,7 +113,7 @@ from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM
 from .llms.bedrock.embed.embedding import BedrockEmbedding
 from .llms.cohere import chat as cohere_chat
 from .llms.cohere import completion as cohere_completion  # type: ignore
-from .llms.cohere import embed as cohere_embed
+from .llms.cohere.embed import handler as cohere_embed
 from .llms.custom_llm import CustomLLM, custom_chat_llm_router
 from .llms.databricks.chat import DatabricksChatCompletion
 from .llms.groq.chat.handler import GroqChatCompletion
@@ -4986,7 +4986,6 @@ def speech(
     litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
     proxy_server_request = kwargs.get("proxy_server_request", None)
     model_info = kwargs.get("model_info", None)
-    metadata = kwargs.get("metadata", {})
     model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base)  # type: ignore
     kwargs.pop("tags", [])

@@ -1104,7 +1104,7 @@
         "litellm_provider": "azure_ai",
         "mode": "chat"
     },
-    "azure_ai/Meta-Llama-31-8B-Instruct": {
+    "azure_ai/Meta-Llama-3.1-8B-Instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
@@ -1114,7 +1114,7 @@
         "mode": "chat",
         "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice"
     },
-    "azure_ai/Meta-Llama-31-70B-Instruct": {
+    "azure_ai/Meta-Llama-3.1-70B-Instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
@@ -1124,7 +1124,7 @@
         "mode": "chat",
         "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice"
     },
-    "azure_ai/Meta-Llama-31-405B-Instruct": {
+    "azure_ai/Meta-Llama-3.1-405B-Instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
@@ -1751,6 +1751,22 @@
         "supports_assistant_prefill": true,
         "supports_prompt_caching": true
     },
+    "claude-3-5-sonnet-20241022": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "cache_creation_input_token_cost": 0.00000375,
+        "cache_read_input_token_cost": 0.0000003,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
     "text-bison": {
         "max_tokens": 2048,
         "max_input_tokens": 8192,
@@ -2578,6 +2594,18 @@
         "supports_vision": true,
         "supports_assistant_prefill": true
     },
+    "vertex_ai/claude-3-5-sonnet-v2@20241022": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
     "vertex_ai/claude-3-haiku@20240307": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -3336,54 +3364,56 @@
         "litellm_provider": "cohere",
         "mode": "rerank"
     },
-    "embed-english-v3.0": {
-        "max_tokens": 512,
-        "max_input_tokens": 512,
-        "input_cost_per_token": 0.00000010,
-        "output_cost_per_token": 0.00000,
-        "litellm_provider": "cohere",
-        "mode": "embedding"
-    },
     "embed-english-light-v3.0": {
-        "max_tokens": 512,
-        "max_input_tokens": 512,
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
         "input_cost_per_token": 0.00000010,
         "output_cost_per_token": 0.00000,
         "litellm_provider": "cohere",
         "mode": "embedding"
     },
     "embed-multilingual-v3.0": {
-        "max_tokens": 512,
-        "max_input_tokens": 512,
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
         "input_cost_per_token": 0.00000010,
         "output_cost_per_token": 0.00000,
         "litellm_provider": "cohere",
         "mode": "embedding"
     },
     "embed-english-v2.0": {
-        "max_tokens": 512,
-        "max_input_tokens": 512,
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
         "input_cost_per_token": 0.00000010,
         "output_cost_per_token": 0.00000,
         "litellm_provider": "cohere",
         "mode": "embedding"
     },
     "embed-english-light-v2.0": {
-        "max_tokens": 512,
-        "max_input_tokens": 512,
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
         "input_cost_per_token": 0.00000010,
         "output_cost_per_token": 0.00000,
         "litellm_provider": "cohere",
         "mode": "embedding"
     },
     "embed-multilingual-v2.0": {
-        "max_tokens": 256,
-        "max_input_tokens": 256,
+        "max_tokens": 768,
+        "max_input_tokens": 768,
         "input_cost_per_token": 0.00000010,
         "output_cost_per_token": 0.00000,
         "litellm_provider": "cohere",
         "mode": "embedding"
     },
+    "embed-english-v3.0": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "input_cost_per_token": 0.00000010,
+        "input_cost_per_image": 0.0001,
+        "output_cost_per_token": 0.00000,
+        "litellm_provider": "cohere",
+        "mode": "embedding",
+        "supports_image_input": true
+    },
     "replicate/meta/llama-2-13b": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
@@ -3572,6 +3602,22 @@
         "supports_vision": true,
         "tool_use_system_prompt_tokens": 264
     },
+    "anthropic/claude-3-5-sonnet-20241022": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "cache_creation_input_token_cost": 0.00000375,
+        "cache_read_input_token_cost": 0.0000003,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
+    },
     "openrouter/anthropic/claude-3.5-sonnet": {
         "max_tokens": 8192,
         "max_input_tokens": 200000,
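With the `claude-3-5-sonnet-20241022` entries registered above, cost tracking can price the model straight from the table. A quick sanity check of the new per-token prices; this assumes a litellm build that already ships this pricing entry:

```python
import litellm

# returns (prompt_cost_usd, completion_cost_usd) from the pricing map
prompt_cost, completion_cost = litellm.cost_per_token(
    model="anthropic/claude-3-5-sonnet-20241022",
    prompt_tokens=1000,
    completion_tokens=1000,
)
# expected roughly 1000 * 0.000003 and 1000 * 0.000015 per the entry above
print(prompt_cost, completion_cost)
```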
@@ -4093,6 +4139,18 @@
         "litellm_provider": "bedrock",
         "mode": "embedding"
     },
+    "amazon.titan-embed-image-v1": {
+        "max_tokens": 128,
+        "max_input_tokens": 128,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 0.0000008,
+        "input_cost_per_image": 0.00006,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "bedrock",
+        "supports_image_input": true,
+        "mode": "embedding",
+        "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1"
+    },
     "mistral.mistral-7b-instruct-v0:2": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,
@@ -4246,6 +4304,17 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "anthropic.claude-3-5-sonnet-20241022-v2:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
     "anthropic.claude-3-haiku-20240307-v1:0": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -4290,6 +4359,17 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
     "us.anthropic.claude-3-haiku-20240307-v1:0": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -4334,6 +4414,17 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "bedrock",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true
+    },
     "eu.anthropic.claude-3-haiku-20240307-v1:0": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -6369,6 +6460,14 @@
         "litellm_provider": "voyage",
         "mode": "embedding"
     },
+    "voyage/voyage-finance-2": {
+        "max_tokens": 4000,
+        "max_input_tokens": 4000,
+        "input_cost_per_token": 0.00000012,
+        "output_cost_per_token": 0.000000,
+        "litellm_provider": "voyage",
+        "mode": "embedding"
+    },
     "databricks/databricks-meta-llama-3-1-405b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
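The new `voyage/voyage-finance-2` entry makes the model routable with per-token spend tracking. A usage sketch; it assumes `VOYAGE_API_KEY` is set in the environment and a litellm build that contains the entry:

```python
import litellm

response = litellm.embedding(
    model="voyage/voyage-finance-2",
    input=["10-K risk factors summary"],
)
# OpenAI-format embedding response
print(len(response.data[0]["embedding"]))
```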

1	litellm/proxy/_experimental/out/404.html	Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e8ad0a25b0c46e0b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e8ad0a25b0c46e0b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/00256a1984d35914.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68031,[\"665\",\"static/chunks/3014691f-b24e8254c7593934.js\",\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"902\",\"static/chunks/902-58bf23027703b2e8.js\",\"131\",\"static/chunks/131-3d2257b0ff5aadb2.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"626\",\"static/chunks/626-fc3969bfc35ead00.js\",\"777\",\"static/chunks/777-a81b45dec53652df.js\",\"931\",\"static/chunks/app/page-7c218fb97a2a9817.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/00256a1984d35914.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"Q5YcBgN0qLD3pcZcx1fRm\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_86ef86\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e8ad0a25b0c46e0b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f593049e31b05aeb.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-8316d07d1f41e39f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e8ad0a25b0c46e0b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/00256a1984d35914.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[68031,[\"665\",\"static/chunks/3014691f-b24e8254c7593934.js\",\"936\",\"static/chunks/2f6dbc85-cac2949a76539886.js\",\"902\",\"static/chunks/902-58bf23027703b2e8.js\",\"131\",\"static/chunks/131-3d2257b0ff5aadb2.js\",\"684\",\"static/chunks/684-16b194c83a169f6d.js\",\"626\",\"static/chunks/626-fc3969bfc35ead00.js\",\"777\",\"static/chunks/777-a81b45dec53652df.js\",\"931\",\"static/chunks/app/page-7b75dc53f1c6e449.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/00256a1984d35914.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"ffXp7j1jzMKpweBFKW_w2\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_86ef86\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[68031,["665","static/chunks/3014691f-b24e8254c7593934.js","936","static/chunks/2f6dbc85-cac2949a76539886.js","902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-3d2257b0ff5aadb2.js","684","static/chunks/684-16b194c83a169f6d.js","626","static/chunks/626-fc3969bfc35ead00.js","777","static/chunks/777-a81b45dec53652df.js","931","static/chunks/app/page-7c218fb97a2a9817.js"],""]
3:I[68031,["665","static/chunks/3014691f-b24e8254c7593934.js","936","static/chunks/2f6dbc85-cac2949a76539886.js","902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-3d2257b0ff5aadb2.js","684","static/chunks/684-16b194c83a169f6d.js","626","static/chunks/626-fc3969bfc35ead00.js","777","static/chunks/777-a81b45dec53652df.js","931","static/chunks/app/page-7b75dc53f1c6e449.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Q5YcBgN0qLD3pcZcx1fRm",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["ffXp7j1jzMKpweBFKW_w2",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

1	litellm/proxy/_experimental/out/model_hub.html	Normal file
File diff suppressed because one or more lines are too long
@@ -2,6 +2,6 @@
3:I[87494,["902","static/chunks/902-58bf23027703b2e8.js","131","static/chunks/131-3d2257b0ff5aadb2.js","777","static/chunks/777-a81b45dec53652df.js","418","static/chunks/app/model_hub/page-8ed460f3f33c0bf2.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Q5YcBgN0qLD3pcZcx1fRm",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["ffXp7j1jzMKpweBFKW_w2",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

1	litellm/proxy/_experimental/out/onboarding.html	Normal file
File diff suppressed because one or more lines are too long
@@ -2,6 +2,6 @@
3:I[667,["665","static/chunks/3014691f-b24e8254c7593934.js","902","static/chunks/902-58bf23027703b2e8.js","684","static/chunks/684-16b194c83a169f6d.js","777","static/chunks/777-a81b45dec53652df.js","461","static/chunks/app/onboarding/page-cba59362096ed469.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["Q5YcBgN0qLD3pcZcx1fRm",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["ffXp7j1jzMKpweBFKW_w2",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_86ef86","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/00256a1984d35914.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -1,8 +1,50 @@
 model_list:
   - model_name: gpt-3.5-turbo
     litellm_params:
       model: gpt-3.5-turbo
       api_key: os.environ/OPENAI_API_KEY
+  - model_name: gpt-4o
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/

 litellm_settings:
-  callbacks: ["prometheus"]
+  callbacks: ["prometheus", "otel"]
+
+general_settings:
+  user_api_key_cache_ttl: 3600
+
+router_settings:
+  routing_strategy: latency-based-routing
+  routing_strategy_args:
+    # only assign 40% of traffic to the fastest deployment to avoid overloading it
+    lowest_latency_buffer: 0.4
+
+    # consider last five minutes of calls for latency calculation
+    ttl: 300
+
+  # model_group_alias:
+  #   gpt-4o: gpt-4o-128k-2024-05-13
+  #   gpt-4o-mini: gpt-4o-mini-128k-2024-07-18
+
+  enable_tag_filtering: True
+
+  # retry call 3 times on each model_name (we don't use fallbacks, so this would be 3 times total)
+  num_retries: 3
+
+  # -- cooldown settings --
+  # see https://github.com/BerriAI/litellm/blob/main/litellm/router_utils/cooldown_handlers.py#L265
+
+  # cooldown model if it fails > n calls in a minute.
+  allowed_fails: 2
+
+  # (in seconds) how long to cooldown model if fails/min > allowed_fails
+  cooldown_time: 60
+
+  allowed_fails_policy:
+    InternalServerErrorAllowedFails: 1
+    RateLimitErrorAllowedFails: 2
+    TimeoutErrorAllowedFails: 3
+  # -- end cooldown settings --
+
+  # see https://docs.litellm.ai/docs/proxy/prod#3-use-redis-porthost-password-not-redis_url
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD
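The `router_settings` block above maps onto `litellm.Router` keyword arguments when the proxy builds its router. A rough Python-side equivalent under that assumption (the model list entry is the same placeholder deployment used in the config; the Redis and `allowed_fails_policy` settings are omitted for brevity):

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",
            "litellm_params": {
                "model": "openai/fake",
                "api_key": "fake-key",
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
            },
        },
    ],
    routing_strategy="latency-based-routing",
    routing_strategy_args={"lowest_latency_buffer": 0.4, "ttl": 300},
    num_retries=3,      # retry each call up to 3 times
    allowed_fails=2,    # cooldown a deployment after 2 failures/min
    cooldown_time=60,   # seconds to keep it cooled down
)
```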
@@ -104,7 +104,7 @@ class LitellmUserRoles(str, enum.Enum):
         return ui_labels.get(self.value, "")


-class LitellmTableNames(enum.Enum):
+class LitellmTableNames(str, enum.Enum):
     """
     Enum for Table Names used by LiteLLM
     """

@@ -340,6 +340,7 @@ class LiteLLMRoutes(enum.Enum):
         "/sso/get/ui_settings",
+        "/login",
         "/key/generate",
         "/key/{token_id}/regenerate",
         "/key/update",
         "/key/info",
         "/key/delete",

@@ -1371,6 +1372,8 @@ class LiteLLM_VerificationToken(LiteLLMBase):
     blocked: Optional[bool] = None
     litellm_budget_table: Optional[dict] = None
     org_id: Optional[str] = None  # org id for a given key
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None

     model_config = ConfigDict(protected_namespaces=())
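The `LitellmTableNames(str, enum.Enum)` change matters because a `str`-backed enum member compares and JSON-serializes as its plain string value, which is what API responses and DB filters expect. A toy illustration (not the real table names):

```python
import enum
import json


class TableNames(str, enum.Enum):
    KEY_TABLE = "LiteLLM_VerificationToken"


# serializes as a string because the member is also a str instance
print(json.dumps({"table": TableNames.KEY_TABLE}))
# compares equal to the raw string value
assert TableNames.KEY_TABLE == "LiteLLM_VerificationToken"
```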
@@ -28,7 +28,7 @@ from litellm.proxy._types import (
     LitellmUserRoles,
     UserAPIKeyAuth,
 )
-from litellm.proxy.auth.route_checks import is_llm_api_route
+from litellm.proxy.auth.route_checks import RouteChecks
 from litellm.proxy.utils import PrismaClient, ProxyLogging, log_to_opentelemetry
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes

@@ -138,7 +138,7 @@ def common_checks(  # noqa: PLR0915
         general_settings.get("enforce_user_param", None) is not None
         and general_settings["enforce_user_param"] is True
     ):
-        if is_llm_api_route(route=route) and "user" not in request_body:
+        if RouteChecks.is_llm_api_route(route=route) and "user" not in request_body:
             raise Exception(
                 f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
             )

@@ -154,7 +154,7 @@ def common_checks(  # noqa: PLR0915
                 + CommonProxyErrors.not_premium_user.value
             )

-        if is_llm_api_route(route=route):
+        if RouteChecks.is_llm_api_route(route=route):
             # loop through each enforced param
             # example enforced_params ['user', 'metadata', 'metadata.generation_name']
             for enforced_param in general_settings["enforced_params"]:

@@ -182,7 +182,7 @@ def common_checks(  # noqa: PLR0915
         and global_proxy_spend is not None
         # only run global budget checks for OpenAI routes
         # Reason - the Admin UI should continue working if the proxy crosses it's global budget
-        and is_llm_api_route(route=route)
+        and RouteChecks.is_llm_api_route(route=route)
         and route != "/v1/models"
         and route != "/models"
     ):
@@ -17,175 +17,199 @@ from .auth_checks_organization import _user_is_org_admin
 from .auth_utils import _has_user_setup_sso


-def non_proxy_admin_allowed_routes_check(
-    user_obj: Optional[LiteLLM_UserTable],
-    _user_role: Optional[LitellmUserRoles],
-    route: str,
-    request: Request,
-    valid_token: UserAPIKeyAuth,
-    api_key: str,
-    request_data: dict,
-):
-    """
-    Checks if Non Proxy Admin User is allowed to access the route
-    """
-    # (body moved, unchanged apart from is_llm_api_route -> RouteChecks.is_llm_api_route,
-    #  into RouteChecks.non_proxy_admin_allowed_routes_check below)
-
-
-def custom_admin_only_route_check(route: str):
-    # (body moved unchanged into RouteChecks.custom_admin_only_route_check below)
-
-
-def is_llm_api_route(route: str) -> bool:
-    # (placeholder matching split out into RouteChecks._route_matches_pattern; the rest
-    #  moved unchanged into RouteChecks.is_llm_api_route below)
-
-
+class RouteChecks:
+
+    @staticmethod
+    def non_proxy_admin_allowed_routes_check(
+        user_obj: Optional[LiteLLM_UserTable],
+        _user_role: Optional[LitellmUserRoles],
+        route: str,
+        request: Request,
+        valid_token: UserAPIKeyAuth,
+        api_key: str,
+        request_data: dict,
+    ):
+        """
+        Checks if Non Proxy Admin User is allowed to access the route
+        """
+
+        # Check user has defined custom admin routes
+        RouteChecks.custom_admin_only_route_check(
+            route=route,
+        )
+
+        if RouteChecks.is_llm_api_route(route=route):
+            pass
+        elif (
+            route in LiteLLMRoutes.info_routes.value
+        ):  # check if user allowed to call an info route
+            if route == "/key/info":
+                # check if user can access this route
+                query_params = request.query_params
+                key = query_params.get("key")
+                if key is not None and hash_token(token=key) != api_key:
+                    raise HTTPException(
+                        status_code=status.HTTP_403_FORBIDDEN,
+                        detail="user not allowed to access this key's info",
+                    )
+            elif route == "/user/info":
+                # check if user can access this route
+                query_params = request.query_params
+                user_id = query_params.get("user_id")
+                verbose_proxy_logger.debug(
+                    f"user_id: {user_id} & valid_token.user_id: {valid_token.user_id}"
+                )
+                if user_id and user_id != valid_token.user_id:
+                    raise HTTPException(
+                        status_code=status.HTTP_403_FORBIDDEN,
+                        detail="key not allowed to access this user's info. user_id={}, key's user_id={}".format(
+                            user_id, valid_token.user_id
+                        ),
+                    )
+            elif route == "/model/info":
+                # /model/info just shows models user has access to
+                pass
+            elif route == "/team/info":
+                pass  # handled by function itself
+        elif _has_user_setup_sso() and route in LiteLLMRoutes.sso_only_routes.value:
+            pass
+        elif (
+            route in LiteLLMRoutes.global_spend_tracking_routes.value
+            and getattr(valid_token, "permissions", None) is not None
+            and "get_spend_routes" in getattr(valid_token, "permissions", [])
+        ):
+            pass
+        elif _user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY.value:
+            if RouteChecks.is_llm_api_route(route=route):
+                raise HTTPException(
+                    status_code=status.HTTP_403_FORBIDDEN,
+                    detail=f"user not allowed to access this OpenAI routes, role= {_user_role}",
+                )
+            if route in LiteLLMRoutes.management_routes.value:
+                # the Admin Viewer is only allowed to call /user/update for their own user_id and can only update
+                if route == "/user/update":
+
+                    # Check the Request params are valid for PROXY_ADMIN_VIEW_ONLY
+                    if request_data is not None and isinstance(request_data, dict):
+                        _params_updated = request_data.keys()
+                        for param in _params_updated:
+                            if param not in ["user_email", "password"]:
+                                raise HTTPException(
+                                    status_code=status.HTTP_403_FORBIDDEN,
+                                    detail=f"user not allowed to access this route, role= {_user_role}. Trying to access: {route} and updating invalid param: {param}. only user_email and password can be updated",
+                                )
+                else:
+                    raise HTTPException(
+                        status_code=status.HTTP_403_FORBIDDEN,
+                        detail=f"user not allowed to access this route, role= {_user_role}. Trying to access: {route}",
+                    )
+
+        elif (
+            _user_role == LitellmUserRoles.INTERNAL_USER.value
+            and route in LiteLLMRoutes.internal_user_routes.value
+        ):
+            pass
+        elif (
+            _user_is_org_admin(request_data=request_data, user_object=user_obj)
+            and route in LiteLLMRoutes.org_admin_allowed_routes.value
+        ):
+            pass
+        elif (
+            _user_role == LitellmUserRoles.INTERNAL_USER_VIEW_ONLY.value
+            and route in LiteLLMRoutes.internal_user_view_only_routes.value
+        ):
+            pass
+        elif (
+            route in LiteLLMRoutes.self_managed_routes.value
+        ):  # routes that manage their own allowed/disallowed logic
+            pass
+        else:
+            user_role = "unknown"
+            user_id = "unknown"
+            if user_obj is not None:
+                user_role = user_obj.user_role or "unknown"
+                user_id = user_obj.user_id or "unknown"
+            raise Exception(
+                f"Only proxy admin can be used to generate, delete, update info for new keys/users/teams. Route={route}. Your role={user_role}. Your user_id={user_id}"
+            )
+
+    @staticmethod
+    def custom_admin_only_route_check(route: str):
+        from litellm.proxy.proxy_server import general_settings, premium_user
+
+        if "admin_only_routes" in general_settings:
+            if premium_user is not True:
+                verbose_proxy_logger.error(
+                    f"Trying to use 'admin_only_routes' this is an Enterprise only feature. {CommonProxyErrors.not_premium_user.value}"
+                )
+                return
+            if route in general_settings["admin_only_routes"]:
+                raise HTTPException(
+                    status_code=status.HTTP_403_FORBIDDEN,
+                    detail=f"user not allowed to access this route. Route={route} is an admin only route",
+                )
+        pass
+
+    @staticmethod
+    def is_llm_api_route(route: str) -> bool:
+        """
+        Helper to checks if provided route is an OpenAI route
+
+        Returns:
+            - True: if route is an OpenAI route
+            - False: if route is not an OpenAI route
+        """
+
+        if route in LiteLLMRoutes.openai_routes.value:
+            return True
+
+        if route in LiteLLMRoutes.anthropic_routes.value:
+            return True
+
+        # fuzzy match routes like "/v1/threads/thread_49EIN5QF32s4mH20M7GFKdlZ"
+        # Check for routes with placeholders
+        for openai_route in LiteLLMRoutes.openai_routes.value:
+            # Replace placeholders with regex pattern
+            # placeholders are written as "/threads/{thread_id}"
+            if "{" in openai_route:
+                if RouteChecks._route_matches_pattern(
+                    route=route, pattern=openai_route
+                ):
+                    return True
+
+        # Pass through Bedrock, VertexAI, and Cohere Routes
+        if "/bedrock/" in route:
+            return True
+        if "/vertex-ai/" in route:
+            return True
+        if "/gemini/" in route:
+            return True
+        if "/cohere/" in route:
+            return True
+        if "/langfuse/" in route:
+            return True
+        return False
+
+    @staticmethod
+    def _route_matches_pattern(route: str, pattern: str) -> bool:
+        """
+        Check if route matches the pattern placed in proxy/_types.py
+
+        Example:
+        - pattern: "/threads/{thread_id}"
+        - route: "/threads/thread_49EIN5QF32s4mH20M7GFKdlZ"
+        - returns: True
+
+        - pattern: "/key/{token_id}/regenerate"
+        - route: "/key/regenerate/82akk800000000jjsk"
+        - returns: False, pattern is "/key/{token_id}/regenerate"
+        """
+        pattern = re.sub(r"\{[^}]+\}", r"[^/]+", pattern)
+        # Anchor the pattern to match the entire string
+        pattern = f"^{pattern}$"
+        if re.match(pattern, route):
+            return True
+        return False
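The core of the new `RouteChecks._route_matches_pattern` helper is a placeholder-to-regex rewrite. A standalone check of that behaviour, reusing the examples from the docstring above:

```python
import re


def route_matches_pattern(route: str, pattern: str) -> bool:
    # turn "/threads/{thread_id}" into "/threads/[^/]+" and anchor it
    pattern = re.sub(r"\{[^}]+\}", r"[^/]+", pattern)
    return re.match(f"^{pattern}$", route) is not None


assert route_matches_pattern(
    "/threads/thread_49EIN5QF32s4mH20M7GFKdlZ", "/threads/{thread_id}"
)
assert not route_matches_pattern(
    "/key/regenerate/82akk800000000jjsk", "/key/{token_id}/regenerate"
)
```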
@@ -69,7 +69,7 @@ from litellm.proxy.auth.auth_utils import (
 )
 from litellm.proxy.auth.oauth2_check import check_oauth2_token
 from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request
-from litellm.proxy.auth.route_checks import non_proxy_admin_allowed_routes_check
+from litellm.proxy.auth.route_checks import RouteChecks
 from litellm.proxy.auth.service_account_checks import service_account_checks
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
 from litellm.proxy.utils import _to_ns

@@ -122,6 +122,11 @@ def _is_ui_route_allowed(
     ):
         # Do something if the current route starts with any of the allowed routes
         return True
+    elif any(
+        RouteChecks._route_matches_pattern(route=route, pattern=allowed_route)
+        for allowed_route in allowed_routes
+    ):
+        return True
     else:
         if user_obj is not None and _is_user_proxy_admin(user_obj=user_obj):
             return True

@@ -150,7 +155,7 @@ def _is_api_route_allowed(
         raise Exception("Invalid proxy server token passed")

     if not _is_user_proxy_admin(user_obj=user_obj):  # if non-admin
-        non_proxy_admin_allowed_routes_check(
+        RouteChecks.non_proxy_admin_allowed_routes_check(
             user_obj=user_obj,
             _user_role=_user_role,
             route=route,
@@ -120,7 +120,7 @@ async def health_services_endpoint(  # noqa: PLR0915
    }

    if service == "langfuse":
        from litellm.integrations.langfuse import LangFuseLogger
        from litellm.integrations.langfuse.langfuse import LangFuseLogger

        langfuse_logger = LangFuseLogger()
        langfuse_logger.Langfuse.auth_check()

@@ -372,6 +372,11 @@ async def _db_health_readiness_check():
    return db_health_cache


@router.get(
    "/settings",
    tags=["health"],
    dependencies=[Depends(user_api_key_auth)],
)
@router.get(
    "/active/callbacks",
    tags=["health"],

@@ -379,8 +384,29 @@ async def _db_health_readiness_check():
)
async def active_callbacks():
    """
    Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback
    Returns a list of litellm level settings

    This is useful for debugging and ensuring the proxy server is configured correctly.

    Response schema:
    ```
    {
        "alerting": _alerting,
        "litellm.callbacks": litellm_callbacks,
        "litellm.input_callback": litellm_input_callbacks,
        "litellm.failure_callback": litellm_failure_callbacks,
        "litellm.success_callback": litellm_success_callbacks,
        "litellm._async_success_callback": litellm_async_success_callbacks,
        "litellm._async_failure_callback": litellm_async_failure_callbacks,
        "litellm._async_input_callback": litellm_async_input_callbacks,
        "all_litellm_callbacks": all_litellm_callbacks,
        "num_callbacks": len(all_litellm_callbacks),
        "num_alerting": _num_alerting,
        "litellm.request_timeout": litellm.request_timeout,
    }
    ```
    """

    from litellm.proxy.proxy_server import general_settings, proxy_logging_obj

    _alerting = str(general_settings.get("alerting"))

@@ -421,6 +447,7 @@ async def active_callbacks():
        "all_litellm_callbacks": all_litellm_callbacks,
        "num_callbacks": len(all_litellm_callbacks),
        "num_alerting": _num_alerting,
        "litellm.request_timeout": litellm.request_timeout,
    }

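A quick way to exercise the new `/settings` alias for this endpoint against a running proxy; the base URL and key below are placeholders for your own deployment:

```python
import httpx

# Same response shape as /active/callbacks, per the docstring above.
resp = httpx.get(
    "http://localhost:4000/settings",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()
print(resp.json().get("litellm.request_timeout"))
```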
@@ -16,7 +16,10 @@ from litellm.proxy._types import (
    UserAPIKeyAuth,
)
from litellm.proxy.auth.auth_utils import get_request_route
from litellm.types.utils import SupportedCacheControls
from litellm.types.utils import (
    StandardLoggingUserAPIKeyMetadata,
    SupportedCacheControls,
)

if TYPE_CHECKING:
    from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig

@ -159,56 +162,111 @@ def clean_headers(
|
|||
return clean_headers
|
||||
|
||||
|
||||
def get_forwardable_headers(
|
||||
headers: Union[Headers, dict],
|
||||
):
|
||||
"""
|
||||
Get the headers that should be forwarded to the LLM Provider.
|
||||
|
||||
Looks for any `x-` headers and sends them to the LLM Provider.
|
||||
"""
|
||||
forwarded_headers = {}
|
||||
for header, value in headers.items():
|
||||
if header.lower().startswith("x-") and not header.lower().startswith(
|
||||
"x-stainless"
|
||||
): # causes openai sdk to fail
|
||||
forwarded_headers[header] = value
|
||||
|
||||
return forwarded_headers
|
||||
|
||||
|
||||
def get_openai_org_id_from_headers(
|
||||
headers: dict, general_settings: Optional[Dict] = None
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Get the OpenAI Org ID from the headers.
|
||||
"""
|
||||
if (
|
||||
general_settings is not None
|
||||
and general_settings.get("forward_openai_org_id") is not True
|
||||
class LiteLLMProxyRequestSetup:
|
||||
@staticmethod
|
||||
def _get_forwardable_headers(
|
||||
headers: Union[Headers, dict],
|
||||
):
|
||||
"""
|
||||
Get the headers that should be forwarded to the LLM Provider.
|
||||
|
||||
Looks for any `x-` headers and sends them to the LLM Provider.
|
||||
"""
|
||||
forwarded_headers = {}
|
||||
for header, value in headers.items():
|
||||
if header.lower().startswith("x-") and not header.lower().startswith(
|
||||
"x-stainless"
|
||||
): # causes openai sdk to fail
|
||||
forwarded_headers[header] = value
|
||||
|
||||
return forwarded_headers
|
||||
|
||||
@staticmethod
|
||||
def get_openai_org_id_from_headers(
|
||||
headers: dict, general_settings: Optional[Dict] = None
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Get the OpenAI Org ID from the headers.
|
||||
"""
|
||||
if (
|
||||
general_settings is not None
|
||||
and general_settings.get("forward_openai_org_id") is not True
|
||||
):
|
||||
return None
|
||||
for header, value in headers.items():
|
||||
if header.lower() == "openai-organization":
|
||||
return value
|
||||
return None
|
||||
for header, value in headers.items():
|
||||
if header.lower() == "openai-organization":
|
||||
return value
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def add_headers_to_llm_call(
|
||||
headers: dict, user_api_key_dict: UserAPIKeyAuth
|
||||
) -> dict:
|
||||
"""
|
||||
Add headers to the LLM call
|
||||
|
||||
def add_litellm_data_for_backend_llm_call(
|
||||
headers: dict, general_settings: Optional[Dict[str, Any]] = None
|
||||
) -> LitellmDataForBackendLLMCall:
|
||||
"""
|
||||
- Adds forwardable headers
|
||||
- Adds org id
|
||||
"""
|
||||
data = LitellmDataForBackendLLMCall()
|
||||
_headers = get_forwardable_headers(headers)
|
||||
if _headers != {}:
|
||||
data["headers"] = _headers
|
||||
_organization = get_openai_org_id_from_headers(headers, general_settings)
|
||||
if _organization is not None:
|
||||
data["organization"] = _organization
|
||||
return data
|
||||
- Checks request headers for forwardable headers
|
||||
- Checks if user information should be added to the headers
|
||||
"""
|
||||
from litellm.litellm_core_utils.litellm_logging import (
|
||||
get_standard_logging_metadata,
|
||||
)
|
||||
|
||||
returned_headers = LiteLLMProxyRequestSetup._get_forwardable_headers(headers)
|
||||
|
||||
if litellm.add_user_information_to_llm_headers is True:
|
||||
litellm_logging_metadata_headers = (
|
||||
LiteLLMProxyRequestSetup.get_sanitized_user_information_from_key(
|
||||
user_api_key_dict=user_api_key_dict
|
||||
)
|
||||
)
|
||||
for k, v in litellm_logging_metadata_headers.items():
|
||||
if v is not None:
|
||||
returned_headers["x-litellm-{}".format(k)] = v
|
||||
|
||||
return returned_headers
|
||||
|
||||
@staticmethod
|
||||
def add_litellm_data_for_backend_llm_call(
|
||||
*,
|
||||
headers: dict,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
general_settings: Optional[Dict[str, Any]] = None,
|
||||
) -> LitellmDataForBackendLLMCall:
|
||||
"""
|
||||
- Adds forwardable headers
|
||||
- Adds org id
|
||||
"""
|
||||
data = LitellmDataForBackendLLMCall()
|
||||
if (
|
||||
general_settings
|
||||
and general_settings.get("forward_client_headers_to_llm_api") is True
|
||||
):
|
||||
_headers = LiteLLMProxyRequestSetup.add_headers_to_llm_call(
|
||||
headers, user_api_key_dict
|
||||
)
|
||||
if _headers != {}:
|
||||
data["headers"] = _headers
|
||||
_organization = LiteLLMProxyRequestSetup.get_openai_org_id_from_headers(
|
||||
headers, general_settings
|
||||
)
|
||||
if _organization is not None:
|
||||
data["organization"] = _organization
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
def get_sanitized_user_information_from_key(
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
) -> StandardLoggingUserAPIKeyMetadata:
|
||||
user_api_key_logged_metadata = StandardLoggingUserAPIKeyMetadata(
|
||||
user_api_key_hash=user_api_key_dict.api_key, # just the hashed token
|
||||
user_api_key_alias=user_api_key_dict.key_alias,
|
||||
user_api_key_team_id=user_api_key_dict.team_id,
|
||||
user_api_key_user_id=user_api_key_dict.user_id,
|
||||
user_api_key_org_id=user_api_key_dict.org_id,
|
||||
user_api_key_team_alias=user_api_key_dict.team_alias,
|
||||
)
|
||||
return user_api_key_logged_metadata
|
||||
|
||||
|
||||
async def add_litellm_data_to_request( # noqa: PLR0915
|
||||
|
@ -246,7 +304,13 @@ async def add_litellm_data_to_request( # noqa: PLR0915
|
|||
),
|
||||
)
|
||||
|
||||
data.update(add_litellm_data_for_backend_llm_call(_headers, general_settings))
|
||||
data.update(
|
||||
LiteLLMProxyRequestSetup.add_litellm_data_for_backend_llm_call(
|
||||
headers=_headers,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
general_settings=general_settings,
|
||||
)
|
||||
)
|
||||
|
||||
# Include original request and headers in the data
|
||||
data["proxy_server_request"] = {
|
||||
|
@ -294,13 +358,22 @@ async def add_litellm_data_to_request( # noqa: PLR0915
|
|||
data["metadata"]
|
||||
)
|
||||
|
||||
data[_metadata_variable_name]["user_api_key"] = user_api_key_dict.api_key
|
||||
data[_metadata_variable_name]["user_api_key_alias"] = getattr(
|
||||
user_api_key_dict, "key_alias", None
|
||||
user_api_key_logged_metadata = (
|
||||
LiteLLMProxyRequestSetup.get_sanitized_user_information_from_key(
|
||||
user_api_key_dict=user_api_key_dict
|
||||
)
|
||||
)
|
||||
data[_metadata_variable_name].update(user_api_key_logged_metadata)
|
||||
data[_metadata_variable_name][
|
||||
"user_api_key"
|
||||
] = (
|
||||
user_api_key_dict.api_key
|
||||
) # this is just the hashed token. [TODO]: replace variable name in repo.
|
||||
|
||||
data[_metadata_variable_name]["user_api_end_user_max_budget"] = getattr(
|
||||
user_api_key_dict, "end_user_max_budget", None
|
||||
)
|
||||
|
||||
data[_metadata_variable_name]["litellm_api_version"] = version
|
||||
|
||||
if general_settings is not None:
|
||||
|
@ -308,15 +381,6 @@ async def add_litellm_data_to_request( # noqa: PLR0915
|
|||
general_settings.get("global_max_parallel_requests", None)
|
||||
)
|
||||
|
||||
data[_metadata_variable_name]["user_api_key_user_id"] = user_api_key_dict.user_id
|
||||
data[_metadata_variable_name]["user_api_key_org_id"] = user_api_key_dict.org_id
|
||||
data[_metadata_variable_name]["user_api_key_team_id"] = getattr(
|
||||
user_api_key_dict, "team_id", None
|
||||
)
|
||||
data[_metadata_variable_name]["user_api_key_team_alias"] = getattr(
|
||||
user_api_key_dict, "team_alias", None
|
||||
)
|
||||
|
||||
### KEY-LEVEL Controls
|
||||
key_metadata = user_api_key_dict.metadata
|
||||
if "cache" in key_metadata:
|
||||
|
|
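The `LiteLLMProxyRequestSetup._get_forwardable_headers` helper introduced above only forwards `x-` headers and drops the OpenAI SDK's `x-stainless-*` headers. A minimal sketch of that filter; the standalone function name and sample headers are illustrative only:

```python
from typing import Dict


def get_forwardable_headers_sketch(headers: Dict[str, str]) -> Dict[str, str]:
    # Forward custom "x-" headers to the LLM provider, but drop the
    # "x-stainless-*" headers the OpenAI SDK adds (they break upstream calls).
    forwarded = {}
    for header, value in headers.items():
        lowered = header.lower()
        if lowered.startswith("x-") and not lowered.startswith("x-stainless"):
            forwarded[header] = value
    return forwarded


print(get_forwardable_headers_sketch({
    "x-request-id": "abc123",
    "x-stainless-os": "Linux",
    "Authorization": "Bearer sk-...",
}))  # {'x-request-id': 'abc123'}
```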
litellm/proxy/management_helpers/audit_logs.py (new file, 43 lines)

@@ -0,0 +1,43 @@
"""
Functions to create audit logs for LiteLLM Proxy
"""

import json

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import LiteLLM_AuditLogs


async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
    from litellm.proxy.proxy_server import premium_user, prisma_client

    if premium_user is not True:
        return

    if litellm.store_audit_logs is not True:
        return
    if prisma_client is None:
        raise Exception("prisma_client is None, no DB connected")

    verbose_proxy_logger.debug("creating audit log for %s", request_data)

    if isinstance(request_data.updated_values, dict):
        request_data.updated_values = json.dumps(request_data.updated_values)

    if isinstance(request_data.before_value, dict):
        request_data.before_value = json.dumps(request_data.before_value)

    _request_data = request_data.model_dump(exclude_none=True)

    try:
        await prisma_client.db.litellm_auditlog.create(
            data={
                **_request_data,  # type: ignore
            }
        )
    except Exception as e:
        # [Non-Blocking Exception. Do not allow blocking LLM API call]
        verbose_proxy_logger.error(f"Failed Creating audit log {e}")

    return

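A hedged sketch of how a management endpoint might call the relocated helper in a fire-and-forget way; the `LiteLLM_AuditLogs` field names below are assumptions for illustration, so check `litellm/proxy/_types.py` for the actual schema:

```python
import asyncio
import uuid
from datetime import datetime, timezone

from litellm.proxy._types import LiteLLM_AuditLogs
from litellm.proxy.management_helpers.audit_logs import create_audit_log_for_update


async def log_key_update_sketch():
    # Fire-and-forget, so a failed audit write never blocks the management call.
    # Field names are illustrative assumptions, not the confirmed model schema.
    asyncio.create_task(
        create_audit_log_for_update(
            LiteLLM_AuditLogs(
                id=str(uuid.uuid4()),
                updated_at=datetime.now(timezone.utc),
                changed_by="admin",
                action="updated",
                table_name="LiteLLM_VerificationToken",
                object_id="sk-...",
                before_value={"max_budget": 50.0},
                updated_values={"max_budget": 100.0},
            )
        )
    )
```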
@@ -125,7 +125,7 @@ def is_port_in_use(port):
)
@click.option(
    "--request_timeout",
    default=600,
    default=6000,
    type=int,
    help="Set timeout in seconds for completion calls",
)

@@ -1,48 +1,20 @@
model_list:
  ################################################################################
  # Azure
  - model_name: gpt-4o-mini
    litellm_params:
      model: azure/gpt-4o-mini
      api_base: https://amazin-prod.openai.azure.com
      api_key: "os.environ/AZURE_GPT_4O"
      deployment_id: gpt-4o-mini
  - model_name: gpt-4o
    litellm_params:
      model: azure/gpt-4o
      api_base: https://very-cool-prod.openai.azure.com
      api_key: "os.environ/AZURE_GPT_4O"
      deployment_id: gpt-4o
      model: gpt-4o
      api_key: os.environ/OPENAI_API_KEY
      tpm: 1000000
      rpm: 10000


  ################################################################################
  # Fireworks
  - model_name: fireworks-llama-v3p1-405b-instruct
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct
      api_key: "os.environ/FIREWORKS"
  - model_name: fireworks-llama-v3p1-70b-instruct
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct
      api_key: "os.environ/FIREWORKS"

general_settings:
  alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
  success_callback: ["prometheus"]
  service_callback: ["prometheus_system"]
  drop_params: False # Raise an exception if the openai param being passed in isn't supported.
  cache: false
  default_internal_user_params:
    user_role: os.environ/DEFAULT_USER_ROLE
  # master key is set via env var
  # master_key: #######
  proxy_batch_write_at: 60 # Batch write spend updates every 60s

  success_callback: ["s3"]
  s3_callback_params:
    s3_bucket_name: logs-bucket-litellm # AWS Bucket Name for S3
    s3_region_name: us-west-2 # AWS Region Name for S3
    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
    s3_path: my-test-path # [OPTIONAL] set path in bucket you want to write logs to
    s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use Backblaze/cloudflare s3 buckets
litellm_settings:
  store_audit_logs: true

router_settings:
  routing_strategy: simple-shuffle # "simple-shuffle" shown to result in highest throughput. https://docs.litellm.ai/docs/proxy/configs#load-balancing
  # https://docs.litellm.ai/docs/proxy/reliability#default-fallbacks
  default_fallbacks: ["gpt-4o-2024-08-06", "claude-3-5-sonnet-20240620"]
  fallbacks: [{"gpt-4o-2024-08-06": ["claude-3-5-sonnet-20240620"]}, {"gpt-4o-2024-05-13": ["claude-3-5-sonnet-20240620"]}]

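A rough sketch of how the trimmed `model_list` and `router_settings` above map onto `litellm.Router` when used outside the proxy; the model entry and key are shortened placeholders, and this is not the proxy's own config loader:

```python
import litellm

# Mirrors the simplified config: one gpt-4o deployment, simple-shuffle routing,
# and the same fallback chains.
router = litellm.Router(
    model_list=[
        {
            "model_name": "gpt-4o",
            "litellm_params": {
                "model": "gpt-4o",
                "api_key": "os.environ/OPENAI_API_KEY",  # resolved from the env
                "tpm": 1000000,
                "rpm": 10000,
            },
        }
    ],
    routing_strategy="simple-shuffle",
    default_fallbacks=["gpt-4o-2024-08-06", "claude-3-5-sonnet-20240620"],
    fallbacks=[{"gpt-4o-2024-08-06": ["claude-3-5-sonnet-20240620"]}],
)
```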
@@ -194,6 +194,7 @@ from litellm.proxy.management_endpoints.team_callback_endpoints import (
)
from litellm.proxy.management_endpoints.team_endpoints import router as team_router
from litellm.proxy.management_endpoints.ui_sso import router as ui_sso_router
from litellm.proxy.management_helpers.audit_logs import create_audit_log_for_update
from litellm.proxy.openai_files_endpoints.files_endpoints import is_known_model
from litellm.proxy.openai_files_endpoints.files_endpoints import (
    router as openai_files_router,

@@ -638,18 +639,6 @@ def _resolve_pydantic_type(typ) -> List:
    return typs


def prisma_setup(database_url: Optional[str]):
    global prisma_client, proxy_logging_obj, user_api_key_cache

    if database_url is not None:
        try:
            prisma_client = PrismaClient(
                database_url=database_url, proxy_logging_obj=proxy_logging_obj
            )
        except Exception as e:
            raise e


def load_from_azure_key_vault(use_azure_key_vault: bool = False):
    if use_azure_key_vault is False:
        return

@@ -1548,7 +1537,7 @@ class ProxyConfig:
        ## INIT PROXY REDIS USAGE CLIENT ##
        redis_usage_cache = litellm.cache.cache



    async def get_config(self, config_file_path: Optional[str] = None) -> dict:
        """
        Load config file

@ -2801,137 +2790,55 @@ def giveup(e):
|
|||
return result
|
||||
|
||||
|
||||
@router.on_event("startup")
|
||||
async def startup_event(): # noqa: PLR0915
|
||||
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client, store_model_in_db, premium_user, _license_check
|
||||
import json
|
||||
class ProxyStartupEvent:
|
||||
@classmethod
|
||||
def _initialize_startup_logging(
|
||||
cls,
|
||||
llm_router: Optional[litellm.Router],
|
||||
proxy_logging_obj: ProxyLogging,
|
||||
redis_usage_cache: Optional[RedisCache],
|
||||
):
|
||||
"""Initialize logging and alerting on startup"""
|
||||
## COST TRACKING ##
|
||||
cost_tracking()
|
||||
|
||||
init_verbose_loggers()
|
||||
## Error Tracking ##
|
||||
error_tracking()
|
||||
|
||||
### LOAD MASTER KEY ###
|
||||
# check if master key set in environment - load from there
|
||||
master_key = get_secret("LITELLM_MASTER_KEY", None) # type: ignore
|
||||
# check if DATABASE_URL in environment - load from there
|
||||
if prisma_client is None:
|
||||
_db_url: Optional[str] = get_secret("DATABASE_URL", None) # type: ignore
|
||||
prisma_setup(database_url=_db_url)
|
||||
proxy_logging_obj.startup_event(
|
||||
llm_router=llm_router, redis_usage_cache=redis_usage_cache
|
||||
)
|
||||
|
||||
### LOAD CONFIG ###
|
||||
worker_config: Optional[Union[str, dict]] = get_secret("WORKER_CONFIG") # type: ignore
|
||||
env_config_yaml: Optional[str] = get_secret_str("CONFIG_FILE_PATH")
|
||||
verbose_proxy_logger.debug("worker_config: %s", worker_config)
|
||||
# check if it's a valid file path
|
||||
if env_config_yaml is not None:
|
||||
if os.path.isfile(env_config_yaml) and proxy_config.is_yaml(
|
||||
config_file_path=env_config_yaml
|
||||
):
|
||||
(
|
||||
llm_router,
|
||||
llm_model_list,
|
||||
general_settings,
|
||||
) = await proxy_config.load_config(
|
||||
router=llm_router, config_file_path=env_config_yaml
|
||||
)
|
||||
elif worker_config is not None:
|
||||
if (
|
||||
isinstance(worker_config, str)
|
||||
and os.path.isfile(worker_config)
|
||||
and proxy_config.is_yaml(config_file_path=worker_config)
|
||||
):
|
||||
(
|
||||
llm_router,
|
||||
llm_model_list,
|
||||
general_settings,
|
||||
) = await proxy_config.load_config(
|
||||
router=llm_router, config_file_path=worker_config
|
||||
)
|
||||
elif os.environ.get("LITELLM_CONFIG_BUCKET_NAME") is not None and isinstance(
|
||||
worker_config, str
|
||||
):
|
||||
(
|
||||
llm_router,
|
||||
llm_model_list,
|
||||
general_settings,
|
||||
) = await proxy_config.load_config(
|
||||
router=llm_router, config_file_path=worker_config
|
||||
)
|
||||
elif isinstance(worker_config, dict):
|
||||
await initialize(**worker_config)
|
||||
@classmethod
|
||||
def _initialize_jwt_auth(
|
||||
cls,
|
||||
general_settings: dict,
|
||||
prisma_client: Optional[PrismaClient],
|
||||
user_api_key_cache: DualCache,
|
||||
):
|
||||
"""Initialize JWT auth on startup"""
|
||||
if general_settings.get("litellm_jwtauth", None) is not None:
|
||||
for k, v in general_settings["litellm_jwtauth"].items():
|
||||
if isinstance(v, str) and v.startswith("os.environ/"):
|
||||
general_settings["litellm_jwtauth"][k] = get_secret(v)
|
||||
litellm_jwtauth = LiteLLM_JWTAuth(**general_settings["litellm_jwtauth"])
|
||||
else:
|
||||
# if not, assume it's a json string
|
||||
worker_config = json.loads(worker_config)
|
||||
if isinstance(worker_config, dict):
|
||||
await initialize(**worker_config)
|
||||
|
||||
## CHECK PREMIUM USER
|
||||
verbose_proxy_logger.debug(
|
||||
"litellm.proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format(
|
||||
premium_user
|
||||
litellm_jwtauth = LiteLLM_JWTAuth()
|
||||
jwt_handler.update_environment(
|
||||
prisma_client=prisma_client,
|
||||
user_api_key_cache=user_api_key_cache,
|
||||
litellm_jwtauth=litellm_jwtauth,
|
||||
)
|
||||
)
|
||||
if premium_user is False:
|
||||
premium_user = _license_check.is_premium()
|
||||
|
||||
verbose_proxy_logger.debug(
|
||||
"litellm.proxy.proxy_server.py::startup() - PREMIUM USER value - {}".format(
|
||||
premium_user
|
||||
)
|
||||
)
|
||||
|
||||
## COST TRACKING ##
|
||||
cost_tracking()
|
||||
|
||||
## Error Tracking ##
|
||||
error_tracking()
|
||||
|
||||
## UPDATE SLACK ALERTING ##
|
||||
proxy_logging_obj.slack_alerting_instance.update_values(llm_router=llm_router)
|
||||
|
||||
db_writer_client = HTTPHandler()
|
||||
|
||||
## UPDATE INTERNAL USAGE CACHE ##
|
||||
proxy_logging_obj.update_values(
|
||||
redis_cache=redis_usage_cache
|
||||
) # used by parallel request limiter for rate limiting keys across instances
|
||||
|
||||
proxy_logging_obj._init_litellm_callbacks(
|
||||
llm_router=llm_router
|
||||
) # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
|
||||
|
||||
if "daily_reports" in proxy_logging_obj.slack_alerting_instance.alert_types:
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.slack_alerting_instance._run_scheduled_daily_report(
|
||||
llm_router=llm_router
|
||||
)
|
||||
) # RUN DAILY REPORT (if scheduled)
|
||||
|
||||
## JWT AUTH ##
|
||||
if general_settings.get("litellm_jwtauth", None) is not None:
|
||||
for k, v in general_settings["litellm_jwtauth"].items():
|
||||
if isinstance(v, str) and v.startswith("os.environ/"):
|
||||
general_settings["litellm_jwtauth"][k] = get_secret(v)
|
||||
litellm_jwtauth = LiteLLM_JWTAuth(**general_settings["litellm_jwtauth"])
|
||||
else:
|
||||
litellm_jwtauth = LiteLLM_JWTAuth()
|
||||
jwt_handler.update_environment(
|
||||
prisma_client=prisma_client,
|
||||
user_api_key_cache=user_api_key_cache,
|
||||
litellm_jwtauth=litellm_jwtauth,
|
||||
)
|
||||
|
||||
if use_background_health_checks:
|
||||
asyncio.create_task(
|
||||
_run_background_health_check()
|
||||
) # start the background health check coroutine.
|
||||
|
||||
if prompt_injection_detection_obj is not None:
|
||||
prompt_injection_detection_obj.update_environment(router=llm_router)
|
||||
|
||||
verbose_proxy_logger.debug("prisma_client: %s", prisma_client)
|
||||
if prisma_client is not None:
|
||||
await prisma_client.connect()
|
||||
|
||||
if prisma_client is not None and master_key is not None:
|
||||
@classmethod
|
||||
def _add_master_key_hash_to_db(
|
||||
cls,
|
||||
master_key: str,
|
||||
prisma_client: PrismaClient,
|
||||
litellm_proxy_admin_name: str,
|
||||
general_settings: dict,
|
||||
):
|
||||
"""Adds master key hash to db for cost tracking"""
|
||||
if os.getenv("PROXY_ADMIN_ID", None) is not None:
|
||||
litellm_proxy_admin_name = os.getenv(
|
||||
"PROXY_ADMIN_ID", litellm_proxy_admin_name
|
||||
|
@ -2956,7 +2863,9 @@ async def startup_event(): # noqa: PLR0915
|
|||
)
|
||||
asyncio.create_task(task_1)
|
||||
|
||||
if prisma_client is not None and litellm.max_budget > 0:
|
||||
@classmethod
|
||||
def _add_proxy_budget_to_db(cls, litellm_proxy_budget_name: str):
|
||||
"""Adds a global proxy budget to db"""
|
||||
if litellm.budget_duration is None:
|
||||
raise Exception(
|
||||
"budget_duration not set on Proxy. budget_duration is required to use max_budget."
|
||||
|
@ -2982,8 +2891,18 @@ async def startup_event(): # noqa: PLR0915
|
|||
)
|
||||
)
|
||||
|
||||
### START BATCH WRITING DB + CHECKING NEW MODELS###
|
||||
if prisma_client is not None:
|
||||
@classmethod
|
||||
async def initialize_scheduled_background_jobs(
|
||||
cls,
|
||||
general_settings: dict,
|
||||
prisma_client: PrismaClient,
|
||||
proxy_budget_rescheduler_min_time: int,
|
||||
proxy_budget_rescheduler_max_time: int,
|
||||
proxy_batch_write_at: int,
|
||||
proxy_logging_obj: ProxyLogging,
|
||||
store_model_in_db: bool,
|
||||
):
|
||||
"""Initializes scheduled background jobs"""
|
||||
scheduler = AsyncIOScheduler()
|
||||
interval = random.randint(
|
||||
proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time
|
||||
|
@ -3072,6 +2991,165 @@ async def startup_event(): # noqa: PLR0915
|
|||
|
||||
scheduler.start()
|
||||
|
||||
@classmethod
|
||||
def _setup_prisma_client(
|
||||
cls,
|
||||
database_url: Optional[str],
|
||||
proxy_logging_obj: ProxyLogging,
|
||||
user_api_key_cache: DualCache,
|
||||
) -> Optional[PrismaClient]:
|
||||
"""
|
||||
- Sets up prisma client
|
||||
- Adds necessary views to proxy
|
||||
"""
|
||||
prisma_client: Optional[PrismaClient] = None
|
||||
if database_url is not None:
|
||||
try:
|
||||
prisma_client = PrismaClient(
|
||||
database_url=database_url, proxy_logging_obj=proxy_logging_obj
|
||||
)
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
## Add necessary views to proxy ##
|
||||
asyncio.create_task(
|
||||
prisma_client.check_view_exists()
|
||||
) # check if all necessary views exist. Don't block execution
|
||||
|
||||
return prisma_client
|
||||
|
||||
|
||||
@router.on_event("startup")
|
||||
async def startup_event():
|
||||
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client, store_model_in_db, premium_user, _license_check
|
||||
import json
|
||||
|
||||
init_verbose_loggers()
|
||||
|
||||
### LOAD MASTER KEY ###
|
||||
# check if master key set in environment - load from there
|
||||
master_key = get_secret("LITELLM_MASTER_KEY", None) # type: ignore
|
||||
# check if DATABASE_URL in environment - load from there
|
||||
if prisma_client is None:
|
||||
_db_url: Optional[str] = get_secret("DATABASE_URL", None) # type: ignore
|
||||
prisma_client = ProxyStartupEvent._setup_prisma_client(
|
||||
database_url=_db_url,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
user_api_key_cache=user_api_key_cache,
|
||||
)
|
||||
|
||||
### LOAD CONFIG ###
|
||||
worker_config: Optional[Union[str, dict]] = get_secret("WORKER_CONFIG") # type: ignore
|
||||
env_config_yaml: Optional[str] = get_secret_str("CONFIG_FILE_PATH")
|
||||
verbose_proxy_logger.debug("worker_config: %s", worker_config)
|
||||
# check if it's a valid file path
|
||||
if env_config_yaml is not None:
|
||||
if os.path.isfile(env_config_yaml) and proxy_config.is_yaml(
|
||||
config_file_path=env_config_yaml
|
||||
):
|
||||
(
|
||||
llm_router,
|
||||
llm_model_list,
|
||||
general_settings,
|
||||
) = await proxy_config.load_config(
|
||||
router=llm_router, config_file_path=env_config_yaml
|
||||
)
|
||||
elif worker_config is not None:
|
||||
if (
|
||||
isinstance(worker_config, str)
|
||||
and os.path.isfile(worker_config)
|
||||
and proxy_config.is_yaml(config_file_path=worker_config)
|
||||
):
|
||||
(
|
||||
llm_router,
|
||||
llm_model_list,
|
||||
general_settings,
|
||||
) = await proxy_config.load_config(
|
||||
router=llm_router, config_file_path=worker_config
|
||||
)
|
||||
elif os.environ.get("LITELLM_CONFIG_BUCKET_NAME") is not None and isinstance(
|
||||
worker_config, str
|
||||
):
|
||||
(
|
||||
llm_router,
|
||||
llm_model_list,
|
||||
general_settings,
|
||||
) = await proxy_config.load_config(
|
||||
router=llm_router, config_file_path=worker_config
|
||||
)
|
||||
elif isinstance(worker_config, dict):
|
||||
await initialize(**worker_config)
|
||||
else:
|
||||
# if not, assume it's a json string
|
||||
worker_config = json.loads(worker_config)
|
||||
if isinstance(worker_config, dict):
|
||||
await initialize(**worker_config)
|
||||
|
||||
## CHECK PREMIUM USER
|
||||
verbose_proxy_logger.debug(
|
||||
"litellm.proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format(
|
||||
premium_user
|
||||
)
|
||||
)
|
||||
if premium_user is False:
|
||||
premium_user = _license_check.is_premium()
|
||||
|
||||
verbose_proxy_logger.debug(
|
||||
"litellm.proxy.proxy_server.py::startup() - PREMIUM USER value - {}".format(
|
||||
premium_user
|
||||
)
|
||||
)
|
||||
|
||||
ProxyStartupEvent._initialize_startup_logging(
|
||||
llm_router=llm_router,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
redis_usage_cache=redis_usage_cache,
|
||||
)
|
||||
|
||||
## JWT AUTH ##
|
||||
ProxyStartupEvent._initialize_jwt_auth(
|
||||
general_settings=general_settings,
|
||||
prisma_client=prisma_client,
|
||||
user_api_key_cache=user_api_key_cache,
|
||||
)
|
||||
|
||||
if use_background_health_checks:
|
||||
asyncio.create_task(
|
||||
_run_background_health_check()
|
||||
) # start the background health check coroutine.
|
||||
|
||||
if prompt_injection_detection_obj is not None: # [TODO] - REFACTOR THIS
|
||||
prompt_injection_detection_obj.update_environment(router=llm_router)
|
||||
|
||||
verbose_proxy_logger.debug("prisma_client: %s", prisma_client)
|
||||
if prisma_client is not None:
|
||||
await prisma_client.connect()
|
||||
|
||||
if prisma_client is not None and master_key is not None:
|
||||
ProxyStartupEvent._add_master_key_hash_to_db(
|
||||
master_key=master_key,
|
||||
prisma_client=prisma_client,
|
||||
litellm_proxy_admin_name=litellm_proxy_admin_name,
|
||||
general_settings=general_settings,
|
||||
)
|
||||
|
||||
if prisma_client is not None and litellm.max_budget > 0:
|
||||
ProxyStartupEvent._add_proxy_budget_to_db(
|
||||
litellm_proxy_budget_name=litellm_proxy_admin_name
|
||||
)
|
||||
|
||||
### START BATCH WRITING DB + CHECKING NEW MODELS###
|
||||
if prisma_client is not None:
|
||||
await ProxyStartupEvent.initialize_scheduled_background_jobs(
|
||||
general_settings=general_settings,
|
||||
prisma_client=prisma_client,
|
||||
proxy_budget_rescheduler_min_time=proxy_budget_rescheduler_min_time,
|
||||
proxy_budget_rescheduler_max_time=proxy_budget_rescheduler_max_time,
|
||||
proxy_batch_write_at=proxy_batch_write_at,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
store_model_in_db=store_model_in_db,
|
||||
)
|
||||
|
||||
|
||||
#### API ENDPOINTS ####
|
||||
@router.get(
|
||||
|
@@ -6327,11 +6405,7 @@ async def list_end_user(
    --header 'Authorization: Bearer sk-1234'
    ```
    """
    from litellm.proxy.proxy_server import (
        create_audit_log_for_update,
        litellm_proxy_admin_name,
        prisma_client,
    )
    from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client

    if (
        user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN

@@ -6362,38 +6436,6 @@ async def list_end_user(
    return returned_response


async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
    if premium_user is not True:
        return

    if litellm.store_audit_logs is not True:
        return
    if prisma_client is None:
        raise Exception("prisma_client is None, no DB connected")

    verbose_proxy_logger.debug("creating audit log for %s", request_data)

    if isinstance(request_data.updated_values, dict):
        request_data.updated_values = json.dumps(request_data.updated_values)

    if isinstance(request_data.before_value, dict):
        request_data.before_value = json.dumps(request_data.before_value)

    _request_data = request_data.dict(exclude_none=True)

    try:
        await prisma_client.db.litellm_auditlog.create(
            data={
                **_request_data,  # type: ignore
            }
        )
    except Exception as e:
        # [Non-Blocking Exception. Do not allow blocking LLM API call]
        verbose_proxy_logger.error(f"Failed Creating audit log {e}")

    return


#### BUDGET TABLE MANAGEMENT ####

@@ -154,6 +154,8 @@ model LiteLLM_VerificationToken {
  model_spend          Json      @default("{}")
  model_max_budget     Json      @default("{}")
  budget_id            String?
  created_at           DateTime? @default(now()) @map("created_at")
  updated_at           DateTime? @default(now()) @updatedAt @map("updated_at")
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
}

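A hedged sketch of reading a key together with its related budget row through the new `budget_id` relation; it assumes an already-connected `prisma_client`, and the accessor/field names simply follow the schema above:

```python
from litellm.proxy.proxy_server import prisma_client


async def get_key_budget(hashed_token: str):
    # Fetch the verification token row and eagerly include the related
    # LiteLLM_BudgetTable row via the new budget_id relation.
    row = await prisma_client.db.litellm_verificationtoken.find_unique(
        where={"token": hashed_token},
        include={"litellm_budget_table": True},
    )
    if row is not None and row.litellm_budget_table is not None:
        return row.litellm_budget_table.max_budget
    return None
```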
@@ -349,6 +349,31 @@ class ProxyLogging:
        )
        self.premium_user = premium_user

    def startup_event(
        self,
        llm_router: Optional[litellm.Router],
        redis_usage_cache: Optional[RedisCache],
    ):
        """Initialize logging and alerting on proxy startup"""
        ## UPDATE SLACK ALERTING ##
        self.slack_alerting_instance.update_values(llm_router=llm_router)

        ## UPDATE INTERNAL USAGE CACHE ##
        self.update_values(
            redis_cache=redis_usage_cache
        )  # used by parallel request limiter for rate limiting keys across instances

        self._init_litellm_callbacks(
            llm_router=llm_router
        )  # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made

        if "daily_reports" in self.slack_alerting_instance.alert_types:
            asyncio.create_task(
                self.slack_alerting_instance._run_scheduled_daily_report(
                    llm_router=llm_router
                )
            )  # RUN DAILY REPORT (if scheduled)

    def update_values(
        self,
        alerting: Optional[List] = None,

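Illustrative wiring of the new `startup_event` hook during proxy startup; the surrounding object names are assumptions for the sketch, and both arguments may legitimately be `None`:

```python
from litellm.caching import DualCache
from litellm.proxy.utils import ProxyLogging

proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
proxy_logging_obj.startup_event(
    llm_router=llm_router,          # may be None if no router is configured yet
    redis_usage_cache=redis_cache,  # may be None when Redis isn't enabled
)
```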
@@ -63,10 +63,7 @@ from litellm.router_utils.batch_utils import (
    _get_router_metadata_variable_name,
    replace_model_in_jsonl,
)
from litellm.router_utils.client_initalization_utils import (
    set_client,
    should_initialize_sync_client,
)
from litellm.router_utils.client_initalization_utils import InitalizeOpenAISDKClient
from litellm.router_utils.cooldown_cache import CooldownCache
from litellm.router_utils.cooldown_callbacks import router_cooldown_event_callback
from litellm.router_utils.cooldown_handlers import (

@@ -3951,7 +3948,7 @@ class Router:
            raise Exception(f"Unsupported provider - {custom_llm_provider}")

        # init OpenAI, Azure clients
        set_client(
        InitalizeOpenAISDKClient.set_client(
            litellm_router_instance=self, model=deployment.to_json(exclude_none=True)
        )

@@ -4661,7 +4658,9 @@ class Router:
            """
            Re-initialize the client
            """
            set_client(litellm_router_instance=self, model=deployment)
            InitalizeOpenAISDKClient.set_client(
                litellm_router_instance=self, model=deployment
            )
            client = self.cache.get_cache(key=cache_key, local_only=True)
            return client
        else:

@@ -4671,7 +4670,9 @@ class Router:
            """
            Re-initialize the client
            """
            set_client(litellm_router_instance=self, model=deployment)
            InitalizeOpenAISDKClient.set_client(
                litellm_router_instance=self, model=deployment
            )
            client = self.cache.get_cache(key=cache_key, local_only=True)
            return client
        else:

@@ -4682,7 +4683,9 @@ class Router:
            """
            Re-initialize the client
            """
            set_client(litellm_router_instance=self, model=deployment)
            InitalizeOpenAISDKClient.set_client(
                litellm_router_instance=self, model=deployment
            )
            client = self.cache.get_cache(key=cache_key)
            return client
        else:

@@ -4692,7 +4695,9 @@ class Router:
            """
            Re-initialize the client
            """
            set_client(litellm_router_instance=self, model=deployment)
            InitalizeOpenAISDKClient.set_client(
                litellm_router_instance=self, model=deployment
            )
            client = self.cache.get_cache(key=cache_key)
            return client

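The Router hunks above all follow the same lazy re-initialization pattern: look the SDK client up in the in-memory cache and rebuild it on a miss. A hedged sketch of that pattern, with the wrapper name and cache-key suffix taken from the surrounding diff but not part of the public API:

```python
from litellm.router_utils.client_initalization_utils import InitalizeOpenAISDKClient


def get_async_openai_client_sketch(router, deployment: dict, model_id: str):
    # Cache key mirrors the "{model_id}_async_client" convention seen in the diff.
    cache_key = f"{model_id}_async_client"
    client = router.cache.get_cache(key=cache_key, local_only=True)
    if client is None:
        # Re-initialize the client, then read it back from the router cache.
        InitalizeOpenAISDKClient.set_client(
            litellm_router_instance=router, model=deployment
        )
        client = router.cache.get_cache(key=cache_key, local_only=True)
    return client
```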
@ -23,236 +23,227 @@ else:
|
|||
LitellmRouter = Any
|
||||
|
||||
|
||||
def should_initialize_sync_client(
|
||||
litellm_router_instance: LitellmRouter,
|
||||
) -> bool:
|
||||
"""
|
||||
Returns if Sync OpenAI, Azure Clients should be initialized.
|
||||
class InitalizeOpenAISDKClient:
|
||||
@staticmethod
|
||||
def should_initialize_sync_client(
|
||||
litellm_router_instance: LitellmRouter,
|
||||
) -> bool:
|
||||
"""
|
||||
Returns if Sync OpenAI, Azure Clients should be initialized.
|
||||
|
||||
Do not init sync clients when router.router_general_settings.async_only_mode is True
|
||||
Do not init sync clients when router.router_general_settings.async_only_mode is True
|
||||
|
||||
"""
|
||||
if litellm_router_instance is None:
|
||||
return False
|
||||
|
||||
if litellm_router_instance.router_general_settings is not None:
|
||||
if (
|
||||
hasattr(litellm_router_instance, "router_general_settings")
|
||||
and hasattr(
|
||||
litellm_router_instance.router_general_settings, "async_only_mode"
|
||||
)
|
||||
and litellm_router_instance.router_general_settings.async_only_mode is True
|
||||
):
|
||||
"""
|
||||
if litellm_router_instance is None:
|
||||
return False
|
||||
|
||||
return True
|
||||
if litellm_router_instance.router_general_settings is not None:
|
||||
if (
|
||||
hasattr(litellm_router_instance, "router_general_settings")
|
||||
and hasattr(
|
||||
litellm_router_instance.router_general_settings, "async_only_mode"
|
||||
)
|
||||
and litellm_router_instance.router_general_settings.async_only_mode
|
||||
is True
|
||||
):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def set_client(litellm_router_instance: LitellmRouter, model: dict): # noqa: PLR0915
|
||||
"""
|
||||
- Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
|
||||
- Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
|
||||
"""
|
||||
client_ttl = litellm_router_instance.client_ttl
|
||||
litellm_params = model.get("litellm_params", {})
|
||||
model_name = litellm_params.get("model")
|
||||
model_id = model["model_info"]["id"]
|
||||
# ### IF RPM SET - initialize a semaphore ###
|
||||
rpm = litellm_params.get("rpm", None)
|
||||
tpm = litellm_params.get("tpm", None)
|
||||
max_parallel_requests = litellm_params.get("max_parallel_requests", None)
|
||||
calculated_max_parallel_requests = calculate_max_parallel_requests(
|
||||
rpm=rpm,
|
||||
max_parallel_requests=max_parallel_requests,
|
||||
tpm=tpm,
|
||||
default_max_parallel_requests=litellm_router_instance.default_max_parallel_requests,
|
||||
)
|
||||
if calculated_max_parallel_requests:
|
||||
semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
|
||||
cache_key = f"{model_id}_max_parallel_requests_client"
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=semaphore,
|
||||
local_only=True,
|
||||
)
|
||||
|
||||
#### for OpenAI / Azure we need to initalize the Client for High Traffic ########
|
||||
custom_llm_provider = litellm_params.get("custom_llm_provider")
|
||||
custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
|
||||
default_api_base = None
|
||||
default_api_key = None
|
||||
if custom_llm_provider in litellm.openai_compatible_providers:
|
||||
_, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
|
||||
model=model_name
|
||||
)
|
||||
default_api_base = api_base
|
||||
default_api_key = api_key
|
||||
|
||||
if (
|
||||
model_name in litellm.open_ai_chat_completion_models
|
||||
or custom_llm_provider in litellm.openai_compatible_providers
|
||||
or custom_llm_provider == "azure"
|
||||
or custom_llm_provider == "azure_text"
|
||||
or custom_llm_provider == "custom_openai"
|
||||
or custom_llm_provider == "openai"
|
||||
or custom_llm_provider == "text-completion-openai"
|
||||
or "ft:gpt-3.5-turbo" in model_name
|
||||
or model_name in litellm.open_ai_embedding_models
|
||||
@staticmethod
|
||||
def set_client( # noqa: PLR0915
|
||||
litellm_router_instance: LitellmRouter, model: dict
|
||||
):
|
||||
is_azure_ai_studio_model: bool = False
|
||||
if custom_llm_provider == "azure":
|
||||
if litellm.utils._is_non_openai_azure_model(model_name):
|
||||
is_azure_ai_studio_model = True
|
||||
custom_llm_provider = "openai"
|
||||
# remove azure prefx from model_name
|
||||
model_name = model_name.replace("azure/", "")
|
||||
# glorified / complicated reading of configs
|
||||
# user can pass vars directly or they can pas os.environ/AZURE_API_KEY, in which case we will read the env
|
||||
# we do this here because we init clients for Azure, OpenAI and we need to set the right key
|
||||
api_key = litellm_params.get("api_key") or default_api_key
|
||||
if api_key and isinstance(api_key, str) and api_key.startswith("os.environ/"):
|
||||
api_key_env_name = api_key.replace("os.environ/", "")
|
||||
api_key = get_secret_str(api_key_env_name)
|
||||
litellm_params["api_key"] = api_key
|
||||
|
||||
api_base = litellm_params.get("api_base")
|
||||
base_url: Optional[str] = litellm_params.get("base_url")
|
||||
api_base = (
|
||||
api_base or base_url or default_api_base
|
||||
) # allow users to pass in `api_base` or `base_url` for azure
|
||||
if api_base and api_base.startswith("os.environ/"):
|
||||
api_base_env_name = api_base.replace("os.environ/", "")
|
||||
api_base = get_secret_str(api_base_env_name)
|
||||
litellm_params["api_base"] = api_base
|
||||
|
||||
## AZURE AI STUDIO MISTRAL CHECK ##
|
||||
"""
|
||||
Make sure api base ends in /v1/
|
||||
|
||||
if not, add it - https://github.com/BerriAI/litellm/issues/2279
|
||||
- Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
|
||||
- Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
|
||||
"""
|
||||
if (
|
||||
is_azure_ai_studio_model is True
|
||||
and api_base is not None
|
||||
and isinstance(api_base, str)
|
||||
and not api_base.endswith("/v1/")
|
||||
):
|
||||
# check if it ends with a trailing slash
|
||||
if api_base.endswith("/"):
|
||||
api_base += "v1/"
|
||||
elif api_base.endswith("/v1"):
|
||||
api_base += "/"
|
||||
else:
|
||||
api_base += "/v1/"
|
||||
|
||||
api_version = litellm_params.get("api_version")
|
||||
if api_version and api_version.startswith("os.environ/"):
|
||||
api_version_env_name = api_version.replace("os.environ/", "")
|
||||
api_version = get_secret_str(api_version_env_name)
|
||||
litellm_params["api_version"] = api_version
|
||||
|
||||
timeout: Optional[float] = (
|
||||
litellm_params.pop("timeout", None) or litellm.request_timeout
|
||||
client_ttl = litellm_router_instance.client_ttl
|
||||
litellm_params = model.get("litellm_params", {})
|
||||
model_name = litellm_params.get("model")
|
||||
model_id = model["model_info"]["id"]
|
||||
# ### IF RPM SET - initialize a semaphore ###
|
||||
rpm = litellm_params.get("rpm", None)
|
||||
tpm = litellm_params.get("tpm", None)
|
||||
max_parallel_requests = litellm_params.get("max_parallel_requests", None)
|
||||
calculated_max_parallel_requests = calculate_max_parallel_requests(
|
||||
rpm=rpm,
|
||||
max_parallel_requests=max_parallel_requests,
|
||||
tpm=tpm,
|
||||
default_max_parallel_requests=litellm_router_instance.default_max_parallel_requests,
|
||||
)
|
||||
if isinstance(timeout, str) and timeout.startswith("os.environ/"):
|
||||
timeout_env_name = timeout.replace("os.environ/", "")
|
||||
timeout = get_secret(timeout_env_name) # type: ignore
|
||||
litellm_params["timeout"] = timeout
|
||||
|
||||
stream_timeout: Optional[float] = litellm_params.pop(
|
||||
"stream_timeout", timeout
|
||||
) # if no stream_timeout is set, default to timeout
|
||||
if isinstance(stream_timeout, str) and stream_timeout.startswith("os.environ/"):
|
||||
stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
|
||||
stream_timeout = get_secret(stream_timeout_env_name) # type: ignore
|
||||
litellm_params["stream_timeout"] = stream_timeout
|
||||
|
||||
max_retries: Optional[int] = litellm_params.pop(
|
||||
"max_retries", 0
|
||||
) # router handles retry logic
|
||||
if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
|
||||
max_retries_env_name = max_retries.replace("os.environ/", "")
|
||||
max_retries = get_secret(max_retries_env_name) # type: ignore
|
||||
litellm_params["max_retries"] = max_retries
|
||||
|
||||
organization = litellm_params.get("organization", None)
|
||||
if isinstance(organization, str) and organization.startswith("os.environ/"):
|
||||
organization_env_name = organization.replace("os.environ/", "")
|
||||
organization = get_secret_str(organization_env_name)
|
||||
litellm_params["organization"] = organization
|
||||
azure_ad_token_provider: Optional[Callable[[], str]] = None
|
||||
if litellm_params.get("tenant_id"):
|
||||
verbose_router_logger.debug("Using Azure AD Token Provider for Azure Auth")
|
||||
azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
|
||||
tenant_id=litellm_params.get("tenant_id"),
|
||||
client_id=litellm_params.get("client_id"),
|
||||
client_secret=litellm_params.get("client_secret"),
|
||||
if calculated_max_parallel_requests:
|
||||
semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
|
||||
cache_key = f"{model_id}_max_parallel_requests_client"
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=semaphore,
|
||||
local_only=True,
|
||||
)
|
||||
|
||||
if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
|
||||
if api_base is None or not isinstance(api_base, str):
|
||||
filtered_litellm_params = {
|
||||
k: v for k, v in model["litellm_params"].items() if k != "api_key"
|
||||
}
|
||||
_filtered_model = {
|
||||
"model_name": model["model_name"],
|
||||
"litellm_params": filtered_litellm_params,
|
||||
}
|
||||
raise ValueError(
|
||||
f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
|
||||
)
|
||||
azure_ad_token = litellm_params.get("azure_ad_token")
|
||||
if azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
elif (
|
||||
azure_ad_token_provider is None
|
||||
and litellm.enable_azure_ad_token_refresh is True
|
||||
#### for OpenAI / Azure we need to initalize the Client for High Traffic ########
|
||||
custom_llm_provider = litellm_params.get("custom_llm_provider")
|
||||
custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
|
||||
default_api_base = None
|
||||
default_api_key = None
|
||||
if custom_llm_provider in litellm.openai_compatible_providers:
|
||||
_, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
|
||||
model=model_name
|
||||
)
|
||||
default_api_base = api_base
|
||||
default_api_key = api_key
|
||||
|
||||
if (
|
||||
model_name in litellm.open_ai_chat_completion_models
|
||||
or custom_llm_provider in litellm.openai_compatible_providers
|
||||
or custom_llm_provider == "azure"
|
||||
or custom_llm_provider == "azure_text"
|
||||
or custom_llm_provider == "custom_openai"
|
||||
or custom_llm_provider == "openai"
|
||||
or custom_llm_provider == "text-completion-openai"
|
||||
or "ft:gpt-3.5-turbo" in model_name
|
||||
or model_name in litellm.open_ai_embedding_models
|
||||
):
|
||||
is_azure_ai_studio_model: bool = False
|
||||
if custom_llm_provider == "azure":
|
||||
if litellm.utils._is_non_openai_azure_model(model_name):
|
||||
is_azure_ai_studio_model = True
|
||||
custom_llm_provider = "openai"
|
||||
# remove azure prefx from model_name
|
||||
model_name = model_name.replace("azure/", "")
|
||||
# glorified / complicated reading of configs
|
||||
# user can pass vars directly or they can pas os.environ/AZURE_API_KEY, in which case we will read the env
|
||||
# we do this here because we init clients for Azure, OpenAI and we need to set the right key
|
||||
api_key = litellm_params.get("api_key") or default_api_key
|
||||
if (
|
||||
api_key
|
||||
and isinstance(api_key, str)
|
||||
and api_key.startswith("os.environ/")
|
||||
):
|
||||
try:
|
||||
azure_ad_token_provider = get_azure_ad_token_provider()
|
||||
except ValueError:
|
||||
verbose_router_logger.debug(
|
||||
"Azure AD Token Provider could not be used."
|
||||
)
|
||||
if api_version is None:
|
||||
api_version = os.getenv(
|
||||
"AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION
|
||||
)
|
||||
api_key_env_name = api_key.replace("os.environ/", "")
|
||||
api_key = get_secret_str(api_key_env_name)
|
||||
litellm_params["api_key"] = api_key
|
||||
|
||||
if "gateway.ai.cloudflare.com" in api_base:
|
||||
if not api_base.endswith("/"):
|
||||
api_base = litellm_params.get("api_base")
|
||||
base_url: Optional[str] = litellm_params.get("base_url")
|
||||
api_base = (
|
||||
api_base or base_url or default_api_base
|
||||
) # allow users to pass in `api_base` or `base_url` for azure
|
||||
if api_base and api_base.startswith("os.environ/"):
|
||||
api_base_env_name = api_base.replace("os.environ/", "")
|
||||
api_base = get_secret_str(api_base_env_name)
|
||||
litellm_params["api_base"] = api_base
|
||||
|
||||
## AZURE AI STUDIO MISTRAL CHECK ##
|
||||
"""
|
||||
Make sure api base ends in /v1/
|
||||
|
||||
if not, add it - https://github.com/BerriAI/litellm/issues/2279
|
||||
"""
|
||||
if (
|
||||
is_azure_ai_studio_model is True
|
||||
and api_base is not None
|
||||
and isinstance(api_base, str)
|
||||
and not api_base.endswith("/v1/")
|
||||
):
|
||||
# check if it ends with a trailing slash
|
||||
if api_base.endswith("/"):
|
||||
api_base += "v1/"
|
||||
elif api_base.endswith("/v1"):
|
||||
api_base += "/"
|
||||
azure_model = model_name.replace("azure/", "")
|
||||
api_base += f"{azure_model}"
|
||||
cache_key = f"{model_id}_async_client"
|
||||
_client = openai.AsyncAzureOpenAI(
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
azure_ad_token_provider=azure_ad_token_provider,
|
||||
base_url=api_base,
|
||||
api_version=api_version,
|
||||
timeout=timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
else:
|
||||
api_base += "/v1/"
|
||||
|
||||
if should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
api_version = litellm_params.get("api_version")
|
||||
if api_version and api_version.startswith("os.environ/"):
|
||||
api_version_env_name = api_version.replace("os.environ/", "")
|
||||
api_version = get_secret_str(api_version_env_name)
|
||||
litellm_params["api_version"] = api_version
|
||||
|
||||
timeout: Optional[float] = (
|
||||
litellm_params.pop("timeout", None) or litellm.request_timeout
|
||||
)
|
||||
if isinstance(timeout, str) and timeout.startswith("os.environ/"):
|
||||
timeout_env_name = timeout.replace("os.environ/", "")
|
||||
timeout = get_secret(timeout_env_name) # type: ignore
|
||||
litellm_params["timeout"] = timeout
|
||||
|
||||
stream_timeout: Optional[float] = litellm_params.pop(
|
||||
"stream_timeout", timeout
|
||||
) # if no stream_timeout is set, default to timeout
|
||||
if isinstance(stream_timeout, str) and stream_timeout.startswith(
|
||||
"os.environ/"
|
||||
):
|
||||
stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
|
||||
stream_timeout = get_secret(stream_timeout_env_name) # type: ignore
|
||||
litellm_params["stream_timeout"] = stream_timeout
|
||||
|
||||
max_retries: Optional[int] = litellm_params.pop(
|
||||
"max_retries", 0
|
||||
) # router handles retry logic
|
||||
if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
|
||||
max_retries_env_name = max_retries.replace("os.environ/", "")
|
||||
max_retries = get_secret(max_retries_env_name) # type: ignore
|
||||
litellm_params["max_retries"] = max_retries
|
||||
|
||||
organization = litellm_params.get("organization", None)
|
||||
if isinstance(organization, str) and organization.startswith("os.environ/"):
|
||||
organization_env_name = organization.replace("os.environ/", "")
|
||||
organization = get_secret_str(organization_env_name)
|
||||
litellm_params["organization"] = organization
|
||||
azure_ad_token_provider: Optional[Callable[[], str]] = None
|
||||
if litellm_params.get("tenant_id"):
|
||||
verbose_router_logger.debug(
|
||||
"Using Azure AD Token Provider for Azure Auth"
|
||||
)
|
||||
azure_ad_token_provider = (
|
||||
InitalizeOpenAISDKClient.get_azure_ad_token_from_entrata_id(
|
||||
tenant_id=litellm_params.get("tenant_id"),
|
||||
client_id=litellm_params.get("client_id"),
|
||||
client_secret=litellm_params.get("client_secret"),
|
||||
)
|
||||
)
|
||||
|
||||
if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
|
||||
if api_base is None or not isinstance(api_base, str):
|
||||
filtered_litellm_params = {
|
||||
k: v
|
||||
for k, v in model["litellm_params"].items()
|
||||
if k != "api_key"
|
||||
}
|
||||
_filtered_model = {
|
||||
"model_name": model["model_name"],
|
||||
"litellm_params": filtered_litellm_params,
|
||||
}
|
||||
raise ValueError(
|
||||
f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
|
||||
)
|
||||
azure_ad_token = litellm_params.get("azure_ad_token")
|
||||
if azure_ad_token is not None:
|
||||
if azure_ad_token.startswith("oidc/"):
|
||||
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
|
||||
elif (
|
||||
azure_ad_token_provider is None
|
||||
and litellm.enable_azure_ad_token_refresh is True
|
||||
):
|
||||
cache_key = f"{model_id}_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
try:
|
||||
azure_ad_token_provider = get_azure_ad_token_provider()
|
||||
except ValueError:
|
||||
verbose_router_logger.debug(
|
||||
"Azure AD Token Provider could not be used."
|
||||
)
|
||||
if api_version is None:
|
||||
api_version = os.getenv(
|
||||
"AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION
|
||||
)
|
||||
|
||||
if "gateway.ai.cloudflare.com" in api_base:
|
||||
if not api_base.endswith("/"):
|
||||
api_base += "/"
|
||||
azure_model = model_name.replace("azure/", "")
|
||||
api_base += f"{azure_model}"
|
||||
cache_key = f"{model_id}_async_client"
|
||||
_client = openai.AsyncAzureOpenAI(
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
azure_ad_token_provider=azure_ad_token_provider,
|
||||
|
@ -260,7 +251,7 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): # noqa: PL
|
|||
api_version=api_version,
|
||||
timeout=timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.Client(
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
|
@ -273,35 +264,35 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): # noqa: PL
|
|||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
# streaming clients can have diff timeouts
|
||||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
azure_ad_token_provider=azure_ad_token_provider,
|
||||
base_url=api_base,
|
||||
api_version=api_version,
|
||||
timeout=stream_timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
if should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
):
|
||||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
):
|
||||
cache_key = f"{model_id}_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
azure_ad_token_provider=azure_ad_token_provider,
|
||||
base_url=api_base,
|
||||
api_version=api_version,
|
||||
timeout=timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.Client(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
# streaming clients can have diff timeouts
|
||||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
azure_ad_token_provider=azure_ad_token_provider,
|
||||
|
@ -309,7 +300,7 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): # noqa: PL
|
|||
api_version=api_version,
|
||||
timeout=stream_timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.Client(
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
|
@ -322,41 +313,159 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): # noqa: PL
|
|||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
):
|
||||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
azure_ad_token=azure_ad_token,
|
||||
azure_ad_token_provider=azure_ad_token_provider,
|
||||
base_url=api_base,
|
||||
api_version=api_version,
|
||||
timeout=stream_timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.Client(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
else:
|
||||
_api_key = api_key
|
||||
if _api_key is not None and isinstance(_api_key, str):
|
||||
# only show first 5 chars of api_key
|
||||
_api_key = _api_key[:8] + "*" * 15
|
||||
verbose_router_logger.debug(
|
||||
f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
|
||||
)
|
||||
azure_client_params = {
|
||||
"api_key": api_key,
|
||||
"azure_endpoint": api_base,
|
||||
"api_version": api_version,
|
||||
"azure_ad_token": azure_ad_token,
|
||||
"azure_ad_token_provider": azure_ad_token_provider,
|
||||
}
|
||||
|
||||
if azure_ad_token_provider is not None:
|
||||
azure_client_params["azure_ad_token_provider"] = (
|
||||
azure_ad_token_provider
|
||||
)
|
||||
from litellm.llms.AzureOpenAI.azure import (
|
||||
select_azure_base_url_or_endpoint,
|
||||
)
|
||||
|
||||
# this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
|
||||
# required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
|
||||
azure_client_params = select_azure_base_url_or_endpoint(
|
||||
azure_client_params
|
||||
)
|
||||
|
||||
cache_key = f"{model_id}_async_client"
|
||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
timeout=timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
):
|
||||
cache_key = f"{model_id}_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
timeout=timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.Client(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
timeout=stream_timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
):
|
||||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
timeout=stream_timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
http_client=httpx.Client(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
else:
|
||||
_api_key = api_key
|
||||
_api_key = api_key # type: ignore
|
||||
if _api_key is not None and isinstance(_api_key, str):
|
||||
# only show first 5 chars of api_key
|
||||
_api_key = _api_key[:8] + "*" * 15
|
||||
verbose_router_logger.debug(
|
||||
f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
|
||||
f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
|
||||
)
|
||||
azure_client_params = {
|
||||
"api_key": api_key,
|
||||
"azure_endpoint": api_base,
|
||||
"api_version": api_version,
|
||||
"azure_ad_token": azure_ad_token,
|
||||
"azure_ad_token_provider": azure_ad_token_provider,
|
||||
}
|
||||
|
||||
if azure_ad_token_provider is not None:
|
||||
azure_client_params["azure_ad_token_provider"] = (
|
||||
azure_ad_token_provider
|
||||
)
|
||||
from litellm.llms.AzureOpenAI.azure import (
|
||||
select_azure_base_url_or_endpoint,
|
||||
)
|
||||
|
||||
# this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
|
||||
# required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
|
||||
azure_client_params = select_azure_base_url_or_endpoint(
|
||||
azure_client_params
|
||||
)
|
||||
|
||||
cache_key = f"{model_id}_async_client"
|
||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
_client = openai.AsyncOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
organization=organization,
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
|
@ -370,14 +479,17 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): # noqa: PL
|
|||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
if should_initialize_sync_client(
|
||||
|
||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
):
|
||||
cache_key = f"{model_id}_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
_client = openai.OpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
organization=organization,
|
||||
http_client=httpx.Client(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
|
@ -394,16 +506,18 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): # noqa: PL
|
|||
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
_client = openai.AsyncOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=stream_timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
organization=organization,
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
|
@ -412,20 +526,23 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): # noqa: PL
|
|||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
if should_initialize_sync_client(
|
||||
if InitalizeOpenAISDKClient.should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
):
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
_client = openai.OpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=stream_timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
organization=organization,
|
||||
http_client=httpx.Client(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
|
@ -434,149 +551,49 @@ def set_client(litellm_router_instance: LitellmRouter, model: dict): # noqa: PL
|
|||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
@staticmethod
|
||||
def get_azure_ad_token_from_entrata_id(
|
||||
tenant_id: str, client_id: str, client_secret: str
|
||||
) -> Callable[[], str]:
|
||||
from azure.identity import (
|
||||
ClientSecretCredential,
|
||||
DefaultAzureCredential,
|
||||
get_bearer_token_provider,
|
||||
)
|
||||
|
||||
verbose_router_logger.debug("Getting Azure AD Token from Entrata ID")
|
||||
|
||||
if tenant_id.startswith("os.environ/"):
|
||||
_tenant_id = get_secret_str(tenant_id)
|
||||
else:
|
||||
_api_key = api_key # type: ignore
|
||||
if _api_key is not None and isinstance(_api_key, str):
|
||||
# only show first 5 chars of api_key
|
||||
_api_key = _api_key[:8] + "*" * 15
|
||||
verbose_router_logger.debug(
|
||||
f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
|
||||
)
|
||||
cache_key = f"{model_id}_async_client"
|
||||
_client = openai.AsyncOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
organization=organization,
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
_tenant_id = tenant_id
|
||||
|
||||
if should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
):
|
||||
cache_key = f"{model_id}_client"
|
||||
_client = openai.OpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
organization=organization,
|
||||
http_client=httpx.Client(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
if client_id.startswith("os.environ/"):
|
||||
_client_id = get_secret_str(client_id)
|
||||
else:
|
||||
_client_id = client_id
|
||||
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=stream_timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
organization=organization,
|
||||
http_client=httpx.AsyncClient(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
if client_secret.startswith("os.environ/"):
|
||||
_client_secret = get_secret_str(client_secret)
|
||||
else:
|
||||
_client_secret = client_secret
|
||||
|
||||
if should_initialize_sync_client(
|
||||
litellm_router_instance=litellm_router_instance
|
||||
):
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.OpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=stream_timeout, # type: ignore
|
||||
max_retries=max_retries, # type: ignore
|
||||
organization=organization,
|
||||
http_client=httpx.Client(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
), # type: ignore
|
||||
)
|
||||
litellm_router_instance.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
verbose_router_logger.debug(
|
||||
"tenant_id %s, client_id %s, client_secret %s",
|
||||
_tenant_id,
|
||||
_client_id,
|
||||
_client_secret,
|
||||
)
|
||||
if _tenant_id is None or _client_id is None or _client_secret is None:
|
||||
raise ValueError("tenant_id, client_id, and client_secret must be provided")
|
||||
credential = ClientSecretCredential(_tenant_id, _client_id, _client_secret)
|
||||
|
||||
verbose_router_logger.debug("credential %s", credential)
|
||||
|
||||
def get_azure_ad_token_from_entrata_id(
|
||||
tenant_id: str, client_id: str, client_secret: str
|
||||
) -> Callable[[], str]:
|
||||
from azure.identity import (
|
||||
ClientSecretCredential,
|
||||
DefaultAzureCredential,
|
||||
get_bearer_token_provider,
|
||||
)
|
||||
token_provider = get_bearer_token_provider(
|
||||
credential, "https://cognitiveservices.azure.com/.default"
|
||||
)
|
||||
|
||||
verbose_router_logger.debug("Getting Azure AD Token from Entrata ID")
|
||||
verbose_router_logger.debug("token_provider %s", token_provider)
|
||||
|
||||
if tenant_id.startswith("os.environ/"):
|
||||
_tenant_id = get_secret_str(tenant_id)
|
||||
else:
|
||||
_tenant_id = tenant_id
|
||||
|
||||
if client_id.startswith("os.environ/"):
|
||||
_client_id = get_secret_str(client_id)
|
||||
else:
|
||||
_client_id = client_id
|
||||
|
||||
if client_secret.startswith("os.environ/"):
|
||||
_client_secret = get_secret_str(client_secret)
|
||||
else:
|
||||
_client_secret = client_secret
|
||||
|
||||
verbose_router_logger.debug(
|
||||
"tenant_id %s, client_id %s, client_secret %s",
|
||||
_tenant_id,
|
||||
_client_id,
|
||||
_client_secret,
|
||||
)
|
||||
if _tenant_id is None or _client_id is None or _client_secret is None:
|
||||
raise ValueError("tenant_id, client_id, and client_secret must be provided")
|
||||
credential = ClientSecretCredential(_tenant_id, _client_id, _client_secret)
|
||||
|
||||
verbose_router_logger.debug("credential %s", credential)
|
||||
|
||||
token_provider = get_bearer_token_provider(
|
||||
credential, "https://cognitiveservices.azure.com/.default"
|
||||
)
|
||||
|
||||
verbose_router_logger.debug("token_provider %s", token_provider)
|
||||
|
||||
return token_provider
|
||||
return token_provider
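As a rough usage sketch (not part of this diff; endpoint, api_version and the os.environ/ key names are placeholders), the callable returned above can be handed directly to the Azure OpenAI SDK, which then fetches and refreshes the bearer token on its own:

import openai

# Hypothetical wiring; credentials are resolved from "os.environ/..." via get_secret_str.
token_provider = InitalizeOpenAISDKClient.get_azure_ad_token_from_entrata_id(
    tenant_id="os.environ/AZURE_TENANT_ID",
    client_id="os.environ/AZURE_CLIENT_ID",
    client_secret="os.environ/AZURE_CLIENT_SECRET",
)

client = openai.AzureOpenAI(
    azure_endpoint="https://my-resource.openai.azure.com",  # placeholder endpoint
    api_version="2024-02-01",                               # placeholder version
    azure_ad_token_provider=token_provider,                 # SDK calls this whenever it needs a token
)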
|
||||
|
|
|
@ -4,6 +4,7 @@ Class to handle llm wildcard routing and regex pattern matching

import copy
import re
from re import Match
from typing import Dict, List, Optional

from litellm import get_llm_provider

@ -53,11 +54,12 @@ class PatternMatchRouter:
        Returns:
            str: regex pattern
        """
        # Replace '*' with '.*' for regex matching
        regex = pattern.replace("*", ".*")
        # Escape other special characters
        regex = re.escape(regex).replace(r"\.\*", ".*")
        return f"^{regex}$"
        # # Replace '*' with '.*' for regex matching
        # regex = pattern.replace("*", ".*")
        # # Escape other special characters
        # regex = re.escape(regex).replace(r"\.\*", ".*")
        # return f"^{regex}$"
        return re.escape(pattern).replace(r"\*", "(.*)")
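A small illustration (not from the diff) of what the new conversion returns: re.escape() keeps literal characters safe while the escaped "*" becomes a capture group, so the router can both match the request and recover the dynamic segment for later substitution:

import re

pattern = "llmengine/*"
regex = re.escape(pattern).replace(r"\*", "(.*)")  # -> "llmengine/(.*)"

match = re.match(regex, "llmengine/gpt-4o-mini")
assert match is not None
assert match.groups() == ("gpt-4o-mini",)  # captured segment, reused by set_deployment_model_name below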
|
||||
|
||||
def route(self, request: Optional[str]) -> Optional[List[Dict]]:
|
||||
"""
|
||||
|
@ -84,6 +86,44 @@ class PatternMatchRouter:

        return None  # No matching pattern found

    @staticmethod
    def set_deployment_model_name(
        matched_pattern: Match,
        litellm_deployment_litellm_model: str,
    ) -> str:
        """
        Set the model name for the matched pattern llm deployment

        E.g.:

        model_name: llmengine/* (can be any regex pattern or wildcard pattern)
        litellm_params:
            model: openai/*

        if model_name = "llmengine/foo" -> model = "openai/foo"
        """
        ## BASE CASE: if the deployment model name does not contain a wildcard, return the deployment model name
        if "*" not in litellm_deployment_litellm_model:
            return litellm_deployment_litellm_model

        wildcard_count = litellm_deployment_litellm_model.count("*")

        # Extract all dynamic segments from the request
        dynamic_segments = matched_pattern.groups()

        if len(dynamic_segments) > wildcard_count:
            raise ValueError(
                f"More wildcards in the deployment model name than the pattern. Wildcard count: {wildcard_count}, dynamic segments count: {len(dynamic_segments)}"
            )

        # Replace the corresponding wildcards in the litellm model pattern with extracted segments
        for segment in dynamic_segments:
            litellm_deployment_litellm_model = litellm_deployment_litellm_model.replace(
                "*", segment, 1
            )

        return litellm_deployment_litellm_model
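To make the substitution concrete, a hedged example (deployment names are made up) of how a matched request maps onto the deployment's litellm model:

import re

# Router config (illustrative): model_name "llmengine/*" with litellm_params.model "openai/*"
matched = re.match(re.escape("llmengine/*").replace(r"\*", "(.*)"), "llmengine/my-deployment")

resolved = PatternMatchRouter.set_deployment_model_name(
    matched_pattern=matched,
    litellm_deployment_litellm_model="openai/*",
)
assert resolved == "openai/my-deployment"  # the captured segment replaces the single wildcard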
|
||||
|
||||
def get_pattern(
|
||||
self, model: str, custom_llm_provider: Optional[str] = None
|
||||
) -> Optional[List[Dict]]:
|
||||
|
|
10  litellm/types/integrations/arize.py  Normal file

@ -0,0 +1,10 @@
from typing import Optional

from pydantic import BaseModel


class ArizeConfig(BaseModel):
    space_key: str
    api_key: str
    grpc_endpoint: Optional[str] = None
    http_endpoint: Optional[str] = None
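A minimal construction sketch (assumed usage, not from the diff; the environment variable names are placeholders chosen for illustration):

import os

arize_config = ArizeConfig(
    space_key=os.environ["ARIZE_SPACE_KEY"],
    api_key=os.environ["ARIZE_API_KEY"],
    grpc_endpoint=os.getenv("ARIZE_ENDPOINT"),       # OTLP gRPC endpoint, optional
    http_endpoint=os.getenv("ARIZE_HTTP_ENDPOINT"),  # HTTP endpoint, optional
)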
|
52  litellm/types/integrations/datadog_llm_obs.py  Normal file

@ -0,0 +1,52 @@
"""
Payloads for Datadog LLM Observability Service (LLMObs)

API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=example#api-standards
"""

from typing import Any, List, Literal, Optional, TypedDict


class InputMeta(TypedDict):
    messages: List[Any]


class OutputMeta(TypedDict):
    messages: List[Any]


class Meta(TypedDict):
    # The span kind: "agent", "workflow", "llm", "tool", "task", "embedding", or "retrieval".
    kind: Literal["llm", "tool", "task", "embedding", "retrieval"]
    input: InputMeta  # The span’s input information.
    output: OutputMeta  # The span’s output information.


class LLMMetrics(TypedDict, total=False):
    input_tokens: float
    output_tokens: float
    total_tokens: float
    time_to_first_token: float
    time_per_output_token: float


class LLMObsPayload(TypedDict):
    parent_id: str
    trace_id: str
    span_id: str
    name: str
    meta: Meta
    start_ns: int
    duration: int
    metrics: LLMMetrics


class DDSpanAttributes(TypedDict):
    ml_app: str
    tags: List[str]
    spans: List[LLMObsPayload]


class DDIntakePayload(TypedDict):
    type: str
    attributes: DDSpanAttributes
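For orientation, a hand-built payload that conforms to these TypedDicts could look like the following (all values are made up; the real integration derives them from the logging payload):

span: LLMObsPayload = {
    "parent_id": "undefined",
    "trace_id": "trace-123",   # placeholder ids
    "span_id": "span-456",
    "name": "litellm_llm_call",
    "meta": {
        "kind": "llm",
        "input": {"messages": [{"role": "user", "content": "hi"}]},
        "output": {"messages": [{"role": "assistant", "content": "hello"}]},
    },
    "start_ns": 1730000000000000000,
    "duration": 250_000_000,   # nanoseconds
    "metrics": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
}

intake: DDIntakePayload = {
    "type": "span",
    "attributes": {"ml_app": "my-app", "tags": ["env:dev"], "spans": [span]},
}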
|
7  litellm/types/integrations/langfuse.py  Normal file

@ -0,0 +1,7 @@
from typing import Optional, TypedDict


class LangfuseLoggingConfig(TypedDict):
    langfuse_secret: Optional[str]
    langfuse_public_key: Optional[str]
    langfuse_host: Optional[str]
|
|
@ -210,15 +210,23 @@ class ServerSentEvent:
        return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id}, retry={self.retry})"


COHERE_EMBEDDING_INPUT_TYPES = Literal[
    "search_document", "search_query", "classification", "clustering", "image"
]


class CohereEmbeddingRequest(TypedDict, total=False):
    texts: Required[List[str]]
    input_type: Required[
        Literal["search_document", "search_query", "classification", "clustering"]
    ]
    texts: List[str]
    images: List[str]
    input_type: Required[COHERE_EMBEDDING_INPUT_TYPES]
    truncate: Literal["NONE", "START", "END"]
    embedding_types: Literal["float", "int8", "uint8", "binary", "ubinary"]


class CohereEmbeddingRequestWithModel(CohereEmbeddingRequest):
    model: Required[str]


class CohereEmbeddingResponse(TypedDict):
    embeddings: List[List[float]]
    id: str
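A hedged sketch of what the widened request type now allows, e.g. an image embedding next to a plain text one (model name and base64 data are placeholders):

image_request: CohereEmbeddingRequestWithModel = {
    "model": "embed-english-v3.0",
    "input_type": "image",          # newly allowed by COHERE_EMBEDDING_INPUT_TYPES
    "images": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUg=="],  # placeholder data URI
    "embedding_types": "float",
}

text_request: CohereEmbeddingRequestWithModel = {
    "model": "embed-english-v3.0",
    "input_type": "search_document",
    "texts": ["hello world"],       # texts is no longer Required, so image-only calls can omit it
}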
|
||||
|
|
|
@ -970,9 +970,9 @@ class EmbeddingResponse(OpenAIObject):

class Logprobs(OpenAIObject):
    text_offset: List[int]
    token_logprobs: List[float]
    token_logprobs: List[Union[float, None]]
    tokens: List[str]
    top_logprobs: List[Dict[str, float]]
    top_logprobs: List[Union[Dict[str, float], None]]


class TextChoices(OpenAIObject):
|
||||
|
@ -1177,12 +1177,15 @@ from openai.types.images_response import ImagesResponse as OpenAIImageResponse

class ImageResponse(OpenAIImageResponse):
    _hidden_params: dict = {}
    usage: Usage

    def __init__(
        self,
        created: Optional[int] = None,
        data: Optional[List[ImageObject]] = None,
        response_ms=None,
        usage: Optional[Usage] = None,
        hidden_params: Optional[dict] = None,
    ):
        if response_ms:
            _response_ms = response_ms

@ -1204,8 +1207,13 @@ class ImageResponse(OpenAIImageResponse):
                _data.append(ImageObject(**d))
            elif isinstance(d, BaseModel):
                _data.append(ImageObject(**d.model_dump()))
        super().__init__(created=created, data=_data)
        self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
        _usage = usage or Usage(
            prompt_tokens=0,
            completion_tokens=0,
            total_tokens=0,
        )
        super().__init__(created=created, data=_data, usage=_usage)  # type: ignore
        self._hidden_params = hidden_params or {}
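A quick illustrative construction (not from the diff) showing the effect of the change: when no usage is supplied, the response now carries a zeroed Usage object instead of a plain dict:

from litellm.types.utils import ImageObject, ImageResponse, Usage

resp = ImageResponse(
    created=1729709929,
    data=[ImageObject(url="https://example.com/image.png")],  # placeholder URL
)
assert isinstance(resp.usage, Usage)
assert resp.usage.total_tokens == 0  # defaulted because no usage was passed in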
|
||||
|
||||
def __contains__(self, key):
|
||||
# Define custom behavior for the 'in' operator
|
||||
|
@ -1404,16 +1412,20 @@ class AdapterCompletionStreamWrapper:
|
|||
raise StopAsyncIteration
|
||||
|
||||
|
||||
class StandardLoggingMetadata(TypedDict):
|
||||
class StandardLoggingUserAPIKeyMetadata(TypedDict):
|
||||
user_api_key_hash: Optional[str] # hash of the litellm virtual key used
|
||||
user_api_key_alias: Optional[str]
|
||||
user_api_key_org_id: Optional[str]
|
||||
user_api_key_team_id: Optional[str]
|
||||
user_api_key_user_id: Optional[str]
|
||||
user_api_key_team_alias: Optional[str]
|
||||
|
||||
|
||||
class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
|
||||
"""
|
||||
Specific metadata k,v pairs logged to integration for easier cost tracking
|
||||
"""
|
||||
|
||||
user_api_key_hash: Optional[str] # hash of the litellm virtual key used
|
||||
user_api_key_alias: Optional[str]
|
||||
user_api_key_team_id: Optional[str]
|
||||
user_api_key_user_id: Optional[str]
|
||||
user_api_key_team_alias: Optional[str]
|
||||
spend_logs_metadata: Optional[
|
||||
dict
|
||||
] # special param to log k,v pairs to spendlogs for a call
|
||||
|
|
507
litellm/utils.py
507
litellm/utils.py
|
@ -70,6 +70,12 @@ from litellm.litellm_core_utils.get_llm_provider_logic import (
|
|||
get_llm_provider,
|
||||
)
|
||||
from litellm.litellm_core_utils.llm_request_utils import _ensure_extra_body_is_safe
|
||||
from litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response import (
|
||||
_handle_invalid_parallel_tool_calls,
|
||||
convert_to_model_response_object,
|
||||
convert_to_streaming_response,
|
||||
convert_to_streaming_response_async,
|
||||
)
|
||||
from litellm.litellm_core_utils.llm_response_utils.get_headers import (
|
||||
get_response_headers,
|
||||
)
|
||||
|
@ -126,6 +132,7 @@ except (ImportError, AttributeError):
|
|||
os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
|
||||
"CUSTOM_TIKTOKEN_CACHE_DIR", filename
|
||||
) # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
|
||||
from tiktoken import Encoding
|
||||
|
||||
encoding = tiktoken.get_encoding("cl100k_base")
|
||||
from importlib import resources
|
||||
|
@ -213,13 +220,10 @@ prometheusLogger = None
|
|||
dynamoLogger = None
|
||||
s3Logger = None
|
||||
genericAPILogger = None
|
||||
clickHouseLogger = None
|
||||
greenscaleLogger = None
|
||||
lunaryLogger = None
|
||||
aispendLogger = None
|
||||
berrispendLogger = None
|
||||
supabaseClient = None
|
||||
liteDebuggerClient = None
|
||||
callback_list: Optional[List[str]] = []
|
||||
user_logger_fn = None
|
||||
additional_details: Optional[Dict[str, str]] = {}
|
||||
|
@ -609,7 +613,6 @@ def function_setup( # noqa: PLR0915
|
|||
|
||||
|
||||
def client(original_function): # noqa: PLR0915
|
||||
global liteDebuggerClient
|
||||
rules_obj = Rules()
|
||||
|
||||
def check_coroutine(value) -> bool:
|
||||
|
@ -1282,7 +1285,10 @@ def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
        enc: The encoded text.
    """
    tokenizer_json = custom_tokenizer or _select_tokenizer(model=model)
    enc = tokenizer_json["tokenizer"].encode(text)
    if isinstance(tokenizer_json["tokenizer"], Encoding):
        enc = tokenizer_json["tokenizer"].encode(text, disallowed_special=())
    else:
        enc = tokenizer_json["tokenizer"].encode(text)
    return enc
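A brief sketch of why the tiktoken branch passes disallowed_special=(): by default tiktoken raises on text containing special tokens, while an empty disallowed set simply encodes it (model name is illustrative):

import litellm

# Without disallowed_special=(), tiktoken would raise a ValueError on the special token below.
tokens = litellm.encode(model="gpt-3.5-turbo", text="hello <|endoftext|> world")
print(len(tokens))  # encodes cleanly for tiktoken-backed models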
|
||||
|
||||
|
||||
|
@ -3049,8 +3055,8 @@ def get_optional_params( # noqa: PLR0915
|
|||
)
|
||||
if litellm.vertex_ai_safety_settings is not None:
|
||||
optional_params["safety_settings"] = litellm.vertex_ai_safety_settings
|
||||
elif (
|
||||
custom_llm_provider == "vertex_ai" and model in litellm.vertex_anthropic_models
|
||||
elif litellm.VertexAIAnthropicConfig.is_supported_model(
|
||||
model=model, custom_llm_provider=custom_llm_provider
|
||||
):
|
||||
supported_params = get_supported_openai_params(
|
||||
model=model, custom_llm_provider=custom_llm_provider
|
||||
|
@ -5191,7 +5197,9 @@ def create_proxy_transport_and_mounts():
|
|||
|
||||
|
||||
def validate_environment( # noqa: PLR0915
|
||||
model: Optional[str] = None, api_key: Optional[str] = None
|
||||
model: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Checks if the environment variables are valid for the given model.
|
||||
|
@ -5218,11 +5226,6 @@ def validate_environment( # noqa: PLR0915
|
|||
_, custom_llm_provider, _, _ = get_llm_provider(model=model)
|
||||
except Exception:
|
||||
custom_llm_provider = None
|
||||
# # check if llm provider part of model name
|
||||
# if model.split("/",1)[0] in litellm.provider_list:
|
||||
# custom_llm_provider = model.split("/", 1)[0]
|
||||
# model = model.split("/", 1)[1]
|
||||
# custom_llm_provider_passed_in = True
|
||||
|
||||
if custom_llm_provider:
|
||||
if custom_llm_provider == "openai":
|
||||
|
@ -5491,476 +5494,20 @@ def validate_environment( # noqa: PLR0915
|
|||
if "api_key" not in key.lower():
|
||||
new_missing_keys.append(key)
|
||||
missing_keys = new_missing_keys
|
||||
|
||||
if api_base is not None:
|
||||
new_missing_keys = []
|
||||
for key in missing_keys:
|
||||
if "api_base" not in key.lower():
|
||||
new_missing_keys.append(key)
|
||||
missing_keys = new_missing_keys
|
||||
|
||||
if len(missing_keys) == 0: # no missing keys
|
||||
keys_in_environment = True
|
||||
|
||||
return {"keys_in_environment": keys_in_environment, "missing_keys": missing_keys}
|
||||
|
||||
|
||||
async def convert_to_streaming_response_async(response_object: Optional[dict] = None):
|
||||
"""
|
||||
Asynchronously converts a response object to a streaming response.
|
||||
|
||||
Args:
|
||||
response_object (Optional[dict]): The response object to be converted. Defaults to None.
|
||||
|
||||
Raises:
|
||||
Exception: If the response object is None.
|
||||
|
||||
Yields:
|
||||
ModelResponse: The converted streaming response object.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
model_response_object = ModelResponse(stream=True)
|
||||
|
||||
if model_response_object is None:
|
||||
raise Exception("Error in response creating model response object")
|
||||
|
||||
choice_list = []
|
||||
|
||||
for idx, choice in enumerate(response_object["choices"]):
|
||||
if (
|
||||
choice["message"].get("tool_calls", None) is not None
|
||||
and isinstance(choice["message"]["tool_calls"], list)
|
||||
and len(choice["message"]["tool_calls"]) > 0
|
||||
and isinstance(choice["message"]["tool_calls"][0], dict)
|
||||
):
|
||||
pydantic_tool_calls = []
|
||||
for index, t in enumerate(choice["message"]["tool_calls"]):
|
||||
if "index" not in t:
|
||||
t["index"] = index
|
||||
pydantic_tool_calls.append(ChatCompletionDeltaToolCall(**t))
|
||||
choice["message"]["tool_calls"] = pydantic_tool_calls
|
||||
delta = Delta(
|
||||
content=choice["message"].get("content", None),
|
||||
role=choice["message"]["role"],
|
||||
function_call=choice["message"].get("function_call", None),
|
||||
tool_calls=choice["message"].get("tool_calls", None),
|
||||
)
|
||||
finish_reason = choice.get("finish_reason", None)
|
||||
|
||||
if finish_reason is None:
|
||||
finish_reason = choice.get("finish_details")
|
||||
|
||||
logprobs = choice.get("logprobs", None)
|
||||
|
||||
choice = StreamingChoices(
|
||||
finish_reason=finish_reason, index=idx, delta=delta, logprobs=logprobs
|
||||
)
|
||||
choice_list.append(choice)
|
||||
|
||||
model_response_object.choices = choice_list
|
||||
|
||||
if "usage" in response_object and response_object["usage"] is not None:
|
||||
setattr(
|
||||
model_response_object,
|
||||
"usage",
|
||||
Usage(
|
||||
completion_tokens=response_object["usage"].get("completion_tokens", 0),
|
||||
prompt_tokens=response_object["usage"].get("prompt_tokens", 0),
|
||||
total_tokens=response_object["usage"].get("total_tokens", 0),
|
||||
),
|
||||
)
|
||||
|
||||
if "id" in response_object:
|
||||
model_response_object.id = response_object["id"]
|
||||
|
||||
if "created" in response_object:
|
||||
model_response_object.created = response_object["created"]
|
||||
|
||||
if "system_fingerprint" in response_object:
|
||||
model_response_object.system_fingerprint = response_object["system_fingerprint"]
|
||||
|
||||
if "model" in response_object:
|
||||
model_response_object.model = response_object["model"]
|
||||
|
||||
yield model_response_object
|
||||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
def convert_to_streaming_response(response_object: Optional[dict] = None):
|
||||
# used for yielding Cache hits when stream == True
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
model_response_object = ModelResponse(stream=True)
|
||||
choice_list = []
|
||||
for idx, choice in enumerate(response_object["choices"]):
|
||||
delta = Delta(
|
||||
content=choice["message"].get("content", None),
|
||||
role=choice["message"]["role"],
|
||||
function_call=choice["message"].get("function_call", None),
|
||||
tool_calls=choice["message"].get("tool_calls", None),
|
||||
)
|
||||
finish_reason = choice.get("finish_reason", None)
|
||||
if finish_reason is None:
|
||||
# gpt-4 vision can return 'finish_reason' or 'finish_details'
|
||||
finish_reason = choice.get("finish_details")
|
||||
logprobs = choice.get("logprobs", None)
|
||||
enhancements = choice.get("enhancements", None)
|
||||
choice = StreamingChoices(
|
||||
finish_reason=finish_reason,
|
||||
index=idx,
|
||||
delta=delta,
|
||||
logprobs=logprobs,
|
||||
enhancements=enhancements,
|
||||
)
|
||||
|
||||
choice_list.append(choice)
|
||||
model_response_object.choices = choice_list
|
||||
|
||||
if "usage" in response_object and response_object["usage"] is not None:
|
||||
setattr(model_response_object, "usage", Usage())
|
||||
model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0) # type: ignore
|
||||
model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0) # type: ignore
|
||||
model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0) # type: ignore
|
||||
|
||||
if "id" in response_object:
|
||||
model_response_object.id = response_object["id"]
|
||||
|
||||
if "created" in response_object:
|
||||
model_response_object.created = response_object["created"]
|
||||
|
||||
if "system_fingerprint" in response_object:
|
||||
model_response_object.system_fingerprint = response_object["system_fingerprint"]
|
||||
|
||||
if "model" in response_object:
|
||||
model_response_object.model = response_object["model"]
|
||||
yield model_response_object
|
||||
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
def _handle_invalid_parallel_tool_calls(
|
||||
tool_calls: List[ChatCompletionMessageToolCall],
|
||||
):
|
||||
"""
|
||||
Handle hallucinated parallel tool call from openai - https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653
|
||||
|
||||
Code modified from: https://github.com/phdowling/openai_multi_tool_use_parallel_patch/blob/main/openai_multi_tool_use_parallel_patch.py
|
||||
"""
|
||||
|
||||
if tool_calls is None:
|
||||
return
|
||||
try:
|
||||
replacements: Dict[int, List[ChatCompletionMessageToolCall]] = defaultdict(list)
|
||||
for i, tool_call in enumerate(tool_calls):
|
||||
current_function = tool_call.function.name
|
||||
function_args = json.loads(tool_call.function.arguments)
|
||||
if current_function == "multi_tool_use.parallel":
|
||||
verbose_logger.debug(
|
||||
"OpenAI did a weird pseudo-multi-tool-use call, fixing call structure.."
|
||||
)
|
||||
for _fake_i, _fake_tool_use in enumerate(function_args["tool_uses"]):
|
||||
_function_args = _fake_tool_use["parameters"]
|
||||
_current_function = _fake_tool_use["recipient_name"]
|
||||
if _current_function.startswith("functions."):
|
||||
_current_function = _current_function[len("functions.") :]
|
||||
|
||||
fixed_tc = ChatCompletionMessageToolCall(
|
||||
id=f"{tool_call.id}_{_fake_i}",
|
||||
type="function",
|
||||
function=Function(
|
||||
name=_current_function, arguments=json.dumps(_function_args)
|
||||
),
|
||||
)
|
||||
replacements[i].append(fixed_tc)
|
||||
|
||||
shift = 0
|
||||
for i, replacement in replacements.items():
|
||||
tool_calls[:] = (
|
||||
tool_calls[: i + shift] + replacement + tool_calls[i + shift + 1 :]
|
||||
)
|
||||
shift += len(replacement)
|
||||
|
||||
return tool_calls
|
||||
except json.JSONDecodeError:
|
||||
# if there is a JSONDecodeError, return the original tool_calls
|
||||
return tool_calls
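To illustrate what this patch does (example data only; import locations for the tool-call types are assumed to be litellm.types.utils), a single hallucinated multi_tool_use.parallel call is expanded back into the individual calls it wraps:

import json

from litellm.types.utils import ChatCompletionMessageToolCall, Function

bad_call = ChatCompletionMessageToolCall(
    id="call_1",
    type="function",
    function=Function(
        name="multi_tool_use.parallel",
        arguments=json.dumps(
            {
                "tool_uses": [
                    {"recipient_name": "functions.get_weather", "parameters": {"city": "Paris"}},
                    {"recipient_name": "functions.get_time", "parameters": {"tz": "CET"}},
                ]
            }
        ),
    ),
)

fixed = _handle_invalid_parallel_tool_calls([bad_call])
assert [tc.function.name for tc in fixed] == ["get_weather", "get_time"]  # "functions." prefix stripped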
|
||||
|
||||
|
||||
def convert_to_model_response_object( # noqa: PLR0915
|
||||
response_object: Optional[dict] = None,
|
||||
model_response_object: Optional[
|
||||
Union[
|
||||
ModelResponse,
|
||||
EmbeddingResponse,
|
||||
ImageResponse,
|
||||
TranscriptionResponse,
|
||||
RerankResponse,
|
||||
]
|
||||
] = None,
|
||||
response_type: Literal[
|
||||
"completion", "embedding", "image_generation", "audio_transcription", "rerank"
|
||||
] = "completion",
|
||||
stream=False,
|
||||
start_time=None,
|
||||
end_time=None,
|
||||
hidden_params: Optional[dict] = None,
|
||||
_response_headers: Optional[dict] = None,
|
||||
convert_tool_call_to_json_mode: Optional[
|
||||
bool
|
||||
] = None, # used for supporting 'json_schema' on older models
|
||||
):
|
||||
received_args = locals()
|
||||
|
||||
additional_headers = get_response_headers(_response_headers)
|
||||
|
||||
if hidden_params is None:
|
||||
hidden_params = {}
|
||||
hidden_params["additional_headers"] = additional_headers
|
||||
|
||||
### CHECK IF ERROR IN RESPONSE ### - openrouter returns these in the dictionary
|
||||
if (
|
||||
response_object is not None
|
||||
and "error" in response_object
|
||||
and response_object["error"] is not None
|
||||
):
|
||||
error_args = {"status_code": 422, "message": "Error in response object"}
|
||||
if isinstance(response_object["error"], dict):
|
||||
if "code" in response_object["error"]:
|
||||
error_args["status_code"] = response_object["error"]["code"]
|
||||
if "message" in response_object["error"]:
|
||||
if isinstance(response_object["error"]["message"], dict):
|
||||
message_str = json.dumps(response_object["error"]["message"])
|
||||
else:
|
||||
message_str = str(response_object["error"]["message"])
|
||||
error_args["message"] = message_str
|
||||
raised_exception = Exception()
|
||||
setattr(raised_exception, "status_code", error_args["status_code"])
|
||||
setattr(raised_exception, "message", error_args["message"])
|
||||
raise raised_exception
|
||||
|
||||
try:
|
||||
if response_type == "completion" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, ModelResponse)
|
||||
):
|
||||
if response_object is None or model_response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
if stream is True:
|
||||
# for returning cached responses, we need to yield a generator
|
||||
return convert_to_streaming_response(response_object=response_object)
|
||||
choice_list = []
|
||||
|
||||
assert response_object["choices"] is not None and isinstance(
|
||||
response_object["choices"], Iterable
|
||||
)
|
||||
|
||||
for idx, choice in enumerate(response_object["choices"]):
|
||||
## HANDLE JSON MODE - anthropic returns single function call]
|
||||
tool_calls = choice["message"].get("tool_calls", None)
|
||||
if tool_calls is not None:
|
||||
_openai_tool_calls = []
|
||||
for _tc in tool_calls:
|
||||
_openai_tc = ChatCompletionMessageToolCall(**_tc)
|
||||
_openai_tool_calls.append(_openai_tc)
|
||||
fixed_tool_calls = _handle_invalid_parallel_tool_calls(
|
||||
_openai_tool_calls
|
||||
)
|
||||
|
||||
if fixed_tool_calls is not None:
|
||||
tool_calls = fixed_tool_calls
|
||||
|
||||
message: Optional[Message] = None
|
||||
finish_reason: Optional[str] = None
|
||||
if (
|
||||
convert_tool_call_to_json_mode
|
||||
and tool_calls is not None
|
||||
and len(tool_calls) == 1
|
||||
):
|
||||
# to support 'json_schema' logic on older models
|
||||
json_mode_content_str: Optional[str] = tool_calls[0][
|
||||
"function"
|
||||
].get("arguments")
|
||||
if json_mode_content_str is not None:
|
||||
message = litellm.Message(content=json_mode_content_str)
|
||||
finish_reason = "stop"
|
||||
if message is None:
|
||||
message = Message(
|
||||
content=choice["message"].get("content", None),
|
||||
role=choice["message"]["role"] or "assistant",
|
||||
function_call=choice["message"].get("function_call", None),
|
||||
tool_calls=tool_calls,
|
||||
audio=choice["message"].get("audio", None),
|
||||
)
|
||||
finish_reason = choice.get("finish_reason", None)
|
||||
if finish_reason is None:
|
||||
# gpt-4 vision can return 'finish_reason' or 'finish_details'
|
||||
finish_reason = choice.get("finish_details") or "stop"
|
||||
logprobs = choice.get("logprobs", None)
|
||||
enhancements = choice.get("enhancements", None)
|
||||
choice = Choices(
|
||||
finish_reason=finish_reason,
|
||||
index=idx,
|
||||
message=message,
|
||||
logprobs=logprobs,
|
||||
enhancements=enhancements,
|
||||
)
|
||||
choice_list.append(choice)
|
||||
model_response_object.choices = choice_list
|
||||
|
||||
if "usage" in response_object and response_object["usage"] is not None:
|
||||
usage_object = litellm.Usage(**response_object["usage"])
|
||||
setattr(model_response_object, "usage", usage_object)
|
||||
if "created" in response_object:
|
||||
model_response_object.created = response_object["created"] or int(
|
||||
time.time()
|
||||
)
|
||||
|
||||
if "id" in response_object:
|
||||
model_response_object.id = response_object["id"] or str(uuid.uuid4())
|
||||
|
||||
if "system_fingerprint" in response_object:
|
||||
model_response_object.system_fingerprint = response_object[
|
||||
"system_fingerprint"
|
||||
]
|
||||
|
||||
if "model" in response_object:
|
||||
if model_response_object.model is None:
|
||||
model_response_object.model = response_object["model"]
|
||||
elif (
|
||||
"/" in model_response_object.model
|
||||
and response_object["model"] is not None
|
||||
):
|
||||
openai_compatible_provider = model_response_object.model.split("/")[
|
||||
0
|
||||
]
|
||||
model_response_object.model = (
|
||||
openai_compatible_provider + "/" + response_object["model"]
|
||||
)
|
||||
|
||||
if start_time is not None and end_time is not None:
|
||||
if isinstance(start_time, type(end_time)):
|
||||
model_response_object._response_ms = ( # type: ignore
|
||||
end_time - start_time
|
||||
).total_seconds() * 1000
|
||||
|
||||
if hidden_params is not None:
|
||||
if model_response_object._hidden_params is None:
|
||||
model_response_object._hidden_params = {}
|
||||
model_response_object._hidden_params.update(hidden_params)
|
||||
|
||||
if _response_headers is not None:
|
||||
model_response_object._response_headers = _response_headers
|
||||
|
||||
special_keys = list(litellm.ModelResponse.model_fields.keys())
|
||||
special_keys.append("usage")
|
||||
for k, v in response_object.items():
|
||||
if k not in special_keys:
|
||||
setattr(model_response_object, k, v)
|
||||
|
||||
return model_response_object
|
||||
elif response_type == "embedding" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, EmbeddingResponse)
|
||||
):
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
if model_response_object is None:
|
||||
model_response_object = EmbeddingResponse()
|
||||
|
||||
if "model" in response_object:
|
||||
model_response_object.model = response_object["model"]
|
||||
|
||||
if "object" in response_object:
|
||||
model_response_object.object = response_object["object"]
|
||||
|
||||
model_response_object.data = response_object["data"]
|
||||
|
||||
if "usage" in response_object and response_object["usage"] is not None:
|
||||
model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0) # type: ignore
|
||||
model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0) # type: ignore
|
||||
model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0) # type: ignore
|
||||
|
||||
if start_time is not None and end_time is not None:
|
||||
model_response_object._response_ms = ( # type: ignore
|
||||
end_time - start_time
|
||||
).total_seconds() * 1000 # return response latency in ms like openai
|
||||
|
||||
if hidden_params is not None:
|
||||
model_response_object._hidden_params = hidden_params
|
||||
|
||||
if _response_headers is not None:
|
||||
model_response_object._response_headers = _response_headers
|
||||
|
||||
return model_response_object
|
||||
elif response_type == "image_generation" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, ImageResponse)
|
||||
):
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
if model_response_object is None:
|
||||
model_response_object = ImageResponse()
|
||||
|
||||
if "created" in response_object:
|
||||
model_response_object.created = response_object["created"]
|
||||
|
||||
if "data" in response_object:
|
||||
model_response_object.data = response_object["data"]
|
||||
|
||||
if hidden_params is not None:
|
||||
model_response_object._hidden_params = hidden_params
|
||||
|
||||
return model_response_object
|
||||
elif response_type == "audio_transcription" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, TranscriptionResponse)
|
||||
):
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
if model_response_object is None:
|
||||
model_response_object = TranscriptionResponse()
|
||||
|
||||
if "text" in response_object:
|
||||
model_response_object.text = response_object["text"]
|
||||
|
||||
optional_keys = ["language", "task", "duration", "words", "segments"]
|
||||
for key in optional_keys: # not guaranteed to be in response
|
||||
if key in response_object:
|
||||
setattr(model_response_object, key, response_object[key])
|
||||
|
||||
if hidden_params is not None:
|
||||
model_response_object._hidden_params = hidden_params
|
||||
|
||||
if _response_headers is not None:
|
||||
model_response_object._response_headers = _response_headers
|
||||
|
||||
return model_response_object
|
||||
elif response_type == "rerank" and (
|
||||
model_response_object is None
|
||||
or isinstance(model_response_object, RerankResponse)
|
||||
):
|
||||
if response_object is None:
|
||||
raise Exception("Error in response object format")
|
||||
|
||||
if model_response_object is None:
|
||||
model_response_object = RerankResponse(**response_object)
|
||||
return model_response_object
|
||||
|
||||
if "id" in response_object:
|
||||
model_response_object.id = response_object["id"]
|
||||
|
||||
if "meta" in response_object:
|
||||
model_response_object.meta = response_object["meta"]
|
||||
|
||||
if "results" in response_object:
|
||||
model_response_object.results = response_object["results"]
|
||||
|
||||
return model_response_object
|
||||
except Exception:
|
||||
raise Exception(
|
||||
f"Invalid response object {traceback.format_exc()}\n\nreceived_args={received_args}"
|
||||
)
|
||||
|
||||
|
||||
def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call
|
||||
return litellm.acompletion(*args, **kwargs)
|
||||
|
||||
|
|
|
@ -1104,7 +1104,7 @@
|
|||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/Meta-Llama-31-8B-Instruct": {
|
||||
"azure_ai/Meta-Llama-3.1-8B-Instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 128000,
|
||||
|
@ -1114,7 +1114,7 @@
|
|||
"mode": "chat",
|
||||
"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice"
|
||||
},
|
||||
"azure_ai/Meta-Llama-31-70B-Instruct": {
|
||||
"azure_ai/Meta-Llama-3.1-70B-Instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 128000,
|
||||
|
@ -1124,7 +1124,7 @@
|
|||
"mode": "chat",
|
||||
"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice"
|
||||
},
|
||||
"azure_ai/Meta-Llama-31-405B-Instruct": {
|
||||
"azure_ai/Meta-Llama-3.1-405B-Instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 128000,
|
||||
|
@ -1751,6 +1751,22 @@
|
|||
"supports_assistant_prefill": true,
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
"claude-3-5-sonnet-20241022": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"cache_creation_input_token_cost": 0.00000375,
|
||||
"cache_read_input_token_cost": 0.0000003,
|
||||
"litellm_provider": "anthropic",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
"text-bison": {
|
||||
"max_tokens": 2048,
|
||||
"max_input_tokens": 8192,
|
||||
|
@ -2578,6 +2594,18 @@
|
|||
"supports_vision": true,
|
||||
"supports_assistant_prefill": true
|
||||
},
|
||||
"vertex_ai/claude-3-5-sonnet-v2@20241022": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"litellm_provider": "vertex_ai-anthropic_models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_assistant_prefill": true
|
||||
},
|
||||
"vertex_ai/claude-3-haiku@20240307": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 200000,
|
||||
|
@ -3336,54 +3364,56 @@
|
|||
"litellm_provider": "cohere",
|
||||
"mode": "rerank"
|
||||
},
|
||||
"embed-english-v3.0": {
|
||||
"max_tokens": 512,
|
||||
"max_input_tokens": 512,
|
||||
"input_cost_per_token": 0.00000010,
|
||||
"output_cost_per_token": 0.00000,
|
||||
"litellm_provider": "cohere",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"embed-english-light-v3.0": {
|
||||
"max_tokens": 512,
|
||||
"max_input_tokens": 512,
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 1024,
|
||||
"input_cost_per_token": 0.00000010,
|
||||
"output_cost_per_token": 0.00000,
|
||||
"litellm_provider": "cohere",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"embed-multilingual-v3.0": {
|
||||
"max_tokens": 512,
|
||||
"max_input_tokens": 512,
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 1024,
|
||||
"input_cost_per_token": 0.00000010,
|
||||
"output_cost_per_token": 0.00000,
|
||||
"litellm_provider": "cohere",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"embed-english-v2.0": {
|
||||
"max_tokens": 512,
|
||||
"max_input_tokens": 512,
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000010,
|
||||
"output_cost_per_token": 0.00000,
|
||||
"litellm_provider": "cohere",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"embed-english-light-v2.0": {
|
||||
"max_tokens": 512,
|
||||
"max_input_tokens": 512,
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 1024,
|
||||
"input_cost_per_token": 0.00000010,
|
||||
"output_cost_per_token": 0.00000,
|
||||
"litellm_provider": "cohere",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"embed-multilingual-v2.0": {
|
||||
"max_tokens": 256,
|
||||
"max_input_tokens": 256,
|
||||
"max_tokens": 768,
|
||||
"max_input_tokens": 768,
|
||||
"input_cost_per_token": 0.00000010,
|
||||
"output_cost_per_token": 0.00000,
|
||||
"litellm_provider": "cohere",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"embed-english-v3.0": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 1024,
|
||||
"input_cost_per_token": 0.00000010,
|
||||
"input_cost_per_image": 0.0001,
|
||||
"output_cost_per_token": 0.00000,
|
||||
"litellm_provider": "cohere",
|
||||
"mode": "embedding",
|
||||
"supports_image_input": true
|
||||
},
|
||||
"replicate/meta/llama-2-13b": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 4096,
|
||||
|
@ -3572,6 +3602,22 @@
|
|||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 264
|
||||
},
|
||||
"anthropic/claude-3-5-sonnet-20241022": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"cache_creation_input_token_cost": 0.00000375,
|
||||
"cache_read_input_token_cost": 0.0000003,
|
||||
"litellm_provider": "anthropic",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
"openrouter/anthropic/claude-3.5-sonnet": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 200000,
|
||||
|
@ -4093,6 +4139,18 @@
|
|||
"litellm_provider": "bedrock",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"amazon.titan-embed-image-v1": {
|
||||
"max_tokens": 128,
|
||||
"max_input_tokens": 128,
|
||||
"output_vector_size": 1024,
|
||||
"input_cost_per_token": 0.0000008,
|
||||
"input_cost_per_image": 0.00006,
|
||||
"output_cost_per_token": 0.0,
|
||||
"litellm_provider": "bedrock",
|
||||
"supports_image_input": true,
|
||||
"mode": "embedding",
|
||||
"source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1"
|
||||
},
|
||||
"mistral.mistral-7b-instruct-v0:2": {
|
||||
"max_tokens": 8191,
|
||||
"max_input_tokens": 32000,
|
||||
|
@ -4246,6 +4304,17 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"anthropic.claude-3-5-sonnet-20241022-v2:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"anthropic.claude-3-haiku-20240307-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 200000,
|
||||
|
@ -4290,6 +4359,17 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"us.anthropic.claude-3-haiku-20240307-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 200000,
|
||||
|
@ -4334,6 +4414,17 @@
|
|||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"litellm_provider": "bedrock",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"eu.anthropic.claude-3-haiku-20240307-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 200000,
|
||||
|
@ -6369,6 +6460,14 @@
|
|||
"litellm_provider": "voyage",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"voyage/voyage-finance-2": {
|
||||
"max_tokens": 4000,
|
||||
"max_input_tokens": 4000,
|
||||
"input_cost_per_token": 0.00000012,
|
||||
"output_cost_per_token": 0.000000,
|
||||
"litellm_provider": "voyage",
|
||||
"mode": "embedding"
|
||||
},
|
||||
"databricks/databricks-meta-llama-3-1-405b-instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
|
|
|
@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.50.2"
version = "1.51.0"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.50.2"
version = "1.51.0"
version_files = [
    "pyproject.toml:^version"
]
|
||||
|
|
|
@@ -154,6 +154,8 @@ model LiteLLM_VerificationToken {
  model_spend          Json                 @default("{}")
  model_max_budget     Json                 @default("{}")
  budget_id            String?
  created_at           DateTime?            @default(now()) @map("created_at")
  updated_at           DateTime?            @default(now()) @updatedAt @map("updated_at")
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
}
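The new budget_id column and litellm_budget_table relation let a virtual key be joined to its budget row. A hedged sketch of fetching a key together with its budget via the Prisma Python client; the lower-cased accessor name and the token primary key are assumptions based on Prisma's usual conventions, not something this diff states:

from prisma import Prisma  # Prisma Python client generated from schema.prisma


async def get_key_with_budget(token_hash: str):
    db = Prisma()
    await db.connect()
    # include= pulls the related LiteLLM_BudgetTable row via the new budget_id foreign key
    key = await db.litellm_verificationtoken.find_unique(
        where={"token": token_hash},
        include={"litellm_budget_table": True},
    )
    await db.disconnect()
    return key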
@@ -695,3 +695,41 @@ def test_convert_to_model_response_object_error():
        _response_headers=None,
        convert_tool_call_to_json_mode=False,
    )


def test_image_generation_openai_with_pydantic_warning(caplog):
    try:
        import logging
        from litellm.types.utils import ImageResponse, ImageObject

        convert_response_args = {
            "response_object": {
                "created": 1729709945,
                "data": [
                    {
                        "b64_json": None,
                        "revised_prompt": "Generate an image of a baby sea otter. It should look incredibly cute, with big, soulful eyes and a fluffy, wet fur coat. The sea otter should be on its back, as sea otters often do, with its tiny hands holding onto a shell as if it is its precious toy. The background should be a tranquil sea under a clear sky, with soft sunlight reflecting off the waters. The color palette should be soothing with blues, browns, and white.",
                        "url": "https://oaidalleapiprodscus.blob.core.windows.net/private/org-ikDc4ex8NB5ZzfTf8m5WYVB7/user-JpwZsbIXubBZvan3Y3GchiiB/img-LL0uoOv4CFJIvNYxoNCKB8oc.png?st=2024-10-23T17%3A59%3A05Z&se=2024-10-23T19%3A59%3A05Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-10-22T19%3A26%3A22Z&ske=2024-10-23T19%3A26%3A22Z&sks=b&skv=2024-08-04&sig=Hl4wczJ3H2vZNdLRt/7JvNi6NvQGDnbNkDy15%2Bl3k5s%3D",
                    }
                ],
            },
            "model_response_object": ImageResponse(
                created=1729709929,
                data=[],
            ),
            "response_type": "image_generation",
            "stream": False,
            "start_time": None,
            "end_time": None,
            "hidden_params": None,
            "_response_headers": None,
            "convert_tool_call_to_json_mode": None,
        }

        resp: ImageResponse = convert_to_model_response_object(**convert_response_args)
        assert resp is not None
        assert resp.data is not None
        assert len(resp.data) == 1
        assert isinstance(resp.data[0], ImageObject)
    except Exception as e:
        pytest.fail(f"Test failed with exception: {e}")
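The new test above pins down how an OpenAI image-generation payload is coerced into ImageResponse/ImageObject without pydantic warnings. A small hedged sketch of the same shape from the caller's side; the model name and prompt are illustrative:

import litellm
from litellm.types.utils import ImageObject, ImageResponse

resp: ImageResponse = litellm.image_generation(
    model="dall-e-3",  # illustrative model name
    prompt="a baby sea otter floating on its back",
)

# Each element of resp.data is an ImageObject with url / b64_json / revised_prompt fields.
first = resp.data[0]
assert isinstance(first, ImageObject)
print(first.url or first.b64_json)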
@@ -235,7 +235,7 @@ def test_all_model_configs():
            optional_params={},
            api_version="2022-12-01",
            drop_params=False,
        ) == {"max_tokens": 10}
        ) == {"max_completion_tokens": 10}

    from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig
@@ -775,3 +775,12 @@ def test_hosted_vllm_tool_param():
    )
    assert "tools" not in optional_params
    assert "tool_choice" not in optional_params


def test_unmapped_vertex_anthropic_model():
    optional_params = get_optional_params(
        model="claude-3-5-sonnet-v250@20241022",
        custom_llm_provider="vertex_ai",
        max_retries=10,
    )
    assert "max_retries" not in optional_params
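The vertex test mirrors how parameters a provider does not support are dropped instead of being forwarded. A hedged sketch of the same call outside the test harness; the litellm.utils import path is an assumption, since the test's own imports are not shown in this hunk:

from litellm.utils import get_optional_params

# max_retries is not a Vertex AI Anthropic parameter, so it should be stripped
# rather than sent to the provider.
optional_params = get_optional_params(
    model="claude-3-5-sonnet-v250@20241022",
    custom_llm_provider="vertex_ai",
    max_retries=10,
)
assert "max_retries" not in optional_params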
tests/llm_translation/test_text_completion_unit_tests.py (new file, 64 lines)
@@ -0,0 +1,64 @@
import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

from litellm.types.utils import TextCompletionResponse


def test_convert_dict_to_text_completion_response():
    input_dict = {
        "id": "cmpl-ALVLPJgRkqpTomotoOMi3j0cAaL4L",
        "choices": [
            {
                "finish_reason": "length",
                "index": 0,
                "logprobs": {
                    "text_offset": [0, 5],
                    "token_logprobs": [None, -12.203847],
                    "tokens": ["hello", " crisp"],
                    "top_logprobs": [None, {",": -2.1568563}],
                },
                "text": "hello crisp",
            }
        ],
        "created": 1729688739,
        "model": "davinci-002",
        "object": "text_completion",
        "system_fingerprint": None,
        "usage": {
            "completion_tokens": 1,
            "prompt_tokens": 1,
            "total_tokens": 2,
            "completion_tokens_details": None,
            "prompt_tokens_details": None,
        },
    }

    response = TextCompletionResponse(**input_dict)

    assert response.id == "cmpl-ALVLPJgRkqpTomotoOMi3j0cAaL4L"
    assert len(response.choices) == 1
    assert response.choices[0].finish_reason == "length"
    assert response.choices[0].index == 0
    assert response.choices[0].text == "hello crisp"
    assert response.created == 1729688739
    assert response.model == "davinci-002"
    assert response.object == "text_completion"
    assert response.system_fingerprint is None
    assert response.usage.completion_tokens == 1
    assert response.usage.prompt_tokens == 1
    assert response.usage.total_tokens == 2
    assert response.usage.completion_tokens_details is None
    assert response.usage.prompt_tokens_details is None

    # Test logprobs
    assert response.choices[0].logprobs.text_offset == [0, 5]
    assert response.choices[0].logprobs.token_logprobs == [None, -12.203847]
    assert response.choices[0].logprobs.tokens == ["hello", " crisp"]
    assert response.choices[0].logprobs.top_logprobs == [None, {",": -2.1568563}]
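The unit test above exercises TextCompletionResponse parsing directly from a dict. The same structure comes back from the high-level API; a minimal hedged sketch, with an illustrative model name:

import litellm
from litellm.types.utils import TextCompletionResponse

resp: TextCompletionResponse = litellm.text_completion(
    model="gpt-3.5-turbo-instruct",  # illustrative model name
    prompt="hello",
    max_tokens=1,
    logprobs=2,
)

choice = resp.choices[0]
print(choice.text, choice.finish_reason)
# When logprobs are requested, tokens / token_logprobs / top_logprobs line up index-by-index.
if choice.logprobs is not None:
    print(choice.logprobs.tokens, choice.logprobs.token_logprobs)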
@@ -428,11 +428,16 @@ async def test_aaalangfuse_logging_metadata(langfuse_client):

    await asyncio.sleep(2)
    langfuse_client.flush()
    # await asyncio.sleep(10)
    await asyncio.sleep(4)

    # Tests the metadata filtering and the override of the output to be the last generation
    for trace_id, generation_ids in trace_identifiers.items():
        trace = langfuse_client.get_trace(id=trace_id)
        try:
            trace = langfuse_client.get_trace(id=trace_id)
        except Exception as e:
            if "Trace not found within authorized project" in str(e):
                print(f"Trace {trace_id} not found")
                continue
        assert trace.id == trace_id
        assert trace.session_id == session_id
        assert trace.metadata != trace_metadata

@@ -620,7 +625,7 @@ def test_aaalangfuse_existing_trace_id():
    import datetime

    import litellm
    from litellm.integrations.langfuse import LangFuseLogger
    from litellm.integrations.langfuse.langfuse import LangFuseLogger

    langfuse_Logger = LangFuseLogger(
        langfuse_public_key=os.getenv("LANGFUSE_PROJECT2_PUBLIC"),

@@ -1120,7 +1125,7 @@ generation_params = {
)
def test_langfuse_prompt_type(prompt):

    from litellm.integrations.langfuse import _add_prompt_to_generation_params
    from litellm.integrations.langfuse.langfuse import _add_prompt_to_generation_params

    clean_metadata = {
        "prompt": {

@@ -1227,7 +1232,7 @@ def test_langfuse_prompt_type(prompt):


def test_langfuse_logging_metadata():
    from litellm.integrations.langfuse import log_requester_metadata
    from litellm.integrations.langfuse.langfuse import log_requester_metadata

    metadata = {"key": "value", "requester_metadata": {"key": "value"}}
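These hunks track the Langfuse integration moving into a litellm/integrations/langfuse/ package, so the logger and helpers are now imported from litellm.integrations.langfuse.langfuse. String-based callback wiring is unchanged; a hedged sketch with illustrative credential values:

import os
import litellm

# Langfuse credentials (values illustrative).
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-example"
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-example"

# Same callback registration as before the package split.
litellm.success_callback = ["langfuse"]

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    metadata={"requester_metadata": {"team": "search"}},  # surfaced via log_requester_metadata
)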
@@ -10,9 +10,9 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanE
import litellm
from litellm._logging import verbose_logger, verbose_proxy_logger
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
from litellm.integrations.arize_ai import ArizeConfig, ArizeLogger

load_dotenv()
import logging


@pytest.mark.asyncio()

@@ -32,3 +32,57 @@ async def test_async_otel_callback():
    )

    await asyncio.sleep(2)


@pytest.fixture
def mock_env_vars(monkeypatch):
    monkeypatch.setenv("ARIZE_SPACE_KEY", "test_space_key")
    monkeypatch.setenv("ARIZE_API_KEY", "test_api_key")


def test_get_arize_config(mock_env_vars):
    """
    Use Arize default endpoint when no endpoints are provided
    """
    config = ArizeLogger._get_arize_config()
    assert isinstance(config, ArizeConfig)
    assert config.space_key == "test_space_key"
    assert config.api_key == "test_api_key"
    assert config.grpc_endpoint == "https://otlp.arize.com/v1"
    assert config.http_endpoint is None


def test_get_arize_config_with_endpoints(mock_env_vars, monkeypatch):
    """
    Use provided endpoints when they are set
    """
    monkeypatch.setenv("ARIZE_ENDPOINT", "grpc://test.endpoint")
    monkeypatch.setenv("ARIZE_HTTP_ENDPOINT", "http://test.endpoint")

    config = ArizeLogger._get_arize_config()
    assert config.grpc_endpoint == "grpc://test.endpoint"
    assert config.http_endpoint == "http://test.endpoint"


def test_get_arize_opentelemetry_config_grpc(mock_env_vars, monkeypatch):
    """
    Use provided GRPC endpoint when it is set
    """
    monkeypatch.setenv("ARIZE_ENDPOINT", "grpc://test.endpoint")

    config = ArizeLogger.get_arize_opentelemetry_config()
    assert isinstance(config, OpenTelemetryConfig)
    assert config.exporter == "otlp_grpc"
    assert config.endpoint == "grpc://test.endpoint"


def test_get_arize_opentelemetry_config_http(mock_env_vars, monkeypatch):
    """
    Use provided HTTP endpoint when it is set
    """
    monkeypatch.setenv("ARIZE_HTTP_ENDPOINT", "http://test.endpoint")

    config = ArizeLogger.get_arize_opentelemetry_config()
    assert isinstance(config, OpenTelemetryConfig)
    assert config.exporter == "otlp_http"
    assert config.endpoint == "http://test.endpoint"
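The new tests pin Arize's endpoint resolution: the gRPC exporter is chosen when ARIZE_ENDPOINT is set, the HTTP exporter when only ARIZE_HTTP_ENDPOINT is set, and the default gRPC collector at https://otlp.arize.com/v1 applies otherwise. A hedged sketch of wiring this up outside the tests, with illustrative credential values:

import os
import litellm

os.environ["ARIZE_SPACE_KEY"] = "my-space-key"  # illustrative
os.environ["ARIZE_API_KEY"] = "my-api-key"      # illustrative
# Optional: point at a specific collector; otherwise the default gRPC endpoint is used.
# os.environ["ARIZE_ENDPOINT"] = "grpc://otel.example.internal:4317"

litellm.callbacks = ["arize"]

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)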
tests/local_testing/test_audit_logs_proxy.py (new file, 152 lines)
@@ -0,0 +1,152 @@
import os
import sys
import traceback
import uuid
from datetime import datetime

from dotenv import load_dotenv
from fastapi import Request
from fastapi.routing import APIRoute


import io
import os
import time

# this file is to test litellm/proxy

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import asyncio
import logging

load_dotenv()

import pytest
import uuid
import litellm
from litellm._logging import verbose_proxy_logger

from litellm.proxy.proxy_server import (
    LitellmUserRoles,
    audio_transcriptions,
    chat_completion,
    completion,
    embeddings,
    image_generation,
    model_list,
    moderations,
    new_end_user,
    user_api_key_auth,
)

from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend

verbose_proxy_logger.setLevel(level=logging.DEBUG)

from starlette.datastructures import URL

from litellm.proxy.management_helpers.audit_logs import create_audit_log_for_update
from litellm.proxy._types import LiteLLM_AuditLogs, LitellmTableNames
from litellm.caching.caching import DualCache
from unittest.mock import patch, AsyncMock

proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
import json


@pytest.mark.asyncio
async def test_create_audit_log_for_update_premium_user():
    """
    Basic unit test for create_audit_log_for_update

    Test that the audit log is created when a premium user updates a team
    """
    with patch("litellm.proxy.proxy_server.premium_user", True), patch(
        "litellm.store_audit_logs", True
    ), patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma:

        mock_prisma.db.litellm_auditlog.create = AsyncMock()

        request_data = LiteLLM_AuditLogs(
            id="test_id",
            updated_at=datetime.now(),
            changed_by="test_changed_by",
            action="updated",
            table_name=LitellmTableNames.TEAM_TABLE_NAME,
            object_id="test_object_id",
            updated_values=json.dumps({"key": "value"}),
            before_value=json.dumps({"old_key": "old_value"}),
        )

        await create_audit_log_for_update(request_data)

        mock_prisma.db.litellm_auditlog.create.assert_called_once_with(
            data={
                "id": "test_id",
                "updated_at": request_data.updated_at,
                "changed_by": request_data.changed_by,
                "action": request_data.action,
                "table_name": request_data.table_name,
                "object_id": request_data.object_id,
                "updated_values": request_data.updated_values,
                "before_value": request_data.before_value,
            }
        )


@pytest.fixture
def prisma_client():
    from litellm.proxy.proxy_cli import append_query_params

    ### add connection pool + pool timeout args
    params = {"connection_limit": 100, "pool_timeout": 60}
    database_url = os.getenv("DATABASE_URL")
    modified_url = append_query_params(database_url, params)
    os.environ["DATABASE_URL"] = modified_url

    # Assuming PrismaClient is a class that needs to be instantiated
    prisma_client = PrismaClient(
        database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
    )

    return prisma_client


@pytest.mark.asyncio()
async def test_create_audit_log_in_db(prisma_client):
    print("prisma client=", prisma_client)

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm.proxy.proxy_server, "premium_user", True)
    setattr(litellm, "store_audit_logs", True)

    await litellm.proxy.proxy_server.prisma_client.connect()
    audit_log_id = f"audit_log_id_{uuid.uuid4()}"

    # create a audit log for /key/generate
    request_data = LiteLLM_AuditLogs(
        id=audit_log_id,
        updated_at=datetime.now(),
        changed_by="test_changed_by",
        action="updated",
        table_name=LitellmTableNames.TEAM_TABLE_NAME,
        object_id="test_object_id",
        updated_values=json.dumps({"key": "value"}),
        before_value=json.dumps({"old_key": "old_value"}),
    )

    await create_audit_log_for_update(request_data)

    await asyncio.sleep(1)

    # now read the last log from the db
    last_log = await prisma_client.db.litellm_auditlog.find_first(
        where={"id": audit_log_id}
    )

    assert last_log.id == audit_log_id

    setattr(litellm, "store_audit_logs", False)
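create_audit_log_for_update only persists a row when the proxy's premium flag and litellm.store_audit_logs are both enabled, which is exactly what the mocked test toggles. A hedged sketch of emitting an audit record directly; the IDs and values are illustrative, and this assumes a proxy with a connected prisma_client and premium_user set, as in the integration test above:

import asyncio
import json
import uuid
from datetime import datetime

import litellm
from litellm.proxy._types import LiteLLM_AuditLogs, LitellmTableNames
from litellm.proxy.management_helpers.audit_logs import create_audit_log_for_update


async def emit_team_update_audit_log():
    litellm.store_audit_logs = True  # gate checked before anything is written
    entry = LiteLLM_AuditLogs(
        id=f"audit_log_id_{uuid.uuid4()}",
        updated_at=datetime.now(),
        changed_by="admin-ui",  # illustrative actor
        action="updated",
        table_name=LitellmTableNames.TEAM_TABLE_NAME,
        object_id="team-123",  # illustrative object id
        updated_values=json.dumps({"max_budget": 100}),
        before_value=json.dumps({"max_budget": 50}),
    )
    # Persists via the proxy's prisma_client; without a connected DB this does nothing useful.
    await create_audit_log_for_update(entry)


asyncio.run(emit_team_update_audit_log())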
@@ -2066,46 +2066,6 @@ async def test_cache_default_off_acompletion():
    assert response3.id == response4.id


@pytest.mark.asyncio()
@pytest.mark.skip(reason="dual caching should first prioritze local cache")
async def test_dual_cache_uses_redis():
    """

    - Store diff values in redis and in memory cache
    - call get cache
    - Assert that value from redis is used
    """
    litellm.set_verbose = True
    from litellm.caching.caching import DualCache, RedisCache

    current_usage = uuid.uuid4()

    _cache_obj = DualCache(redis_cache=RedisCache(), always_read_redis=True)

    # set cache
    await _cache_obj.async_set_cache(key=f"current_usage: {current_usage}", value=10)

    # modify value of in memory cache
    _cache_obj.in_memory_cache.cache_dict[f"current_usage: {current_usage}"] = 1

    # get cache
    value = await _cache_obj.async_get_cache(key=f"current_usage: {current_usage}")
    print("value from dual cache", value)
    assert value == 10


@pytest.mark.asyncio()
async def test_proxy_logging_setup():
    """
    Assert always_read_redis is True when used by internal usage cache
    """
    from litellm.caching.caching import DualCache
    from litellm.proxy.utils import ProxyLogging

    pl_obj = ProxyLogging(user_api_key_cache=DualCache())
    assert pl_obj.internal_usage_cache.dual_cache.always_read_redis is True


@pytest.mark.skip(reason="local test. Requires sentinel setup.")
@pytest.mark.asyncio
async def test_redis_sentinel_caching():
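The hunk above drops the test asserting that a DualCache read always went to Redis; per the skip reason, the dual cache is now expected to serve from the in-process cache first. A hedged sketch of the basic set/get flow using the same API the removed test exercised:

import asyncio

from litellm.caching.caching import DualCache, RedisCache


async def main():
    # In-memory layer is consulted first; Redis (configured via REDIS_HOST/PORT/PASSWORD
    # environment variables) acts as the shared fallback layer.
    cache = DualCache(redis_cache=RedisCache())

    await cache.async_set_cache(key="current_usage: demo", value=10)
    value = await cache.async_get_cache(key="current_usage: demo")
    print("dual cache value:", value)


asyncio.run(main())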
@@ -1,42 +0,0 @@
import sys
import os
import io, asyncio

# import logging
# logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../.."))
print("Modified sys.path:", sys.path)


from litellm import completion
import litellm
from litellm._logging import verbose_logger
import logging

litellm.num_retries = 3

import time, random
import pytest


@pytest.mark.asyncio
@pytest.mark.skip(reason="beta test - this is a new feature")
async def test_custom_api_logging():
    try:
        litellm.success_callback = ["clickhouse"]
        litellm.set_verbose = True
        verbose_logger.setLevel(logging.DEBUG)
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": f"This is a test"}],
            max_tokens=10,
            temperature=0.7,
            user="ishaan-2",
        )

    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
    finally:
        # post, close log file and verify
        # Reset stdout to the original value
        print("Passed!")
Some files were not shown because too many files have changed in this diff.