Merge branch 'main' into main

@@ -28,8 +28,9 @@ jobs:
 pip install "pytest==7.3.1"
 pip install "pytest-asyncio==0.21.1"
 pip install mypy
-pip install "google-generativeai>=0.3.2"
+pip install "google-generativeai==0.3.2"
-pip install "google-cloud-aiplatform>=1.38.0"
+pip install "google-cloud-aiplatform==1.43.0"
+pip install pyarrow
 pip install "boto3>=1.28.57"
 pip install "aioboto3>=12.3.0"
 pip install langchain

@@ -48,6 +49,7 @@ jobs:
 pip install argon2-cffi
 pip install "pytest-mock==3.12.0"
 pip install python-multipart
+pip install google-cloud-aiplatform
 - save_cache:
     paths:
       - ./venv

@@ -152,10 +154,11 @@ jobs:
 pip install "pytest-mock==3.12.0"
 pip install "pytest-asyncio==0.21.1"
 pip install mypy
-pip install "google-generativeai>=0.3.2"
+pip install "google-generativeai==0.3.2"
-pip install "google-cloud-aiplatform>=1.38.0"
+pip install "google-cloud-aiplatform==1.43.0"
-pip install "boto3>=1.28.57"
+pip install pyarrow
-pip install "aioboto3>=12.3.0"
+pip install "boto3==1.34.34"
+pip install "aioboto3==12.3.0"
 pip install langchain
 pip install "langfuse>=2.0.0"
 pip install numpydoc

@@ -7,8 +7,7 @@ baseten
 cohere
 redis
 anthropic
-boto3
 orjson
 pydantic
-google-cloud-aiplatform
+google-cloud-aiplatform==1.43.0
 redisvl==0.0.7 # semantic caching

.github/workflows/ghcr_deploy.yml  (vendored, 45 changed lines)

@@ -43,6 +43,13 @@ jobs:
 push: true
 file: Dockerfile.database
 tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
+-
+name: Build and push litellm-spend-logs image
+uses: docker/build-push-action@v5
+with:
+push: true
+file: ./litellm-js/spend-logs/Dockerfile
+tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}
 
 build-and-push-image:
 runs-on: ubuntu-latest

@@ -120,6 +127,44 @@ jobs:
 tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-latest
 labels: ${{ steps.meta-database.outputs.labels }}
 platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
+
+build-and-push-image-spend-logs:
+runs-on: ubuntu-latest
+permissions:
+contents: read
+packages: write
+steps:
+- name: Checkout repository
+uses: actions/checkout@v4
+
+- name: Log in to the Container registry
+uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+with:
+registry: ${{ env.REGISTRY }}
+username: ${{ github.actor }}
+password: ${{ secrets.GITHUB_TOKEN }}
+
+- name: Extract metadata (tags, labels) for spend-logs Dockerfile
+id: meta-spend-logs
+uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+with:
+images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-spend_logs
+# Configure multi platform Docker builds
+- name: Set up QEMU
+uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
+- name: Set up Docker Buildx
+uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
+
+- name: Build and push Database Docker image
+uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+with:
+context: .
+file: ./litellm-js/spend-logs/Dockerfile
+push: true
+tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-latest
+labels: ${{ steps.meta-spend-logs.outputs.labels }}
+platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
+
 build-and-push-helm-chart:
 runs-on: ubuntu-latest
 steps:

@@ -1,8 +1,8 @@
 # Base image for building
-ARG LITELLM_BUILD_IMAGE=python:3.9
+ARG LITELLM_BUILD_IMAGE=python:3.11.8-slim
 
 # Runtime image
-ARG LITELLM_RUNTIME_IMAGE=python:3.9-slim
+ARG LITELLM_RUNTIME_IMAGE=python:3.11.8-slim
 # Builder stage
 FROM $LITELLM_BUILD_IMAGE as builder
 
@@ -70,5 +70,5 @@ EXPOSE 4000/tcp
 ENTRYPOINT ["litellm"]
 
 # Append "--detailed_debug" to the end of CMD to view detailed debug logs
-# CMD ["--port", "4000", "--config", "./proxy_server_config.yaml", "--run_gunicorn", "--detailed_debug"]
+# CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]
-CMD ["--port", "4000", "--config", "./proxy_server_config.yaml", "--run_gunicorn", "--num_workers", "4"]
+CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]

@@ -1,8 +1,8 @@
 # Base image for building
-ARG LITELLM_BUILD_IMAGE=python:3.9
+ARG LITELLM_BUILD_IMAGE=python:3.11.8-slim
 
 # Runtime image
-ARG LITELLM_RUNTIME_IMAGE=python:3.9-slim
+ARG LITELLM_RUNTIME_IMAGE=python:3.11.8-slim
 # Builder stage
 FROM $LITELLM_BUILD_IMAGE as builder
 
@@ -72,5 +72,5 @@ EXPOSE 4000/tcp
 ENTRYPOINT ["litellm"]
 
 # Append "--detailed_debug" to the end of CMD to view detailed debug logs
-# CMD ["--port", "4000","--run_gunicorn", "--detailed_debug"]
+# CMD ["--port", "4000", "--detailed_debug"]
-CMD ["--port", "4000", "--run_gunicorn"]
+CMD ["--port", "4000"]

@@ -31,11 +31,11 @@ LiteLLM manages:
 - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
 - Set Budgets & Rate limits per project, api key, model [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
 
-**Stable Release**: v`1.30.2` 👈 Recommended stable version of proxy.
-
 [**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
 [**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-provider-docs)
 
+🚨 **Stable Release:** v1.34.1
+
 Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
 
 # Usage ([**Docs**](https://docs.litellm.ai/docs/))

deploy/kubernetes/kub.yaml  (new file, 55 lines)

@@ -0,0 +1,55 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-deployment
spec:
  replicas: 3
  selector:
    matchLabels:
      app: litellm
  template:
    metadata:
      labels:
        app: litellm
    spec:
      containers:
        - name: litellm-container
          image: ghcr.io/berriai/litellm:main-latest
          env:
            - name: AZURE_API_KEY
              value: "d6f****"
            - name: AZURE_API_BASE
              value: "https://openai
            - name: LITELLM_MASTER_KEY
              value: "sk-1234"
            - name: DATABASE_URL
              value: "postgresql://ishaan:*********""
          args:
            - "--config"
            - "/app/proxy_config.yaml" # Update the path to mount the config file
          volumeMounts: # Define volume mount for proxy_config.yaml
            - name: config-volume
              mountPath: /app
              readOnly: true
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
      volumes: # Define volume to mount proxy_config.yaml
        - name: config-volume
          configMap:
            name: litellm-config

deploy/kubernetes/service.yaml  (new file, 12 lines)

@@ -0,0 +1,12 @@
apiVersion: v1
kind: Service
metadata:
  name: litellm-service
spec:
  selector:
    app: litellm
  ports:
    - protocol: TCP
      port: 4000
      targetPort: 4000
  type: LoadBalancer

@@ -76,7 +76,6 @@ Click on your personal dashboard link. Here's how you can find it 👇
 
 Oh! Looks like our request was made successfully. Let's click on it and see exactly what got sent to the LLM provider.
 
-<Image img={require('../../img/dashboard_log_row.png')} alt="Dashboard Log Row" />
 
 
 

@@ -41,6 +41,35 @@ response = completion(
 )
 ```
 
+## Additional information in metadata
+You can send some additional information to Athina by using the `metadata` field in completion. This can be useful for sending metadata about the request, such as the customer_id, prompt_slug, or any other information you want to track.
+
+```python
+#openai call with additional metadata
+response = completion(
+  model="gpt-3.5-turbo",
+  messages=[
+    {"role": "user", "content": "Hi 👋 - i'm openai"}
+  ],
+  metadata={
+    "environment": "staging",
+    "prompt_slug": "my_prompt_slug/v1"
+  }
+)
+```
+
+Following are the allowed fields in metadata, their types, and their descriptions:
+
+* `environment: Optional[str]` - Environment your app is running in (ex: production, staging, etc). This is useful for segmenting inference calls by environment.
+* `prompt_slug: Optional[str]` - Identifier for the prompt used for inference. This is useful for segmenting inference calls by prompt.
+* `customer_id: Optional[str]` - This is your customer ID. This is useful for segmenting inference calls by customer.
+* `customer_user_id: Optional[str]` - This is the end user ID. This is useful for segmenting inference calls by the end user.
+* `session_id: Optional[str]` - This is the session or conversation ID. This is used for grouping different inferences into a conversation or chain. [Read more](https://docs.athina.ai/logging/grouping_inferences)
+* `external_reference_id: Optional[str]` - This is useful if you want to associate your own internal identifier with the inference logged to Athina.
+* `context: Optional[Union[dict, str]]` - This is the context used as information for the prompt. For RAG applications, this is the "retrieved" data. You may log context as a string or as an object (dictionary).
+* `expected_response: Optional[str]` - This is the reference response to compare against for evaluation purposes. This is useful for segmenting inference calls by expected response.
+* `user_query: Optional[str]` - This is the user's query. For conversational applications, this is the user's last message.
+
 ## Support & Talk with Athina Team
 
 - [Schedule Demo 👋](https://cal.com/shiv-athina/30min)

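As an aside on the metadata fields documented in the hunk above, here is a minimal sketch of a call that sets several of them at once. It is an illustration, not part of the PR: the callback name follows litellm's Athina integration, and the env-var names, keys, and field values are placeholders.

```python
import os
import litellm
from litellm import completion

os.environ["ATHINA_API_KEY"] = "your-athina-api-key"  # placeholder; assumed env var for the Athina logger
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"  # placeholder

litellm.success_callback = ["athina"]  # log successful calls to Athina

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    metadata={
        "environment": "production",         # segment calls by environment
        "prompt_slug": "capital_lookup/v1",  # identifier for the prompt
        "customer_id": "cust_123",           # your customer ID
        "session_id": "sess_456",            # groups inferences into one conversation
        "expected_response": "Paris",        # reference answer for evaluation
    },
)
print(response.choices[0].message.content)
```
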
@@ -60,11 +60,30 @@ export ANTHROPIC_API_KEY="your-api-key"
 
 ### 2. Start the proxy
 
+<Tabs>
+<TabItem value="cli" label="cli">
+
 ```bash
 $ litellm --model claude-3-opus-20240229
 
 # Server running on http://0.0.0.0:4000
 ```
+</TabItem>
+<TabItem value="config" label="config.yaml">
+
+```yaml
+model_list:
+  - model_name: claude-3 ### RECEIVED MODEL NAME ###
+    litellm_params: # all params accepted by litellm.completion() - https://docs.litellm.ai/docs/completion/input
+      model: claude-3-opus-20240229 ### MODEL NAME sent to `litellm.completion()` ###
+      api_key: "os.environ/ANTHROPIC_API_KEY" # does os.getenv("ANTHROPIC_API_KEY")
+```
+
+```bash
+litellm --config /path/to/config.yaml
+```
+</TabItem>
+</Tabs>
 
 ### 3. Test it
 
@@ -76,7 +95,7 @@ $ litellm --model claude-3-opus-20240229
 curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
-"model": "gpt-3.5-turbo",
+"model": "claude-3",
 "messages": [
 {
 "role": "user",

@@ -97,7 +116,7 @@ client = openai.OpenAI(
 )
 
 # request sent to model set on litellm proxy, `litellm --model`
-response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
+response = client.chat.completions.create(model="claude-3", messages = [
 {
 "role": "user",
 "content": "this is a test request, write a short poem"

@@ -121,7 +140,7 @@ from langchain.schema import HumanMessage, SystemMessage
 
 chat = ChatOpenAI(
 openai_api_base="http://0.0.0.0:4000", # set openai_api_base to the LiteLLM Proxy
-model = "gpt-3.5-turbo",
+model = "claude-3",
 temperature=0.1
 )
 

@@ -238,7 +257,7 @@ resp = litellm.completion(
 print(f"\nResponse: {resp}")
 ```
 
-### Usage - "Assistant Pre-fill"
+## Usage - "Assistant Pre-fill"
 
 You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array.
 

@@ -271,8 +290,8 @@ Human: How do you say 'Hello' in German? Return your answer as a JSON object, li
 Assistant: {
 ```
 
-### Usage - "System" messages
+## Usage - "System" messages
-If you're using Anthropic's Claude 2.1 with Bedrock, `system` role messages are properly formatted for you.
+If you're using Anthropic's Claude 2.1, `system` role messages are properly formatted for you.
 
 ```python
 import os

@@ -20,7 +20,28 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = ""
 os.environ["AWS_REGION_NAME"] = ""
 
 response = completion(
-            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
+            model="sagemaker/<your-endpoint-name>",
+            messages=[{ "content": "Hello, how are you?","role": "user"}],
+            temperature=0.2,
+            max_tokens=80
+)
+```
+
+### Passing Inference Component Name
+
+If you have multiple models on an endpoint, you'll need to specify the individual model names, do this via `model_id`.
+
+```python
+import os
+from litellm import completion
+
+os.environ["AWS_ACCESS_KEY_ID"] = ""
+os.environ["AWS_SECRET_ACCESS_KEY"] = ""
+os.environ["AWS_REGION_NAME"] = ""
+
+response = completion(
+            model="sagemaker/<your-endpoint-name>",
+            model_id="<your-model-name>",
             messages=[{ "content": "Hello, how are you?","role": "user"}],
             temperature=0.2,
             max_tokens=80

@@ -2,6 +2,7 @@
 
 ## Pre-requisites
 * `pip install -q google-generativeai`
+* Get API Key - https://aistudio.google.com/
 
 # Gemini-Pro
 ## Sample Usage

@@ -97,6 +98,6 @@ print(content)
 | Model Name | Function Call | Required OS Variables |
 |------------------|--------------------------------------|-------------------------|
 | gemini-pro | `completion('gemini/gemini-pro', messages)` | `os.environ['GEMINI_API_KEY']` |
-| gemini-1.5-pro | `completion('gemini/gemini-pro', messages)` | `os.environ['GEMINI_API_KEY']` |
+| gemini-1.5-pro | `completion('gemini/gemini-1.5-pro', messages)` | `os.environ['GEMINI_API_KEY']` |
 | gemini-pro-vision | `completion('gemini/gemini-pro-vision', messages)` | `os.environ['GEMINI_API_KEY']` |
-| gemini-1.5-pro-vision | `completion('gemini/gemini-pro-vision', messages)` | `os.environ['GEMINI_API_KEY']` |
+| gemini-1.5-pro-vision | `completion('gemini/gemini-1.5-pro-vision', messages)` | `os.environ['GEMINI_API_KEY']` |

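To sanity-check the corrected table rows above, here is a minimal sketch of the `gemini-1.5-pro` call exactly as the table describes it. It is an illustration, not part of the diff; the API key is a placeholder.

```python
import os
from litellm import completion

os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"  # placeholder

# Matches the corrected row: gemini-1.5-pro -> completion('gemini/gemini-1.5-pro', messages)
response = completion(
    model="gemini/gemini-1.5-pro",
    messages=[{"role": "user", "content": "Summarize this diff in one sentence."}],
)
print(response.choices[0].message.content)
```
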
@@ -1,61 +0,0 @@
-import Image from '@theme/IdealImage';
-
-# 🚨 Budget Alerting
-
-**Alerts when a project will exceed it’s planned limit**
-
-<Image img={require('../../img/budget_alerts.png')} />
-
-## Quick Start
-
-### 1. Setup Slack Alerting on your Proxy Config.yaml
-
-**Add Slack Webhook to your env**
-Get a slack webhook url from https://api.slack.com/messaging/webhooks
-
-Set `SLACK_WEBHOOK_URL` in your proxy env
-
-```shell
-export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/<>/<>/<>"
-```
-
-**Update proxy config.yaml with slack alerting**
-
-Add `general_settings:alerting`
-```yaml
-model_list:
-    model_name: "azure-model"
-    litellm_params:
-        model: "azure/gpt-35-turbo"
-
-general_settings:
-    alerting: ["slack"]
-```
-
-
-Start proxy
-```bash
-$ litellm --config /path/to/config.yaml
-```
-
-### 2. Create API Key on Proxy Admin UI
-The Admin UI is found on `your-litellm-proxy-endpoint/ui`, example `http://localhost:4000/ui/`
-
-- Set a key name
-- Set a Soft Budget on when to get alerted
-
-<Image img={require('../../img/create_key.png')} />
-
-### 3. Test Slack Alerting on Admin UI
-After creating a key on the Admin UI, click on "Test Slack Alert" to send a test alert to your Slack channel
-<Image img={require('../../img/test_alert.png')} />
-
-### 4. Check Slack
-
-When the test alert works, you should expect to see this on your alerts slack channel
-
-<Image img={require('../../img/budget_alerts.png')} />

@@ -32,8 +32,9 @@ litellm_settings:
 cache: True # set cache responses to True, litellm defaults to using a redis cache
 ```
 
-#### [OPTIONAL] Step 1.5: Add redis namespaces
+#### [OPTIONAL] Step 1.5: Add redis namespaces, default ttl
 
+## Namespace
 If you want to create some folder for your keys, you can set a namespace, like this:
 
 ```yaml

@@ -50,6 +51,16 @@ and keys will be stored like:
 litellm_caching:<hash>
 ```
 
+## TTL
+
+```yaml
+litellm_settings:
+  cache: true
+  cache_params: # set cache params for redis
+    type: redis
+    ttl: 600 # will be cached on redis for 600s
+```
+
 #### Step 2: Add Redis Credentials to .env
 Set either `REDIS_URL` or the `REDIS_HOST` in your os environment, to enable caching.
 

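One way to see the new `ttl` setting behave end-to-end is to send the same request twice through the proxy; with the caching config above, the second identical call should be served from Redis until the 600s TTL expires. The sketch below is an illustration, not part of the PR — the master key, base URL, and model name are placeholders that must match your own proxy setup.

```python
import openai

# Points at a locally running LiteLLM proxy with redis caching enabled as configured above
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

messages = [{"role": "user", "content": "what llm are you"}]

for attempt in range(2):
    # The first call populates the Redis cache; the second identical call should hit it
    resp = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    print(f"attempt {attempt}: {resp.choices[0].message.content}")
```
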
@@ -1,7 +1,10 @@
+import Image from '@theme/IdealImage';
+
 # Modify / Reject Incoming Requests
 
 - Modify data before making llm api calls on proxy
 - Reject data before making llm api calls / before returning the response
+- Enforce 'user' param for all openai endpoint calls
 
 See a complete example with our [parallel request rate limiter](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/hooks/parallel_request_limiter.py)
 

@@ -95,7 +98,7 @@ We might need to update the function schema in the future, to support multiple e
 
 :::
 
-See a complete example with our [Llama Guard content moderation hook](https://github.com/BerriAI/litellm/blob/main/enterprise/hooks/llama_guard.py)
+See a complete example with our [Llama Guard content moderation hook](https://github.com/BerriAI/litellm/blob/main/enterprise/enterprise_hooks/llm_guard.py)
 
 ```python
 from litellm.integrations.custom_logger import CustomLogger

@@ -173,3 +176,18 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 ],
 }'
 ```
+
+## Advanced - Enforce 'user' param
+
+Set `enforce_user_param` to true, to require all calls to the openai endpoints to have the 'user' param.
+
+[**See Code**](https://github.com/BerriAI/litellm/blob/4777921a31c4c70e4d87b927cb233b6a09cd8b51/litellm/proxy/auth/auth_checks.py#L72)
+
+```yaml
+general_settings:
+  enforce_user_param: True
+```
+
+**Result**
+
+<Image img={require('../../img/end_user_enforcement.png')}/>

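For the `enforce_user_param` setting added above, here is a minimal sketch of a request that passes the check once enforcement is on — an illustration, not part of the PR; the master key, base URL, and model name are placeholders.

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# With `enforce_user_param: True`, a request that omits `user` is rejected by the proxy
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    user="my-end-user-id",  # 👈 the param the proxy now requires
)
print(response.choices[0].message.content)
```
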
@@ -62,7 +62,6 @@ model_list:
 
 litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
 drop_params: True
-set_verbose: True
 
 general_settings:
 master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)

@@ -558,6 +557,16 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 }'
 ```
 
+## Disable Swagger UI
+
+To disable the Swagger docs from the base url, set
+
+```env
+NO_DOCS="True"
+```
+
+in your environment, and restart the proxy.
+
 ## Configure DB Pool Limits + Connection Timeouts
 

@@ -593,6 +602,8 @@ general_settings:
 "disable_spend_logs": "boolean", # turn off writing each transaction to the db
 "disable_reset_budget": "boolean", # turn off reset budget scheduled task
 "enable_jwt_auth": "boolean", # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
+"enforce_user_param": "boolean", # requires all openai endpoint requests to have a 'user' param
+"allowed_routes": "list", # list of allowed proxy API routes - a user can access. (currently JWT-Auth only)
 "key_management_system": "google_kms", # either google_kms or azure_kms
 "master_key": "string",
 "database_url": "string",

@@ -103,7 +103,10 @@ RUN chmod +x entrypoint.sh
 EXPOSE 4000/tcp
 
 # Override the CMD instruction with your desired command and arguments
-CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug", "--run_gunicorn"]
+# WARNING: FOR PROD DO NOT USE `--detailed_debug` it slows down response times, instead use the following CMD
+# CMD ["--port", "4000", "--config", "config.yaml"]
+
+CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug"]
 ```
 
 </TabItem>

@@ -232,7 +235,6 @@ Your OpenAI proxy server is now running on `http://127.0.0.1:4000`.
 | [LiteLLM container + Redis](#litellm-container--redis) | + load balance across multiple litellm containers |
 | [LiteLLM Database container + PostgresDB + Redis](#litellm-database-container--postgresdb--redis) | + use Virtual Keys + Track Spend + load balance across multiple litellm containers |
 
-
 ## Deploy with Database
 ### Docker, Kubernetes, Helm Chart
 

@@ -474,25 +476,6 @@ docker run --name litellm-proxy \
 ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
 ```
 
-## Best Practices for Deploying to Production
-### 1. Switch of debug logs in production
-don't use [`--detailed-debug`, `--debug`](https://docs.litellm.ai/docs/proxy/debugging#detailed-debug) or `litellm.set_verbose=True`. We found using debug logs can add 5-10% latency per LLM API call
-
-### 2. Use `run_gunicorn` and `num_workers`
-
-Example setting `--run_gunicorn` and `--num_workers`
-```shell
-docker run ghcr.io/berriai/litellm-database:main-latest --run_gunicorn --num_workers 4
-```
-
-Why `Gunicorn`?
-- Gunicorn takes care of running multiple instances of your web application
-- Gunicorn is ideal for running litellm proxy on cluster of machines with Kubernetes
-
-Why `num_workers`?
-Setting `num_workers` to the number of CPUs available ensures optimal utilization of system resources by matching the number of worker processes to the available CPU cores.
-
-
 ## Advanced Deployment Settings
 
 ### Customization of the server root path

@@ -525,6 +508,57 @@ Provide an ssl certificate when starting litellm proxy server
 ## Platform-specific Guide
 
 <Tabs>
+<TabItem value="AWS EKS" label="AWS EKS - Kubernetes">
+
+### Kubernetes - Deploy on EKS
+
+Step 1. Create an EKS Cluster with the following spec
+
+```shell
+eksctl create cluster --name=litellm-cluster --region=us-west-2 --node-type=t2.small
+```
+
+Step 2. Mount litellm proxy config on kub cluster
+
+This will mount your local file called `proxy_config.yaml` on kubernetes cluster
+
+```shell
+kubectl create configmap litellm-config --from-file=proxy_config.yaml
+```
+
+Step 3. Apply `kub.yaml` and `service.yaml`
+Clone the following `kub.yaml` and `service.yaml` files and apply locally
+
+- Use this `kub.yaml` file - [litellm kub.yaml](https://github.com/BerriAI/litellm/blob/main/deploy/kubernetes/kub.yaml)
+
+- Use this `service.yaml` file - [litellm service.yaml](https://github.com/BerriAI/litellm/blob/main/deploy/kubernetes/service.yaml)
+
+Apply `kub.yaml`
+```
+kubectl apply -f kub.yaml
+```
+
+Apply `service.yaml` - creates an AWS load balancer to expose the proxy
+```
+kubectl apply -f service.yaml
+
+# service/litellm-service created
+```
+
+Step 4. Get Proxy Base URL
+
+```shell
+kubectl get services
+
+# litellm-service LoadBalancer 10.100.6.31 a472dc7c273fd47fd******.us-west-2.elb.amazonaws.com 4000:30374/TCP 63m
+```
+
+Proxy Base URL = `a472dc7c273fd47fd******.us-west-2.elb.amazonaws.com:4000`
+
+That's it, now you can start using LiteLLM Proxy
+
+</TabItem>
+
+
 <TabItem value="aws-stack" label="AWS Cloud Formation Stack">
 

@@ -12,9 +12,9 @@ Features here are behind a commercial license in our `/enterprise` folder. [**Se
 :::
 
 Features:
+- ✅ Content Moderation with LLM Guard
 - ✅ Content Moderation with LlamaGuard
 - ✅ Content Moderation with Google Text Moderations
-- ✅ Content Moderation with LLM Guard
 - ✅ Reject calls from Blocked User list
 - ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
 - ✅ Don't log/store specific requests (eg confidential LLM requests)

@@ -23,6 +23,71 @@ Features:
 
 
 ## Content Moderation
+### Content Moderation with LLM Guard
+
+Set the LLM Guard API Base in your environment
+
+```env
+LLM_GUARD_API_BASE = "http://0.0.0.0:8192" # deployed llm guard api
+```
+
+Add `llmguard_moderations` as a callback
+
+```yaml
+litellm_settings:
+  callbacks: ["llmguard_moderations"]
+```
+
+Now you can easily test it
+
+- Make a regular /chat/completion call
+
+- Check your proxy logs for any statement with `LLM Guard:`
+
+Expected results:
+
+```
+LLM Guard: Received response - {"sanitized_prompt": "hello world", "is_valid": true, "scanners": { "Regex": 0.0 }}
+```
+#### Turn on/off per key
+
+**1. Update config**
+```yaml
+litellm_settings:
+  callbacks: ["llmguard_moderations"]
+  llm_guard_mode: "key-specific"
+```
+
+**2. Create new key**
+
+```bash
+curl --location 'http://localhost:4000/key/generate' \
+  --header 'Authorization: Bearer sk-1234' \
+  --header 'Content-Type: application/json' \
+  --data '{
+    "models": ["fake-openai-endpoint"],
+    "permissions": {
+      "enable_llm_guard_check": true # 👈 KEY CHANGE
+    }
+  }'
+
+# Returns {..'key': 'my-new-key'}
+```
+
+**3. Test it!**
+
+```bash
+curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
+  --header 'Content-Type: application/json' \
+  --header 'Authorization: Bearer my-new-key' \ # 👈 TEST KEY
+  --data '{"model": "fake-openai-endpoint", "messages": [
+    {"role": "system", "content": "Be helpful"},
+    {"role": "user", "content": "What do you know?"}
+  ]
+  }'
+```
+
+
 ### Content Moderation with LlamaGuard
 
 Currently works with Sagemaker's LlamaGuard endpoint.

@@ -55,32 +120,7 @@ callbacks: ["llamaguard_moderations"]
 llamaguard_unsafe_content_categories: /path/to/llamaguard_prompt.txt
 ```
 
-### Content Moderation with LLM Guard
-
-Set the LLM Guard API Base in your environment
-
-```env
-LLM_GUARD_API_BASE = "http://0.0.0.0:4000"
-```
-
-Add `llmguard_moderations` as a callback
-
-```yaml
-litellm_settings:
-  callbacks: ["llmguard_moderations"]
-```
-
-Now you can easily test it
-
-- Make a regular /chat/completion call
-
-- Check your proxy logs for any statement with `LLM Guard:`
-
-Expected results:
-
-```
-LLM Guard: Received response - {"sanitized_prompt": "hello world", "is_valid": true, "scanners": { "Regex": 0.0 }}
-```
-
 ### Content Moderation with Google Text Moderation
 

docs/my-website/docs/proxy/grafana_metrics.md  (new file, 53 lines)

@@ -0,0 +1,53 @@
# Grafana, Prometheus metrics [BETA]

LiteLLM Exposes a `/metrics` endpoint for Prometheus to Poll

## Quick Start

If you're using the LiteLLM CLI with `litellm --config proxy_config.yaml` then you need to `pip install prometheus_client==0.20.0`. **This is already pre-installed on the litellm Docker image**

Add this to your proxy config.yaml
```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
litellm_settings:
  success_callback: ["prometheus"]
```

Start the proxy
```shell
litellm --config config.yaml --debug
```

Test Request
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
  --header 'Content-Type: application/json' \
  --data '{
    "model": "gpt-3.5-turbo",
    "messages": [
      {
        "role": "user",
        "content": "what llm are you"
      }
    ]
  }'
```

View Metrics on `/metrics`, Visit `http://localhost:4000/metrics`
```shell
http://localhost:4000/metrics

# <proxy_base_url>/metrics
```

## Metrics Tracked

| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_requests_metric` | Number of requests made, per `"user", "key", "model"` |
| `litellm_spend_metric` | Total Spend, per `"user", "key", "model"` |
| `litellm_total_tokens` | input + output tokens per `"user", "key", "model"` |

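As a quick way to check the `/metrics` endpoint from the new doc above without a full Prometheus setup, here is a small sketch (not part of the PR) that scrapes it and prints only the litellm series listed in the table; the URL assumes the proxy from the Quick Start is running locally.

```python
import requests

# The proxy from the Quick Start above, running locally
metrics_text = requests.get("http://localhost:4000/metrics").text

# Print only the litellm_* series (litellm_requests_metric, litellm_spend_metric, litellm_total_tokens)
for line in metrics_text.splitlines():
    if line.startswith("litellm_"):
        print(line)
```
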
docs/my-website/docs/proxy/prod.md  (new file, 249 lines)

@@ -0,0 +1,249 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# ⚡ Best Practices for Production

Expected Performance in Production

1 LiteLLM Uvicorn Worker on Kubernetes

| Description | Value |
|--------------|-------|
| Avg latency | `50ms` |
| Median latency | `51ms` |
| `/chat/completions` Requests/second | `35` |
| `/chat/completions` Requests/minute | `2100` |
| `/chat/completions` Requests/hour | `126K` |

## 1. Switch off Debug Logging

Remove `set_verbose: True` from your config.yaml
```yaml
litellm_settings:
  set_verbose: True
```

You should only see the following level of details in logs on the proxy server
```shell
# INFO: 192.168.2.205:11774 - "POST /chat/completions HTTP/1.1" 200 OK
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
```

## 2. On Kubernetes - Use 1 Uvicorn worker [Suggested CMD]

Use this Docker `CMD`. This will start the proxy with 1 Uvicorn Async Worker

(Ensure that you're not setting `run_gunicorn` or `num_workers` in the CMD).
```shell
CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]
```

## 3. Batch write spend updates every 60s

The default proxy batch write is 10s. This is to make it easy to see spend when debugging locally.

In production, we recommend using a longer interval period of 60s. This reduces the number of connections used to make DB writes.

```yaml
general_settings:
  master_key: sk-1234
  proxy_batch_write_at: 5 # 👈 Frequency of batch writing logs to server (in seconds)
```

## 4. Move spend logs to separate server

Writing each spend log to the db can slow down your proxy. In testing we saw a 70% improvement in median response time, by moving writing spend logs to a separate server.

👉 [LiteLLM Spend Logs Server](https://github.com/BerriAI/litellm/tree/main/litellm-js/spend-logs)

**Spend Logs**
This is a log of the key, tokens, model, and latency for each call on the proxy.

[**Full Payload**](https://github.com/BerriAI/litellm/blob/8c9623a6bc4ad9da0a2dac64249a60ed8da719e8/litellm/proxy/utils.py#L1769)

**1. Start the spend logs server**

```bash
docker run -p 3000:3000 \
  -e DATABASE_URL="postgres://.." \
  ghcr.io/berriai/litellm-spend_logs:main-latest

# RUNNING on http://0.0.0.0:3000
```

**2. Connect to proxy**

Example litellm_config.yaml

```yaml
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  master_key: sk-1234
  proxy_batch_write_at: 5 # 👈 Frequency of batch writing logs to server (in seconds)
```

Add `SPEND_LOGS_URL` as an environment variable when starting the proxy

```bash
docker run \
  -v $(pwd)/litellm_config.yaml:/app/config.yaml \
  -e DATABASE_URL="postgresql://.." \
  -e SPEND_LOGS_URL="http://host.docker.internal:3000" \ # 👈 KEY CHANGE
  -p 4000:4000 \
  ghcr.io/berriai/litellm:main-latest \
  --config /app/config.yaml --detailed_debug

# Running on http://0.0.0.0:4000
```

**3. Test Proxy!**

```bash
curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer sk-1234' \
  --data '{
    "model": "fake-openai-endpoint",
    "messages": [
      {"role": "system", "content": "Be helpful"},
      {"role": "user", "content": "What do you know?"}
    ]
  }'
```

In your LiteLLM Spend Logs Server, you should see

**Expected Response**

```
Received and stored 1 logs. Total logs in memory: 1
...
Flushed 1 log to the DB.
```

### Machine Specification

A t2.micro should be sufficient to handle 1k logs / minute on this server.

This consumes at max 120MB, and <0.1 vCPU.

## 5. Switch off resetting budgets

Add this to your config.yaml. (Only spend per Key, User and Team will be tracked - spend per API Call will not be written to the LiteLLM Database)
```yaml
general_settings:
  disable_spend_logs: true
  disable_reset_budget: true
```

## 6. Switch off `litellm.telemetry`

Switch off all telemetry tracking done by litellm

```yaml
litellm_settings:
  telemetry: False
```

## Machine Specifications to Deploy LiteLLM

| Service | Spec | CPUs | Memory | Architecture | Version |
| --- | --- | --- | --- | --- | --- |
| Server | `t2.small` | `1vCPUs` | `8GB` | `x86` | - |
| Redis Cache | - | - | - | - | 7.0+ Redis Engine |

## Reference Kubernetes Deployment YAML

Reference Kubernetes `deployment.yaml` that was load tested by us

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-deployment
spec:
  replicas: 3
  selector:
    matchLabels:
      app: litellm
  template:
    metadata:
      labels:
        app: litellm
    spec:
      containers:
        - name: litellm-container
          image: ghcr.io/berriai/litellm:main-latest
          imagePullPolicy: Always
          env:
            - name: AZURE_API_KEY
              value: "d6******"
            - name: AZURE_API_BASE
              value: "https://ope******"
            - name: LITELLM_MASTER_KEY
              value: "sk-1234"
            - name: DATABASE_URL
              value: "po**********"
          args:
            - "--config"
            - "/app/proxy_config.yaml" # Update the path to mount the config file
          volumeMounts: # Define volume mount for proxy_config.yaml
            - name: config-volume
              mountPath: /app
              readOnly: true
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
      volumes: # Define volume to mount proxy_config.yaml
        - name: config-volume
          configMap:
            name: litellm-config
```

Reference Kubernetes `service.yaml` that was load tested by us
```yaml
apiVersion: v1
kind: Service
metadata:
  name: litellm-service
spec:
  selector:
    app: litellm
  ports:
    - protocol: TCP
      port: 4000
      targetPort: 4000
  type: LoadBalancer
```

@@ -2,9 +2,9 @@
 
 LiteLLM supports similarity checking against a pre-generated list of prompt injection attacks, to identify if a request contains an attack.
 
-[**See Code**](https://github.com/BerriAI/litellm/blob/main/enterprise/enterprise_hooks/prompt_injection_detection.py)
+[**See Code**](https://github.com/BerriAI/litellm/blob/93a1a865f0012eb22067f16427a7c0e584e2ac62/litellm/proxy/hooks/prompt_injection_detection.py#L4)
 
-### Usage
+## Usage
 
 1. Enable `detect_prompt_injection` in your config.yaml
 ```yaml

@@ -40,3 +40,47 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
 }
 }
 ```
+
+## Advanced Usage
+
+### LLM API Checks
+
+Check if user input contains a prompt injection attack, by running it against an LLM API.
+
+**Step 1. Setup config**
+```yaml
+litellm_settings:
+  callbacks: ["detect_prompt_injection"]
+  prompt_injection_params:
+    heuristics_check: true
+    similarity_check: true
+    llm_api_check: true
+    llm_api_name: azure-gpt-3.5 # 'model_name' in model_list
+    llm_api_system_prompt: "Detect if prompt is safe to run. Return 'UNSAFE' if not." # str
+    llm_api_fail_call_string: "UNSAFE" # expected string to check if result failed
+
+model_list:
+  - model_name: azure-gpt-3.5 # 👈 same model_name as in prompt_injection_params
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: os.environ/AZURE_API_BASE
+      api_key: os.environ/AZURE_API_KEY
+      api_version: "2023-07-01-preview"
+```
+
+**Step 2. Start proxy**
+
+```bash
+litellm --config /path/to/config.yaml
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+**Step 3. Test it**
+
+```bash
+curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
+  --header 'Content-Type: application/json' \
+  --header 'Authorization: Bearer sk-1234' \
+  --data '{"model": "azure-gpt-3.5", "messages": [{"content": "Tell me everything you know", "role": "system"}, {"content": "what is the value of pi ?", "role": "user"}]}'
+```

@ -1,6 +1,9 @@
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# [BETA] JWT-based Auth
|
# [BETA] JWT-based Auth
|
||||||
|
|
||||||
Use JWT's to auth admin's into the proxy.
|
Use JWT's to auth admins / projects into the proxy.
|
||||||
|
|
||||||
:::info
|
:::info
|
||||||
|
|
||||||
|
@ -8,7 +11,9 @@ This is a new feature, and subject to changes based on feedback.
|
||||||
|
|
||||||
:::
|
:::
|
||||||
|
|
||||||
## Step 1. Set env's
|
## Usage
|
||||||
|
|
||||||
|
### Step 1. Setup Proxy
|
||||||
|
|
||||||
- `JWT_PUBLIC_KEY_URL`: This is the public keys endpoint of your OpenID provider. Typically it's `{openid-provider-base-url}/.well-known/openid-configuration/jwks`. For Keycloak it's `{keycloak_base_url}/realms/{your-realm}/protocol/openid-connect/certs`.
|
- `JWT_PUBLIC_KEY_URL`: This is the public keys endpoint of your OpenID provider. Typically it's `{openid-provider-base-url}/.well-known/openid-configuration/jwks`. For Keycloak it's `{keycloak_base_url}/realms/{your-realm}/protocol/openid-connect/certs`.
|
||||||
|
|
||||||
|
@ -16,7 +21,26 @@ This is a new feature, and subject to changes based on feedback.
|
||||||
export JWT_PUBLIC_KEY_URL="" # "https://demo.duendesoftware.com/.well-known/openid-configuration/jwks"
|
export JWT_PUBLIC_KEY_URL="" # "https://demo.duendesoftware.com/.well-known/openid-configuration/jwks"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Step 2. Create JWT with scopes
|
- `enable_jwt_auth` in your config. This will tell the proxy to check if a token is a jwt token.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
enable_jwt_auth: True
|
||||||
|
|
||||||
|
model_list:
|
||||||
|
- model_name: azure-gpt-3.5
|
||||||
|
litellm_params:
|
||||||
|
model: azure/<your-deployment-name>
|
||||||
|
api_base: os.environ/AZURE_API_BASE
|
||||||
|
api_key: os.environ/AZURE_API_KEY
|
||||||
|
api_version: "2023-07-01-preview"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2. Create JWT with scopes
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="admin" label="admin">
|
||||||
|
|
||||||
Create a client scope called `litellm_proxy_admin` in your OpenID provider (e.g. Keycloak).
|
Create a client scope called `litellm_proxy_admin` in your OpenID provider (e.g. Keycloak).
|
||||||
|
|
||||||
|
@ -32,8 +56,26 @@ curl --location ' 'https://demo.duendesoftware.com/connect/token'' \
|
||||||
--data-urlencode 'grant_type=password' \
|
--data-urlencode 'grant_type=password' \
|
||||||
--data-urlencode 'scope=litellm_proxy_admin' # 👈 grant this scope
|
--data-urlencode 'scope=litellm_proxy_admin' # 👈 grant this scope
|
||||||
```
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="project" label="project">
|
||||||
|
|
||||||
## Step 3. Create a proxy key with JWT
|
Create a JWT for your project on your OpenID provider (e.g. Keycloak).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location ' 'https://demo.duendesoftware.com/connect/token'' \
|
||||||
|
--header 'Content-Type: application/x-www-form-urlencoded' \
|
||||||
|
--data-urlencode 'client_id={CLIENT_ID}' \ # 👈 project id
|
||||||
|
--data-urlencode 'client_secret={CLIENT_SECRET}' \
|
||||||
|
--data-urlencode 'grant_type=client_credential' \
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
### Step 3. Test your JWT
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="key" label="/key/generate">
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl --location '{proxy_base_url}/key/generate' \
|
curl --location '{proxy_base_url}/key/generate' \
|
||||||
|
@ -41,3 +83,132 @@ curl --location '{proxy_base_url}/key/generate' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
--data '{}'
|
--data '{}'
|
||||||
```
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="llm_call" label="/chat/completions">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--header 'Authorization: Bearer eyJhbGciOiJSUzI1...' \
|
||||||
|
--data '{"model": "azure-gpt-3.5", "messages": [ { "role": "user", "content": "What's the weather like in Boston today?" } ]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
## Advanced - Set Accepted JWT Scope Names
|
||||||
|
|
||||||
|
Change the string in JWT 'scopes', that litellm evaluates to see if a user has admin access.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
enable_jwt_auth: True
|
||||||
|
litellm_jwtauth:
|
||||||
|
admin_jwt_scope: "litellm-proxy-admin"
|
||||||
|
```
|
||||||
|
### JWT Scopes

Here's what scopes on JWT-Auth tokens look like.

**Can be a list**
```
scope: ["litellm-proxy-admin",...]
```

**Can be a space-separated string**
```
scope: "litellm-proxy-admin ..."
```
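Both forms carry the same information. As a rough sketch of how such a claim can be normalized before checking for the admin scope (illustrative only, not litellm's internal code):

```python
def has_scope(scope_claim, required: str = "litellm-proxy-admin") -> bool:
    """Accepts either a list of scopes or a space-separated scope string."""
    if isinstance(scope_claim, str):
        scopes = scope_claim.split()
    else:
        scopes = list(scope_claim or [])
    return required in scopes


assert has_scope(["litellm-proxy-admin", "email"])
assert has_scope("litellm-proxy-admin email")
assert not has_scope("openid profile")
```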
## Advanced - Allowed Routes

Configure which routes a JWT can access via the config.

By default:

- Admins: can access only management routes (`/team/*`, `/key/*`, `/user/*`)
- Teams: can access only openai routes (`/chat/completions`, etc.) + info routes (`/*/info`)

[**See Code**](https://github.com/BerriAI/litellm/blob/b204f0c01c703317d812a1553363ab0cb989d5b6/litellm/proxy/_types.py#L95)

**Admin Routes**
```yaml
general_settings:
  master_key: sk-1234
  enable_jwt_auth: True
  litellm_jwtauth:
    admin_jwt_scope: "litellm-proxy-admin"
    admin_allowed_routes: ["/v1/embeddings"]
```

**Team Routes**
```yaml
general_settings:
  master_key: sk-1234
  enable_jwt_auth: True
  litellm_jwtauth:
    ...
    team_jwt_scope: "litellm-team" # 👈 Set JWT Scope string
    team_allowed_routes: ["/v1/chat/completions"] # 👈 Set accepted routes
```
## Advanced - Caching Public Keys

Control how long public keys are cached for (in seconds).

```yaml
general_settings:
  master_key: sk-1234
  enable_jwt_auth: True
  litellm_jwtauth:
    admin_jwt_scope: "litellm-proxy-admin"
    admin_allowed_routes: ["/v1/embeddings"]
    public_key_ttl: 600 # 👈 KEY CHANGE
```
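The proxy fetches your provider's public keys to verify token signatures; `public_key_ttl` controls how long those keys are reused before being re-fetched. A rough illustration of the idea (not litellm's actual implementation); the JWKS URL is a placeholder you would swap for your provider's endpoint:

```python
import json
import time
import urllib.request

JWKS_URL = "https://your-openid-provider/.well-known/jwks.json"  # placeholder
PUBLIC_KEY_TTL = 600  # seconds, mirrors public_key_ttl above

_cache = {"keys": None, "fetched_at": 0.0}


def get_public_keys():
    """Return cached JWKS keys, re-fetching once the TTL has expired."""
    now = time.time()
    if _cache["keys"] is None or now - _cache["fetched_at"] > PUBLIC_KEY_TTL:
        with urllib.request.urlopen(JWKS_URL) as resp:
            _cache["keys"] = json.load(resp)
        _cache["fetched_at"] = now
    return _cache["keys"]
```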
## Advanced - Custom JWT Field

Set a custom field in which the team_id exists. By default, the 'client_id' field is checked.

```yaml
general_settings:
  master_key: sk-1234
  enable_jwt_auth: True
  litellm_jwtauth:
    team_id_jwt_field: "client_id" # 👈 KEY CHANGE
```
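For example, given a decoded token payload like the one below, the proxy would read the team id from whichever claim `team_id_jwt_field` names. The payload values here are made up for illustration:

```python
decoded_payload = {
    "sub": "user-123",
    "client_id": "litellm-test-client-id-new",  # 👈 read as team_id with the config above
    "scope": "litellm-team",
}

team_id_jwt_field = "client_id"  # from litellm_jwtauth in config.yaml
team_id = decoded_payload.get(team_id_jwt_field)
print(team_id)  # litellm-test-client-id-new
```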
## All Params

[**See Code**](https://github.com/BerriAI/litellm/blob/b204f0c01c703317d812a1553363ab0cb989d5b6/litellm/proxy/_types.py#L95)
## Advanced - Block Teams

To block all requests for a certain team id, use `/team/block`

**Block Team**

```bash
curl --location 'http://0.0.0.0:4000/team/block' \
--header 'Authorization: Bearer <admin-token>' \
--header 'Content-Type: application/json' \
--data '{
  "team_id": "litellm-test-client-id-new" # 👈 set team id
}'
```

**Unblock Team**

```bash
curl --location 'http://0.0.0.0:4000/team/unblock' \
--header 'Authorization: Bearer <admin-token>' \
--header 'Content-Type: application/json' \
--data '{
  "team_id": "litellm-test-client-id-new" # 👈 set team id
}'
```
@ -47,8 +47,9 @@ Your Proxy Swagger is available on the root of the Proxy: e.g.: `http://localhos

Set the following in your .env on the Proxy

```shell
UI_USERNAME=ishaan-litellm
LITELLM_MASTER_KEY="sk-1234" # this is your master key for using the proxy server
UI_PASSWORD=langchain
UI_USERNAME=ishaan-litellm # username to sign in on UI
UI_PASSWORD=langchain # password to sign in on UI
```

On accessing the LiteLLM UI, you will be prompted to enter your username, password
@ -1,14 +1,14 @@
# 🔑 Virtual Keys, Users
import Tabs from '@theme/Tabs';
Track Spend, Set budgets and create virtual keys for the proxy
import TabItem from '@theme/TabItem';

Grant others temporary access to your proxy, with keys that expire after a set duration.

# 🔑 Virtual Keys
Track Spend, and control model access via virtual keys for the proxy

:::info

- 🔑 [UI to Generate, Edit, Delete Keys (with SSO)](https://docs.litellm.ai/docs/proxy/ui)
- [Deploy LiteLLM Proxy with Key Management](https://docs.litellm.ai/docs/proxy/deploy#deploy-with-database)
- Dockerfile.database for LiteLLM Proxy + Key Management [here](https://github.com/BerriAI/litellm/blob/main/Dockerfile.database)
- [Dockerfile.database for LiteLLM Proxy + Key Management](https://github.com/BerriAI/litellm/blob/main/Dockerfile.database)

:::
@ -30,7 +30,7 @@ export DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname>
```

You can then generate temporary keys by hitting the `/key/generate` endpoint.
You can then generate keys by hitting the `/key/generate` endpoint.

[**See code**](https://github.com/BerriAI/litellm/blob/7a669a36d2689c7f7890bc9c93e04ff3c2641299/litellm/proxy/proxy_server.py#L672)
@ -46,8 +46,8 @@ model_list:
      model: ollama/llama2

general_settings:
  master_key: sk-1234 # [OPTIONAL] if set all calls to proxy will require either this key or a valid generated token
  master_key: sk-1234
  database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>"
  database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # 👈 KEY CHANGE
```

**Step 2: Start litellm**
@ -56,62 +56,220 @@ general_settings:

```shell
litellm --config /path/to/config.yaml
```

**Step 3: Generate temporary keys**
**Step 3: Generate keys**

```shell
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai"}}'
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"], "metadata": {"user": "ishaan@berri.ai"}}'
```
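The generated key (`sk-...`) can then be used like any OpenAI API key, pointed at the proxy. A minimal sketch with the `openai` Python client; the key and base URL below are placeholders:

```python
import openai

client = openai.OpenAI(
    api_key="sk-<generated-key>",    # key returned by /key/generate
    base_url="http://0.0.0.0:4000",  # your proxy
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
print(response)
```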
## /key/generate

### Request
```shell
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
  "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
  "duration": "20m",
  "metadata": {"user": "ishaan@berri.ai"},
  "team_id": "core-infra",
  "max_budget": 10,
  "soft_budget": 5
}'
```

Request Params:

- `duration`: *Optional[str]* - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
- `key_alias`: *Optional[str]* - User defined key alias
- `team_id`: *Optional[str]* - The team id of the user
- `models`: *Optional[list]* - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
- `aliases`: *Optional[dict]* - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
- `soft_budget`: *Optional[float]* - Specify soft limit budget for a given key. Get Alerts when key hits its soft budget
- `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }

### Response

```python
{
    "key": "sk-kdEXbIqZRwEeEiHwdg7sFA",  # Bearer token
    "expires": "2023-11-19T01:38:25.834000+00:00",  # datetime object
    "key_name": "sk-...7sFA",  # abbreviated key string, ONLY stored in db if `allow_user_auth: true` set - [see](./ui.md)
    ...
}
```

### Upgrade/Downgrade Models

## Advanced - Spend Tracking

Get spend per:

- key - via `/key/info` [Swagger](https://litellm-api.up.railway.app/#/key%20management/info_key_fn_key_info_get)
- user - via `/user/info` [Swagger](https://litellm-api.up.railway.app/#/user%20management/user_info_user_info_get)
- team - via `/team/info` [Swagger](https://litellm-api.up.railway.app/#/team%20management/team_info_team_info_get)
- ⏳ end-users - via `/end_user/info` - [Comment on this issue for end-user cost tracking](https://github.com/BerriAI/litellm/issues/2633)

**How is it calculated?**

The cost per model is stored [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) and calculated by the [`completion_cost`](https://github.com/BerriAI/litellm/blob/db7974f9f216ee50b53c53120d1e3fc064173b60/litellm/utils.py#L3771) function.

**How is it tracked?**

Spend is automatically tracked for the key in the "LiteLLM_VerificationTokenTable". If the key has an attached 'user_id' or 'team_id', the spend for that user is tracked in the "LiteLLM_UserTable", and team in the "LiteLLM_TeamTable".

<Tabs>
<TabItem value="key-info" label="Key Spend">

You can get spend for a key by using the `/key/info` endpoint.

```bash
curl 'http://0.0.0.0:4000/key/info?key=<user-key>' \
     -X GET \
     -H 'Authorization: Bearer <your-master-key>'
```

This is automatically updated (in USD) when calls are made to /completions, /chat/completions, /embeddings using litellm's completion_cost() function. [**See Code**](https://github.com/BerriAI/litellm/blob/1a6ea20a0bb66491968907c2bfaabb7fe45fc064/litellm/utils.py#L1654).

**Sample response**

```python
{
    "key": "sk-tXL0wt5-lOOVK9sfY2UacA",
    "info": {
        "token": "sk-tXL0wt5-lOOVK9sfY2UacA",
        "spend": 0.0001065,  # 👈 SPEND
        "expires": "2023-11-24T23:19:11.131000Z",
        "models": [
            "gpt-3.5-turbo",
            "gpt-4",
            "claude-2"
        ],
        "aliases": {
            "mistral-7b": "gpt-3.5-turbo"
        },
        "config": {}
    }
}
```

</TabItem>
<TabItem value="user-info" label="User Spend">
|
||||||
|
|
||||||
|
**1. Create a user**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://localhost:4000/user/new' \
|
||||||
|
--header 'Authorization: Bearer <your-master-key>' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data-raw '{user_email: "krrish@berri.ai"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"expires": "2023-12-22T09:53:13.861000Z",
|
||||||
|
"user_id": "my-unique-id", # 👈 unique id
|
||||||
|
"max_budget": 0.0
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Create a key for that user**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
--header 'Authorization: Bearer <your-master-key>' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"], "user_id": "my-unique-id"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns a key - `sk-...`.
|
||||||
|
|
||||||
|
**3. See spend for user**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl 'http://0.0.0.0:4000/user/info?user_id=my-unique-id' \
|
||||||
|
-X GET \
|
||||||
|
-H 'Authorization: Bearer <your-master-key>'
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected Response
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"spend": 0 # 👈 SPEND
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="team-info" label="Team Spend">
|
||||||
|
|
||||||
|
Use teams, if you want keys to be owned by multiple people (e.g. for a production app).
|
||||||
|
|
||||||
|
**1. Create a team**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://localhost:4000/team/new' \
|
||||||
|
--header 'Authorization: Bearer <your-master-key>' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data-raw '{"team_alias": "my-awesome-team"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"expires": "2023-12-22T09:53:13.861000Z",
|
||||||
|
"team_id": "my-unique-id", # 👈 unique id
|
||||||
|
"max_budget": 0.0
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Create a key for that team**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
--header 'Authorization: Bearer <your-master-key>' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"], "team_id": "my-unique-id"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns a key - `sk-...`.
|
||||||
|
|
||||||
|
**3. See spend for team**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl 'http://0.0.0.0:4000/team/info?team_id=my-unique-id' \
|
||||||
|
-X GET \
|
||||||
|
-H 'Authorization: Bearer <your-master-key>'
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected Response
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"spend": 0 # 👈 SPEND
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
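The spend values shown above come from litellm's `completion_cost()` helper (linked earlier). A minimal sketch of calculating the cost of a single response yourself, assuming `OPENAI_API_KEY` is set in your environment:

```python
import litellm
from litellm import completion_cost

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)

# cost (in USD) computed from model_prices_and_context_window.json
cost = completion_cost(completion_response=response)
print(f"cost (USD): {cost}")
```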
## Advanced - Model Access

### Restrict models by `team_id`
`litellm-dev` can only access `azure-gpt-3.5`

**1. Create a team via `/team/new`**
```shell
curl --location 'http://localhost:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
  "team_alias": "litellm-dev",
  "models": ["azure-gpt-3.5"]
}'

# returns {...,"team_id": "my-unique-id"}
```

**2. Create a key for team**
```shell
curl --location 'http://localhost:4000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{"team_id": "my-unique-id"}'
```

**3. Test it**
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-qo992IjKOC2CHKZGRoJIGA' \
--data '{
  "model": "BEDROCK_GROUP",
  "messages": [
    {
      "role": "user",
      "content": "hi"
    }
  ]
}'
```

```shell
{"error":{"message":"Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n\nTraceback (most recent call last):\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/proxy_server.py\", line 2298, in chat_completion\n _is_valid_team_configs(\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/utils.py\", line 1296, in _is_valid_team_configs\n raise Exception(\nException: Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n","type":"None","param":"None","code":500}}
```
### Model Aliases

If a user is expected to use a given model (i.e. gpt-3.5), and you want to:
@ -189,419 +347,7 @@ curl --location 'http://localhost:4000/key/generate' \
|
||||||
"max_budget": 0,}'
|
"max_budget": 0,}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Advanced - Custom Auth
|
||||||
## /key/info
|
|
||||||
|
|
||||||
### Request
|
|
||||||
```shell
|
|
||||||
curl -X GET "http://0.0.0.0:4000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
|
|
||||||
-H "Authorization: Bearer sk-1234"
|
|
||||||
```
|
|
||||||
|
|
||||||
Request Params:
|
|
||||||
- key: str - The key you want the info for
|
|
||||||
|
|
||||||
### Response
|
|
||||||
|
|
||||||
`token` is the hashed key (The DB stores the hashed key for security)
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"key": "sk-02Wr4IAlN3NvPXvL5JVvDA",
|
|
||||||
"info": {
|
|
||||||
"token": "80321a12d03412c527f2bd9db5fabd746abead2e1d50b435a534432fbaca9ef5",
|
|
||||||
"spend": 0.0,
|
|
||||||
"expires": "2024-01-18T23:52:09.125000+00:00",
|
|
||||||
"models": ["azure-gpt-3.5", "azure-embedding-model"],
|
|
||||||
"aliases": {},
|
|
||||||
"config": {},
|
|
||||||
"user_id": "ishaan2@berri.ai",
|
|
||||||
"team_id": "None",
|
|
||||||
"max_parallel_requests": null,
|
|
||||||
"metadata": {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
## /key/update
|
|
||||||
|
|
||||||
### Request
|
|
||||||
```shell
|
|
||||||
curl 'http://0.0.0.0:4000/key/update' \
|
|
||||||
--header 'Authorization: Bearer <your-master-key>' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{
|
|
||||||
"key": "sk-kdEXbIqZRwEeEiHwdg7sFA",
|
|
||||||
"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
|
|
||||||
"metadata": {"user": "ishaan@berri.ai"},
|
|
||||||
"team_id": "core-infra"
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Request Params:
|
|
||||||
- key: str - The key that needs to be updated.
|
|
||||||
|
|
||||||
- models: list or null (optional) - Specify the models a token has access to. If null, then the token has access to all models on the server.
|
|
||||||
|
|
||||||
- metadata: dict or null (optional) - Pass metadata for the updated token. If null, defaults to an empty dictionary.
|
|
||||||
|
|
||||||
- team_id: str or null (optional) - Specify the team_id for the associated key.
|
|
||||||
|
|
||||||
### Response
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"key": "sk-kdEXbIqZRwEeEiHwdg7sFA",
|
|
||||||
"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
|
|
||||||
"metadata": {
|
|
||||||
"user": "ishaan@berri.ai"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## /key/delete
|
|
||||||
|
|
||||||
### Request
|
|
||||||
```shell
|
|
||||||
curl 'http://0.0.0.0:4000/key/delete' \
|
|
||||||
--header 'Authorization: Bearer <your-master-key>' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{
|
|
||||||
"keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Request Params:
|
|
||||||
- keys: List[str] - List of keys to delete
|
|
||||||
|
|
||||||
### Response
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"deleted_keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## /user/new
|
|
||||||
|
|
||||||
### Request
|
|
||||||
|
|
||||||
All [key/generate params supported](#keygenerate) for creating a user
|
|
||||||
```shell
|
|
||||||
curl 'http://0.0.0.0:4000/user/new' \
|
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{
|
|
||||||
"user_id": "ishaan1",
|
|
||||||
"user_email": "ishaan@litellm.ai",
|
|
||||||
"user_role": "admin",
|
|
||||||
"team_id": "cto-team",
|
|
||||||
"max_budget": 20,
|
|
||||||
"budget_duration": "1h"
|
|
||||||
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Request Params:
|
|
||||||
|
|
||||||
- user_id: str (optional - defaults to uuid) - The unique identifier for the user.
|
|
||||||
- user_email: str (optional - defaults to "") - The email address associated with the user.
|
|
||||||
- user_role: str (optional - defaults to "app_user") - The role assigned to the user. Can be "admin", "app_owner", "app_user"
|
|
||||||
|
|
||||||
**Possible `user_role` values**
|
|
||||||
```
|
|
||||||
"admin" - Maintaining the proxy and owning the overall budget
|
|
||||||
"app_owner" - employees maintaining the apps, each owner may own more than one app
|
|
||||||
"app_user" - users who know nothing about the proxy. These users get created when you pass `user` to /chat/completions
|
|
||||||
```
|
|
||||||
- team_id: str (optional - defaults to "") - The identifier for the team to which the user belongs.
|
|
||||||
- max_budget: float (optional - defaults to `null`) - The maximum budget allocated for the user. No budget checks done if `max_budget==null`
|
|
||||||
- budget_duration: str (optional - defaults to `null`) - The duration for which the budget is valid, e.g., "1h", "1d"
|
|
||||||
|
|
||||||
### Response
|
|
||||||
A key will be generated for the new user created
|
|
||||||
|
|
||||||
```shell
|
|
||||||
{
|
|
||||||
"models": [],
|
|
||||||
"spend": 0.0,
|
|
||||||
"max_budget": null,
|
|
||||||
"user_id": "ishaan1",
|
|
||||||
"team_id": null,
|
|
||||||
"max_parallel_requests": null,
|
|
||||||
"metadata": {},
|
|
||||||
"tpm_limit": null,
|
|
||||||
"rpm_limit": null,
|
|
||||||
"budget_duration": null,
|
|
||||||
"allowed_cache_controls": [],
|
|
||||||
"key_alias": null,
|
|
||||||
"duration": null,
|
|
||||||
"aliases": {},
|
|
||||||
"config": {},
|
|
||||||
"key": "sk-JflB33ucTqc2NYvNAgiBCA",
|
|
||||||
"key_name": null,
|
|
||||||
"expires": null
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## /user/info
|
|
||||||
|
|
||||||
### Request
|
|
||||||
|
|
||||||
#### View all Users
|
|
||||||
If you're trying to view all users, we recommend using pagination with the following args
|
|
||||||
- `view_all=true`
|
|
||||||
- `page=0` Optional(int) min = 0, default=0
|
|
||||||
- `page_size=25` Optional(int) min = 1, default = 25
|
|
||||||
```shell
|
|
||||||
curl -X GET "http://0.0.0.0:4000/user/info?view_all=true&page=0&page_size=25" -H "Authorization: Bearer sk-1234"
|
|
||||||
```
|
|
||||||
|
|
||||||
#### View specific user_id
|
|
||||||
```shell
|
|
||||||
curl -X GET "http://0.0.0.0:4000/user/info?user_id=228da235-eef0-4c30-bf53-5d6ac0d278c2" -H "Authorization: Bearer sk-1234"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Response
|
|
||||||
View user spend, budget, models, keys and teams
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"user_id": "228da235-eef0-4c30-bf53-5d6ac0d278c2",
|
|
||||||
"user_info": {
|
|
||||||
"user_id": "228da235-eef0-4c30-bf53-5d6ac0d278c2",
|
|
||||||
"team_id": null,
|
|
||||||
"teams": [],
|
|
||||||
"user_role": "app_user",
|
|
||||||
"max_budget": null,
|
|
||||||
"spend": 200000.0,
|
|
||||||
"user_email": null,
|
|
||||||
"models": [],
|
|
||||||
"max_parallel_requests": null,
|
|
||||||
"tpm_limit": null,
|
|
||||||
"rpm_limit": null,
|
|
||||||
"budget_duration": null,
|
|
||||||
"budget_reset_at": null,
|
|
||||||
"allowed_cache_controls": [],
|
|
||||||
"model_spend": {
|
|
||||||
"chatgpt-v-2": 200000
|
|
||||||
},
|
|
||||||
"model_max_budget": {}
|
|
||||||
},
|
|
||||||
"keys": [
|
|
||||||
{
|
|
||||||
"token": "16c337f9df00a0e6472627e39a2ed02e67bc9a8a760c983c4e9b8cad7954f3c0",
|
|
||||||
"key_name": null,
|
|
||||||
"key_alias": null,
|
|
||||||
"spend": 200000.0,
|
|
||||||
"expires": null,
|
|
||||||
"models": [],
|
|
||||||
"aliases": {},
|
|
||||||
"config": {},
|
|
||||||
"user_id": "228da235-eef0-4c30-bf53-5d6ac0d278c2",
|
|
||||||
"team_id": null,
|
|
||||||
"permissions": {},
|
|
||||||
"max_parallel_requests": null,
|
|
||||||
"metadata": {},
|
|
||||||
"tpm_limit": null,
|
|
||||||
"rpm_limit": null,
|
|
||||||
"max_budget": null,
|
|
||||||
"budget_duration": null,
|
|
||||||
"budget_reset_at": null,
|
|
||||||
"allowed_cache_controls": [],
|
|
||||||
"model_spend": {
|
|
||||||
"chatgpt-v-2": 200000
|
|
||||||
},
|
|
||||||
"model_max_budget": {}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"teams": []
|
|
||||||
}
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
## Advanced
|
|
||||||
### Upperbound /key/generate params
|
|
||||||
Use this, if you need to control the upperbound that users can use for `max_budget`, `budget_duration` or any `key/generate` param per key.
|
|
||||||
|
|
||||||
Set `litellm_settings:upperbound_key_generate_params`:
|
|
||||||
```yaml
|
|
||||||
litellm_settings:
|
|
||||||
upperbound_key_generate_params:
|
|
||||||
max_budget: 100 # upperbound of $100, for all /key/generate requests
|
|
||||||
duration: "30d" # upperbound of 30 days for all /key/generate requests
|
|
||||||
```
|
|
||||||
|
|
||||||
** Expected Behavior **
|
|
||||||
|
|
||||||
- Send a `/key/generate` request with `max_budget=200`
|
|
||||||
- Key will be created with `max_budget=100` since 100 is the upper bound
|
|
||||||
|
|
||||||
### Default /key/generate params
|
|
||||||
Use this, if you need to control the default `max_budget` or any `key/generate` param per key.
|
|
||||||
|
|
||||||
When a `/key/generate` request does not specify `max_budget`, it will use the `max_budget` specified in `default_key_generate_params`
|
|
||||||
|
|
||||||
Set `litellm_settings:default_key_generate_params`:
|
|
||||||
```yaml
|
|
||||||
litellm_settings:
|
|
||||||
default_key_generate_params:
|
|
||||||
max_budget: 1.5000
|
|
||||||
models: ["azure-gpt-3.5"]
|
|
||||||
duration: # blank means `null`
|
|
||||||
metadata: {"setting":"default"}
|
|
||||||
team_id: "core-infra"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Restrict models by `team_id`
|
|
||||||
`litellm-dev` can only access `azure-gpt-3.5`
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
litellm_settings:
|
|
||||||
default_team_settings:
|
|
||||||
- team_id: litellm-dev
|
|
||||||
models: ["azure-gpt-3.5"]
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Create key with team_id="litellm-dev"
|
|
||||||
```shell
|
|
||||||
curl --location 'http://localhost:4000/key/generate' \
|
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{"team_id": "litellm-dev"}'
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Use Key to call invalid model - Fails
|
|
||||||
```shell
|
|
||||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--header 'Authorization: Bearer sk-qo992IjKOC2CHKZGRoJIGA' \
|
|
||||||
--data '{
|
|
||||||
"model": "BEDROCK_GROUP",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "hi"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
```shell
|
|
||||||
{"error":{"message":"Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n\nTraceback (most recent call last):\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/proxy_server.py\", line 2298, in chat_completion\n _is_valid_team_configs(\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/utils.py\", line 1296, in _is_valid_team_configs\n raise Exception(\nException: Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n","type":"None","param":"None","code":500}}%
|
|
||||||
```
|
|
||||||
|
|
||||||
### Set Budgets - Per Key
|
|
||||||
|
|
||||||
Set `max_budget` in (USD $) param in the `key/generate` request. By default the `max_budget` is set to `null` and is not checked for keys
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl 'http://0.0.0.0:4000/key/generate' \
|
|
||||||
--header 'Authorization: Bearer <your-master-key>' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{
|
|
||||||
"metadata": {"user": "ishaan@berri.ai"},
|
|
||||||
"team_id": "core-infra",
|
|
||||||
"max_budget": 10,
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Expected Behaviour
|
|
||||||
- Costs Per key get auto-populated in `LiteLLM_VerificationToken` Table
|
|
||||||
- After the key crosses it's `max_budget`, requests fail
|
|
||||||
|
|
||||||
Example Request to `/chat/completions` when key has crossed budget
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--header 'Authorization: Bearer sk-ULl_IKCVFy2EZRzQB16RUA' \
|
|
||||||
--data ' {
|
|
||||||
"model": "azure-gpt-3.5",
|
|
||||||
"user": "e09b4da8-ed80-4b05-ac93-e16d9eb56fca",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "respond in 50 lines"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
Expected Response from `/chat/completions` when key has crossed budget
|
|
||||||
```shell
|
|
||||||
{
|
|
||||||
"detail":"Authentication Error, ExceededTokenBudget: Current spend for token: 7.2e-05; Max Budget for Token: 2e-07"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Set Budgets - Per User
|
|
||||||
|
|
||||||
LiteLLM exposes a `/user/new` endpoint to create budgets for users, that persist across multiple keys.
|
|
||||||
|
|
||||||
This is documented in the swagger (live on your server root endpoint - e.g. `http://0.0.0.0:4000/`). Here's an example request.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl --location 'http://localhost:4000/user/new' \
|
|
||||||
--header 'Authorization: Bearer <your-master-key>' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{"models": ["azure-models"], "max_budget": 0, "user_id": "krrish3@berri.ai"}'
|
|
||||||
```
|
|
||||||
The request is a normal `/key/generate` request body + a `max_budget` field.
|
|
||||||
|
|
||||||
**Sample Response**
|
|
||||||
|
|
||||||
```shell
|
|
||||||
{
|
|
||||||
"key": "sk-YF2OxDbrgd1y2KgwxmEA2w",
|
|
||||||
"expires": "2023-12-22T09:53:13.861000Z",
|
|
||||||
"user_id": "krrish3@berri.ai",
|
|
||||||
"max_budget": 0.0
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Tracking Spend
|
|
||||||
|
|
||||||
You can get spend for a key by using the `/key/info` endpoint.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl 'http://0.0.0.0:4000/key/info?key=<user-key>' \
|
|
||||||
-X GET \
|
|
||||||
-H 'Authorization: Bearer <your-master-key>'
|
|
||||||
```
|
|
||||||
|
|
||||||
This is automatically updated (in USD) when calls are made to /completions, /chat/completions, /embeddings using litellm's completion_cost() function. [**See Code**](https://github.com/BerriAI/litellm/blob/1a6ea20a0bb66491968907c2bfaabb7fe45fc064/litellm/utils.py#L1654).
|
|
||||||
|
|
||||||
**Sample response**
|
|
||||||
|
|
||||||
```python
|
|
||||||
{
|
|
||||||
"key": "sk-tXL0wt5-lOOVK9sfY2UacA",
|
|
||||||
"info": {
|
|
||||||
"token": "sk-tXL0wt5-lOOVK9sfY2UacA",
|
|
||||||
"spend": 0.0001065,
|
|
||||||
"expires": "2023-11-24T23:19:11.131000Z",
|
|
||||||
"models": [
|
|
||||||
"gpt-3.5-turbo",
|
|
||||||
"gpt-4",
|
|
||||||
"claude-2"
|
|
||||||
],
|
|
||||||
"aliases": {
|
|
||||||
"mistral-7b": "gpt-3.5-turbo"
|
|
||||||
},
|
|
||||||
"config": {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Custom Auth

You can now override the default api key auth.
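A rough sketch of what a custom auth function can look like. The module name, function signature, and the `general_settings: custom_auth:` wiring are assumptions based on litellm's custom-auth docs, so check them against your litellm version:

```python
# custom_auth.py
from fastapi import Request
from litellm.proxy._types import UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
    # accept only keys with a known prefix - replace with your own logic
    if api_key.startswith("my-custom-prefix-"):
        return UserAPIKeyAuth(api_key=api_key)
    raise Exception("Invalid API key")
```

You would then point the proxy at it from your config, analogous to `custom_key_generate` shown below (e.g. `general_settings: custom_auth: custom_auth.user_api_key_auth`).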
@ -738,3 +484,55 @@ litellm_settings:
general_settings:
  custom_key_generate: custom_auth.custom_generate_key_fn
```
## Upperbound /key/generate params

Use this, if you need to set default upperbounds for `max_budget`, `budget_duration` or any `key/generate` param per key.

Set `litellm_settings:upperbound_key_generate_params`:
```yaml
litellm_settings:
  upperbound_key_generate_params:
    max_budget: 100 # upperbound of $100, for all /key/generate requests
    duration: "30d" # upperbound of 30 days for all /key/generate requests
```

**Expected Behavior**

- Send a `/key/generate` request with `max_budget=200`
- Key will be created with `max_budget=100` since 100 is the upper bound
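For example, a sketch of that capping behaviour using the `requests` library. The master key and proxy URL are placeholders, and this assumes the `/key/generate` response echoes the stored `max_budget`:

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
    json={"max_budget": 200},  # above the configured upperbound of 100
)

print(resp.json().get("max_budget"))  # expected: 100.0, the configured upper bound
```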
## Default /key/generate params

Use this, if you need to control the default `max_budget` or any `key/generate` param per key.

When a `/key/generate` request does not specify `max_budget`, it will use the `max_budget` specified in `default_key_generate_params`

Set `litellm_settings:default_key_generate_params`:
```yaml
litellm_settings:
  default_key_generate_params:
    max_budget: 1.5000
    models: ["azure-gpt-3.5"]
    duration: # blank means `null`
    metadata: {"setting":"default"}
    team_id: "core-infra"
```
## Endpoints

### Keys

#### [**👉 API REFERENCE DOCS**](https://litellm-api.up.railway.app/#/key%20management/)

### Users

#### [**👉 API REFERENCE DOCS**](https://litellm-api.up.railway.app/#/user%20management/)

### Teams

#### [**👉 API REFERENCE DOCS**](https://litellm-api.up.railway.app/#/team%20management)
@ -442,6 +442,8 @@ If a call fails after num_retries, fall back to another model group.

If the error is a context window exceeded error, fall back to a larger model group (if given).

Fallbacks are done in-order - ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"], will do 'gpt-3.5-turbo' first, then 'gpt-4', etc.
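A minimal sketch of wiring this up. It assumes the Router's `fallbacks` and `context_window_fallbacks` parameters and a simple OpenAI-only `model_list`; adapt the deployments to your own setup:

```python
import os
from litellm import Router

model_list = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo", "api_key": os.getenv("OPENAI_API_KEY")}},
    {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4", "api_key": os.getenv("OPENAI_API_KEY")}},
]

router = Router(
    model_list=model_list,
    num_retries=3,
    fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}],                 # tried in order, after retries are exhausted
    context_window_fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}],  # used only for context window exceeded errors
)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
print(response)
```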
```python
from litellm import Router
```

@ -551,6 +553,156 @@ router = Router(model_list: Optional[list] = None,

```python
               cache_responses=True)
```
## Pre-Call Checks (Context Window)

Enable pre-call checks to filter out deployments with context window limit < messages for a call.

<Tabs>
<TabItem value="sdk" label="SDK">

**1. Enable pre-call checks**
```python
from litellm import Router
# ...
router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set to True
```
**2. (Azure-only) Set base model**

For azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with `azure/`.

```python
model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo",  # 👈 SET BASE MODEL
        }
    },
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]
```
**3. Test it!**

```python
"""
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
- Send a 5k prompt
- Assert it works
"""
from litellm import Router
import os

model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo",
        }
    },
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]

router = Router(model_list=model_list, enable_pre_call_checks=True)

text = "What is the meaning of 42?" * 5000

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": text},
        {"role": "user", "content": "Who was Alexander?"},
    ],
)

print(f"response: {response}")
```
</TabItem>
<TabItem value="proxy" label="Proxy">
**1. Setup config**

For azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with azure/.

```yaml
router_settings:
  enable_pre_call_checks: true # 1. Enable pre-call checks

model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
    model_info:
      base_model: azure/gpt-4-1106-preview # 2. 👈 (azure-only) SET BASE MODEL

  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo-1106
      api_key: os.environ/OPENAI_API_KEY
```

**2. Start proxy**

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```
**3. Test it!**

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

text = "What is the meaning of 42?" * 5000

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": text},
        {"role": "user", "content": "Who was Alexander?"},
    ],
)

print(response)
```
</TabItem>
</Tabs>
## Caching across model groups

If you want to cache across 2 different model groups (e.g. azure deployments, and openai), use caching groups.
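A short sketch, assuming the Router's `caching_groups` parameter (a list of model-group tuples that should share cache entries) - check your litellm version before relying on the exact name:

```python
import os
from litellm import Router

model_list = [
    {"model_name": "openai-gpt-3.5", "litellm_params": {"model": "gpt-3.5-turbo", "api_key": os.getenv("OPENAI_API_KEY")}},
    {"model_name": "azure-gpt-3.5", "litellm_params": {"model": "azure/chatgpt-v-2", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION")}},
]

router = Router(
    model_list=model_list,
    cache_responses=True,
    caching_groups=[("openai-gpt-3.5", "azure-gpt-3.5")],  # responses cached for one group are reused for the other
)
```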
@ -95,5 +95,4 @@ completion_with_split_tests(
)
```

### A/B Testing Dashboard after running code - https://admin.litellm.ai/
<Image img={require('../../img/ab_test_logs.png')} />
95 docs/my-website/docs/tutorials/instructor.md Normal file

@ -0,0 +1,95 @@
# Instructor - Function Calling

Use LiteLLM Router with [jxnl's instructor library](https://github.com/jxnl/instructor) for function calling in prod.

## Usage

```python
import os

import litellm
from litellm import Router
import instructor
from pydantic import BaseModel

litellm.set_verbose = True # 👈 print DEBUG LOGS

client = instructor.patch(
    Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call - e.g.: https://github.com/BerriAI/litellm/blob/62a591f90c99120e1a51a8445f5c3752586868ea/litellm/router.py#L111
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ]
    )
)


class UserDetail(BaseModel):
    name: str
    age: int


user = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=UserDetail,
    messages=[
        {"role": "user", "content": "Extract Jason is 25 years old"},
    ],
)

assert isinstance(user, UserDetail)
assert user.name == "Jason"
assert user.age == 25

print(f"user: {user}")
```

## Async Calls

```python
import os

import litellm
from litellm import Router
import instructor, asyncio
from pydantic import BaseModel

aclient = instructor.apatch(
    Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ],
        default_litellm_params={"acompletion": True}, # 👈 IMPORTANT - tells litellm to route to async completion function.
    )
)


class UserExtract(BaseModel):
    name: str
    age: int


async def main():
    model = await aclient.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=UserExtract,
        messages=[
            {"role": "user", "content": "Extract jason is 25 years old"},
        ],
    )
    print(f"model: {model}")


asyncio.run(main())
```
Binary image files under docs/my-website/img changed in this diff; one new screenshot was added: docs/my-website/img/end_user_enforcement.png (180 KiB).
38
docs/my-website/package-lock.json
generated
|
@ -5561,12 +5561,12 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/body-parser": {
|
"node_modules/body-parser": {
|
||||||
"version": "1.20.1",
|
"version": "1.20.2",
|
||||||
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz",
|
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz",
|
||||||
"integrity": "sha512-jWi7abTbYwajOytWCQc37VulmWiRae5RyTpaCyDcS5/lMdtwSz5lOpDE67srw/HYe35f1z3fDQw+3txg7gNtWw==",
|
"integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"bytes": "3.1.2",
|
"bytes": "3.1.2",
|
||||||
"content-type": "~1.0.4",
|
"content-type": "~1.0.5",
|
||||||
"debug": "2.6.9",
|
"debug": "2.6.9",
|
||||||
"depd": "2.0.0",
|
"depd": "2.0.0",
|
||||||
"destroy": "1.2.0",
|
"destroy": "1.2.0",
|
||||||
|
@ -5574,7 +5574,7 @@
|
||||||
"iconv-lite": "0.4.24",
|
"iconv-lite": "0.4.24",
|
||||||
"on-finished": "2.4.1",
|
"on-finished": "2.4.1",
|
||||||
"qs": "6.11.0",
|
"qs": "6.11.0",
|
||||||
"raw-body": "2.5.1",
|
"raw-body": "2.5.2",
|
||||||
"type-is": "~1.6.18",
|
"type-is": "~1.6.18",
|
||||||
"unpipe": "1.0.0"
|
"unpipe": "1.0.0"
|
||||||
},
|
},
|
||||||
|
@ -6707,9 +6707,9 @@
|
||||||
"integrity": "sha512-ASFBup0Mz1uyiIjANan1jzLQami9z1PoYSZCiiYW2FczPbenXc45FZdBZLzOT+r6+iciuEModtmCti+hjaAk0A=="
|
"integrity": "sha512-ASFBup0Mz1uyiIjANan1jzLQami9z1PoYSZCiiYW2FczPbenXc45FZdBZLzOT+r6+iciuEModtmCti+hjaAk0A=="
|
||||||
},
|
},
|
||||||
"node_modules/cookie": {
|
"node_modules/cookie": {
|
||||||
"version": "0.5.0",
|
"version": "0.6.0",
|
||||||
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz",
|
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz",
|
||||||
"integrity": "sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==",
|
"integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">= 0.6"
|
"node": ">= 0.6"
|
||||||
}
|
}
|
||||||
|
@ -10411,16 +10411,16 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/express": {
|
"node_modules/express": {
|
||||||
"version": "4.18.2",
|
"version": "4.19.2",
|
||||||
"resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz",
|
"resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz",
|
||||||
"integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==",
|
"integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"accepts": "~1.3.8",
|
"accepts": "~1.3.8",
|
||||||
"array-flatten": "1.1.1",
|
"array-flatten": "1.1.1",
|
||||||
"body-parser": "1.20.1",
|
"body-parser": "1.20.2",
|
||||||
"content-disposition": "0.5.4",
|
"content-disposition": "0.5.4",
|
||||||
"content-type": "~1.0.4",
|
"content-type": "~1.0.4",
|
||||||
"cookie": "0.5.0",
|
"cookie": "0.6.0",
|
||||||
"cookie-signature": "1.0.6",
|
"cookie-signature": "1.0.6",
|
||||||
"debug": "2.6.9",
|
"debug": "2.6.9",
|
||||||
"depd": "2.0.0",
|
"depd": "2.0.0",
|
||||||
|
@ -17016,9 +17016,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/raw-body": {
|
"node_modules/raw-body": {
|
||||||
"version": "2.5.1",
|
"version": "2.5.2",
|
||||||
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz",
|
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.2.tgz",
|
||||||
"integrity": "sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==",
|
"integrity": "sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"bytes": "3.1.2",
|
"bytes": "3.1.2",
|
||||||
"http-errors": "2.0.0",
|
"http-errors": "2.0.0",
|
||||||
|
@ -21554,9 +21554,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/webpack-dev-middleware": {
|
"node_modules/webpack-dev-middleware": {
|
||||||
"version": "5.3.3",
|
"version": "5.3.4",
|
||||||
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.3.tgz",
|
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.4.tgz",
|
||||||
"integrity": "sha512-hj5CYrY0bZLB+eTO+x/j67Pkrquiy7kWepMHmUMoPsmcUaeEnQJqFzHJOyxgWlq746/wUuA64p9ta34Kyb01pA==",
|
"integrity": "sha512-BVdTqhhs+0IfoeAf7EoH5WE+exCmqGerHfDM0IL096Px60Tq2Mn9MAbnaGUe6HiMa41KMCYF19gyzZmBcq/o4Q==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"colorette": "^2.0.10",
|
"colorette": "^2.0.10",
|
||||||
"memfs": "^3.4.3",
|
"memfs": "^3.4.3",
|
||||||
|
|
|
@ -30,6 +30,7 @@ const sidebars = {
      items: [
        "proxy/quick_start",
        "proxy/deploy",
        "proxy/prod",
        "proxy/configs",
        {
          type: "link",
@ -42,7 +43,6 @@ const sidebars = {
        "proxy/users",
        "proxy/team_based_routing",
        "proxy/ui",
        "proxy/budget_alerts",
        "proxy/cost_tracking",
        "proxy/token_auth",
        {
@ -61,6 +61,7 @@ const sidebars = {
          label: "Logging, Alerting",
          items: ["proxy/logging", "proxy/alerting", "proxy/streaming_logging"],
        },
        "proxy/grafana_metrics",
        "proxy/call_hooks",
        "proxy/rules",
        "proxy/cli",
@ -180,8 +181,9 @@ const sidebars = {
      type: "category",
      label: "Tutorials",
      items: [
        "tutorials/azure_openai",
        'tutorials/azure_openai',
        "tutorials/oobabooga",
        'tutorials/instructor',
        'tutorials/oobabooga',
        "tutorials/gradio_integration",
        "tutorials/huggingface_codellama",
        "tutorials/huggingface_tutorial",
@ -3138,13 +3138,13 @@ bluebird@~3.4.1:
|
||||||
resolved "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz"
|
resolved "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz"
|
||||||
integrity sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==
|
integrity sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==
|
||||||
|
|
||||||
body-parser@1.20.1:
|
body-parser@1.20.2:
|
||||||
version "1.20.1"
|
version "1.20.2"
|
||||||
resolved "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz"
|
resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.20.2.tgz#6feb0e21c4724d06de7ff38da36dad4f57a747fd"
|
||||||
integrity sha512-jWi7abTbYwajOytWCQc37VulmWiRae5RyTpaCyDcS5/lMdtwSz5lOpDE67srw/HYe35f1z3fDQw+3txg7gNtWw==
|
integrity sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==
|
||||||
dependencies:
|
dependencies:
|
||||||
bytes "3.1.2"
|
bytes "3.1.2"
|
||||||
content-type "~1.0.4"
|
content-type "~1.0.5"
|
||||||
debug "2.6.9"
|
debug "2.6.9"
|
||||||
depd "2.0.0"
|
depd "2.0.0"
|
||||||
destroy "1.2.0"
|
destroy "1.2.0"
|
||||||
|
@ -3152,7 +3152,7 @@ body-parser@1.20.1:
|
||||||
iconv-lite "0.4.24"
|
iconv-lite "0.4.24"
|
||||||
on-finished "2.4.1"
|
on-finished "2.4.1"
|
||||||
qs "6.11.0"
|
qs "6.11.0"
|
||||||
raw-body "2.5.1"
|
raw-body "2.5.2"
|
||||||
type-is "~1.6.18"
|
type-is "~1.6.18"
|
||||||
unpipe "1.0.0"
|
unpipe "1.0.0"
|
||||||
|
|
||||||
|
@ -3921,7 +3921,7 @@ content-disposition@0.5.4:
|
||||||
dependencies:
|
dependencies:
|
||||||
safe-buffer "5.2.1"
|
safe-buffer "5.2.1"
|
||||||
|
|
||||||
content-type@~1.0.4:
|
content-type@~1.0.4, content-type@~1.0.5:
|
||||||
version "1.0.5"
|
version "1.0.5"
|
||||||
resolved "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz"
|
resolved "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz"
|
||||||
integrity sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==
|
integrity sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==
|
||||||
|
@ -3941,10 +3941,10 @@ cookie-signature@1.0.6:
|
||||||
resolved "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz"
|
resolved "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz"
|
||||||
integrity sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==
|
integrity sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==
|
||||||
|
|
||||||
cookie@0.5.0:
|
cookie@0.6.0:
|
||||||
version "0.5.0"
|
version "0.6.0"
|
||||||
resolved "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz"
|
resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.6.0.tgz#2798b04b071b0ecbff0dbb62a505a8efa4e19051"
|
||||||
integrity sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==
|
integrity sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==
|
||||||
|
|
||||||
copy-descriptor@^0.1.0:
|
copy-descriptor@^0.1.0:
|
||||||
version "0.1.1"
|
version "0.1.1"
|
||||||
|
@ -5325,16 +5325,16 @@ expand-template@^2.0.3:
|
||||||
integrity sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==
|
integrity sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==
|
||||||
|
|
||||||
express@^4.17.1, express@^4.17.3:
|
express@^4.17.1, express@^4.17.3:
|
||||||
version "4.18.2"
|
version "4.19.2"
|
||||||
resolved "https://registry.npmjs.org/express/-/express-4.18.2.tgz"
|
resolved "https://registry.yarnpkg.com/express/-/express-4.19.2.tgz#e25437827a3aa7f2a827bc8171bbbb664a356465"
|
||||||
integrity sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==
|
integrity sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==
|
||||||
dependencies:
|
dependencies:
|
||||||
accepts "~1.3.8"
|
accepts "~1.3.8"
|
||||||
array-flatten "1.1.1"
|
array-flatten "1.1.1"
|
||||||
body-parser "1.20.1"
|
body-parser "1.20.2"
|
||||||
content-disposition "0.5.4"
|
content-disposition "0.5.4"
|
||||||
content-type "~1.0.4"
|
content-type "~1.0.4"
|
||||||
cookie "0.5.0"
|
cookie "0.6.0"
|
||||||
cookie-signature "1.0.6"
|
cookie-signature "1.0.6"
|
||||||
debug "2.6.9"
|
debug "2.6.9"
|
||||||
depd "2.0.0"
|
depd "2.0.0"
|
||||||
|
@ -9924,10 +9924,10 @@ range-parser@^1.2.1, range-parser@~1.2.1:
|
||||||
resolved "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz"
|
resolved "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz"
|
||||||
integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==
|
integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==
|
||||||
|
|
||||||
raw-body@2.5.1:
|
raw-body@2.5.2:
|
||||||
version "2.5.1"
|
version "2.5.2"
|
||||||
resolved "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz"
|
resolved "https://registry.yarnpkg.com/raw-body/-/raw-body-2.5.2.tgz#99febd83b90e08975087e8f1f9419a149366b68a"
|
||||||
integrity sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==
|
integrity sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==
|
||||||
dependencies:
|
dependencies:
|
||||||
bytes "3.1.2"
|
bytes "3.1.2"
|
||||||
http-errors "2.0.0"
|
http-errors "2.0.0"
|
||||||
|
@ -12406,9 +12406,9 @@ webpack-bundle-analyzer@^4.5.0:
|
||||||
ws "^7.3.1"
|
ws "^7.3.1"
|
||||||
|
|
||||||
webpack-dev-middleware@^5.3.1:
|
webpack-dev-middleware@^5.3.1:
|
||||||
version "5.3.3"
|
version "5.3.4"
|
||||||
resolved "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.3.tgz"
|
resolved "https://registry.yarnpkg.com/webpack-dev-middleware/-/webpack-dev-middleware-5.3.4.tgz#eb7b39281cbce10e104eb2b8bf2b63fce49a3517"
|
||||||
integrity sha512-hj5CYrY0bZLB+eTO+x/j67Pkrquiy7kWepMHmUMoPsmcUaeEnQJqFzHJOyxgWlq746/wUuA64p9ta34Kyb01pA==
|
integrity sha512-BVdTqhhs+0IfoeAf7EoH5WE+exCmqGerHfDM0IL096Px60Tq2Mn9MAbnaGUe6HiMa41KMCYF19gyzZmBcq/o4Q==
|
||||||
dependencies:
|
dependencies:
|
||||||
colorette "^2.0.10"
|
colorette "^2.0.10"
|
||||||
memfs "^3.4.3"
|
memfs "^3.4.3"
|
||||||
|
|
|
@@ -96,6 +96,8 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger):
     async def async_moderation_hook(
         self,
         data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        call_type: Literal["completion", "embeddings", "image_generation"],
     ):
         """
         - Calls Google's Text Moderation API

@@ -99,6 +99,8 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
     async def async_moderation_hook(
         self,
         data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        call_type: Literal["completion", "embeddings", "image_generation"],
     ):
         """
         - Calls the Llama Guard Endpoint
@@ -22,6 +22,7 @@ from litellm.utils import (
 )
 from datetime import datetime
 import aiohttp, asyncio
+from litellm.utils import get_formatted_prompt

 litellm.set_verbose = True

@@ -29,9 +30,12 @@ litellm.set_verbose = True
 class _ENTERPRISE_LLMGuard(CustomLogger):
     # Class variables or attributes
     def __init__(
-        self, mock_testing: bool = False, mock_redacted_text: Optional[dict] = None
+        self,
+        mock_testing: bool = False,
+        mock_redacted_text: Optional[dict] = None,
     ):
         self.mock_redacted_text = mock_redacted_text
+        self.llm_guard_mode = litellm.llm_guard_mode
         if mock_testing == True:  # for testing purposes only
             return
         self.llm_guard_api_base = litellm.get_secret("LLM_GUARD_API_BASE", None)

@@ -59,7 +63,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
             else:
                 # Make the first request to /analyze
                 analyze_url = f"{self.llm_guard_api_base}analyze/prompt"
-                verbose_proxy_logger.debug(f"Making request to: {analyze_url}")
+                verbose_proxy_logger.debug("Making request to: %s", analyze_url)
                 analyze_payload = {"prompt": text}
                 redacted_text = None
                 async with session.post(

@@ -72,7 +76,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
             if redacted_text is not None:
                 if (
                     redacted_text.get("is_valid", None) is not None
-                    and redacted_text["is_valid"] == "True"
+                    and redacted_text["is_valid"] != True
                 ):
                     raise HTTPException(
                         status_code=400,

@@ -91,9 +95,26 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
             traceback.print_exc()
             raise e

+    def should_proceed(self, user_api_key_dict: UserAPIKeyAuth) -> bool:
+        if self.llm_guard_mode == "key-specific":
+            # check if llm guard enabled for specific keys only
+            self.print_verbose(
+                f"user_api_key_dict.permissions: {user_api_key_dict.permissions}"
+            )
+            if (
+                user_api_key_dict.permissions.get("enable_llm_guard_check", False)
+                == True
+            ):
+                return True
+        elif self.llm_guard_mode == "all":
+            return True
+        return False
+
     async def async_moderation_hook(
         self,
         data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        call_type: Literal["completion", "embeddings", "image_generation"],
     ):
         """
         - Calls the LLM Guard Endpoint

@@ -101,8 +122,33 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
         - Use the sanitized prompt returned
         - LLM Guard can handle things like PII Masking, etc.
         """
+        self.print_verbose(
+            f"Inside LLM Guard Pre-Call Hook - llm_guard_mode={self.llm_guard_mode}"
+        )
+
+        _proceed = self.should_proceed(user_api_key_dict=user_api_key_dict)
+        if _proceed == False:
+            return
+
+        self.print_verbose("Makes LLM Guard Check")
+        try:
+            assert call_type in [
+                "completion",
+                "embeddings",
+                "image_generation",
+                "moderation",
+                "audio_transcription",
+            ]
+        except Exception as e:
+            self.print_verbose(
+                f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']"
+            )
             return data
+
+        formatted_prompt = get_formatted_prompt(data=data, call_type=call_type)  # type: ignore
+        self.print_verbose(f"LLM Guard, formatted_prompt: {formatted_prompt}")
+        return await self.moderation_check(text=formatted_prompt)

     async def async_post_call_streaming_hook(
         self, user_api_key_dict: UserAPIKeyAuth, response: str
     ):
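The hunks above gate the LLM Guard moderation check on `litellm.llm_guard_mode`. As a quick illustration, here is a minimal, standalone Python sketch of that gating logic; it mirrors `should_proceed` from the diff, but the helper name `should_run_llm_guard` and the sample permission dicts are illustrative, not part of the source.

```python
# Sketch of the new gating behavior: "all" checks every request, while
# "key-specific" only checks keys whose permissions set enable_llm_guard_check=True.
def should_run_llm_guard(llm_guard_mode: str, key_permissions: dict) -> bool:
    if llm_guard_mode == "all":
        return True
    if llm_guard_mode == "key-specific":
        # permission name taken from the diff above
        return key_permissions.get("enable_llm_guard_check", False) is True
    return False


assert should_run_llm_guard("all", {}) is True
assert should_run_llm_guard("key-specific", {}) is False
assert should_run_llm_guard("key-specific", {"enable_llm_guard_check": True}) is True
```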
litellm-js/proxy/README.md  (new file, 8 lines)
@@ -0,0 +1,8 @@
```
npm install
npm run dev
```

```
npm run deploy
```

litellm-js/proxy/package.json  (new file, 14 lines)
@@ -0,0 +1,14 @@
{
  "scripts": {
    "dev": "wrangler dev src/index.ts",
    "deploy": "wrangler deploy --minify src/index.ts"
  },
  "dependencies": {
    "hono": "^4.1.4",
    "openai": "^4.29.2"
  },
  "devDependencies": {
    "@cloudflare/workers-types": "^4.20240208.0",
    "wrangler": "^3.32.0"
  }
}

litellm-js/proxy/src/index.ts  (new file, 59 lines)
@@ -0,0 +1,59 @@
import { Hono } from 'hono'
import { Context } from 'hono';
import { bearerAuth } from 'hono/bearer-auth'
import OpenAI from "openai";

const openai = new OpenAI({
  apiKey: "sk-1234",
  baseURL: "https://openai-endpoint.ishaanjaffer0324.workers.dev"
});

async function call_proxy() {
  const completion = await openai.chat.completions.create({
    messages: [{ role: "system", content: "You are a helpful assistant." }],
    model: "gpt-3.5-turbo",
  });

  return completion
}

const app = new Hono()

// Middleware for API Key Authentication
const apiKeyAuth = async (c: Context, next: Function) => {
  const apiKey = c.req.header('Authorization');
  if (!apiKey || apiKey !== 'Bearer sk-1234') {
    return c.text('Unauthorized', 401);
  }
  await next();
};

app.use('/*', apiKeyAuth)

app.get('/', (c) => {
  return c.text('Hello Hono!')
})

// Handler for chat completions
const chatCompletionHandler = async (c: Context) => {
  // Assuming your logic for handling chat completion goes here
  // For demonstration, just returning a simple JSON response
  const response = await call_proxy()
  return c.json(response);
};

// Register the above handler for different POST routes with the apiKeyAuth middleware
app.post('/v1/chat/completions', chatCompletionHandler);
app.post('/chat/completions', chatCompletionHandler);

// Example showing how you might handle dynamic segments within the URL
// Here, using ':model*' to capture the rest of the path as a parameter 'model'
app.post('/openai/deployments/:model*/chat/completions', chatCompletionHandler);

export default app

litellm-js/proxy/tsconfig.json  (new file, 16 lines)
@@ -0,0 +1,16 @@
{
  "compilerOptions": {
    "target": "ESNext",
    "module": "ESNext",
    "moduleResolution": "Bundler",
    "strict": true,
    "lib": [
      "ESNext"
    ],
    "types": [
      "@cloudflare/workers-types"
    ],
    "jsx": "react-jsx",
    "jsxImportSource": "hono/jsx"
  },
}

litellm-js/proxy/wrangler.toml  (new file, 18 lines)
@@ -0,0 +1,18 @@
name = "my-app"
compatibility_date = "2023-12-01"

# [vars]
# MY_VAR = "my-variable"

# [[kv_namespaces]]
# binding = "MY_KV_NAMESPACE"
# id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# [[r2_buckets]]
# binding = "MY_BUCKET"
# bucket_name = "my-bucket"

# [[d1_databases]]
# binding = "DB"
# database_name = "my-database"
# database_id = ""
litellm-js/spend-logs/Dockerfile  (new file, 26 lines)
@@ -0,0 +1,26 @@
# Use the specific Node.js v20.11.0 image
FROM node:20.11.0

# Set the working directory inside the container
WORKDIR /app

# Copy package.json and package-lock.json to the working directory
COPY ./litellm-js/spend-logs/package*.json ./

# Install dependencies
RUN npm install

# Install Prisma globally
RUN npm install -g prisma

# Copy the rest of the application code
COPY ./litellm-js/spend-logs .

# Generate Prisma client
RUN npx prisma generate

# Expose the port that the Node.js server will run on
EXPOSE 3000

# Command to run the Node.js app with npm run dev
CMD ["npm", "run", "dev"]

litellm-js/spend-logs/README.md  (new file, 8 lines)
@@ -0,0 +1,8 @@
```
npm install
npm run dev
```

```
open http://localhost:3000
```
litellm-js/spend-logs/package-lock.json  (generated, new file, 508 lines)
@@ -0,0 +1,508 @@
{
  "name": "spend-logs",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "dependencies": {
        "@hono/node-server": "^1.9.0",
        "hono": "^4.1.5"
      },
      "devDependencies": {
        "@types/node": "^20.11.17",
        "tsx": "^4.7.1"
      }
    },
    "node_modules/@esbuild/aix-ppc64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz",
      "integrity": "sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA==",
      "cpu": ["ppc64"], "os": ["aix"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/android-arm": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.12.tgz",
      "integrity": "sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w==",
      "cpu": ["arm"], "os": ["android"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/android-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz",
      "integrity": "sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA==",
      "cpu": ["arm64"], "os": ["android"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/android-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.12.tgz",
      "integrity": "sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew==",
      "cpu": ["x64"], "os": ["android"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/darwin-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz",
      "integrity": "sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g==",
      "cpu": ["arm64"], "os": ["darwin"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/darwin-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz",
      "integrity": "sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A==",
      "cpu": ["x64"], "os": ["darwin"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/freebsd-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz",
      "integrity": "sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA==",
      "cpu": ["arm64"], "os": ["freebsd"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/freebsd-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz",
      "integrity": "sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg==",
      "cpu": ["x64"], "os": ["freebsd"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-arm": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz",
      "integrity": "sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w==",
      "cpu": ["arm"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz",
      "integrity": "sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA==",
      "cpu": ["arm64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-ia32": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz",
      "integrity": "sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA==",
      "cpu": ["ia32"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-loong64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz",
      "integrity": "sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA==",
      "cpu": ["loong64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-mips64el": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz",
      "integrity": "sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w==",
      "cpu": ["mips64el"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-ppc64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz",
      "integrity": "sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg==",
      "cpu": ["ppc64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-riscv64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz",
      "integrity": "sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg==",
      "cpu": ["riscv64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-s390x": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz",
      "integrity": "sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg==",
      "cpu": ["s390x"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz",
      "integrity": "sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg==",
      "cpu": ["x64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/netbsd-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz",
      "integrity": "sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA==",
      "cpu": ["x64"], "os": ["netbsd"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/openbsd-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz",
      "integrity": "sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw==",
      "cpu": ["x64"], "os": ["openbsd"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/sunos-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz",
      "integrity": "sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA==",
      "cpu": ["x64"], "os": ["sunos"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/win32-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz",
      "integrity": "sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A==",
      "cpu": ["arm64"], "os": ["win32"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/win32-ia32": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz",
      "integrity": "sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ==",
      "cpu": ["ia32"], "os": ["win32"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/win32-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz",
      "integrity": "sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA==",
      "cpu": ["x64"], "os": ["win32"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@hono/node-server": {
      "version": "1.9.0",
      "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.9.0.tgz",
      "integrity": "sha512-oJjk7WXBlENeHhWiMqSyxPIZ3Kmf5ZYxqdlcSIXyN8Rn50bNJsPl99G4POBS03Jxh56FdfRJ0SEnC8mAVIiavQ==",
      "engines": { "node": ">=18.14.1" }
    },
    "node_modules/@types/node": {
      "version": "20.11.30",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.11.30.tgz",
      "integrity": "sha512-dHM6ZxwlmuZaRmUPfv1p+KrdD1Dci04FbdEm/9wEMouFqxYoFl5aMkt0VMAUtYRQDyYvD41WJLukhq/ha3YuTw==",
      "dev": true,
      "dependencies": { "undici-types": "~5.26.4" }
    },
    "node_modules/esbuild": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.12.tgz",
      "integrity": "sha512-aARqgq8roFBj054KvQr5f1sFu0D65G+miZRCuJyJ0G13Zwx7vRar5Zhn2tkQNzIXcBrNVsv/8stehpj+GAjgbg==",
      "dev": true,
      "hasInstallScript": true,
      "bin": { "esbuild": "bin/esbuild" },
      "engines": { "node": ">=12" },
      "optionalDependencies": {
        "@esbuild/aix-ppc64": "0.19.12",
        "@esbuild/android-arm": "0.19.12",
        "@esbuild/android-arm64": "0.19.12",
        "@esbuild/android-x64": "0.19.12",
        "@esbuild/darwin-arm64": "0.19.12",
        "@esbuild/darwin-x64": "0.19.12",
        "@esbuild/freebsd-arm64": "0.19.12",
        "@esbuild/freebsd-x64": "0.19.12",
        "@esbuild/linux-arm": "0.19.12",
        "@esbuild/linux-arm64": "0.19.12",
        "@esbuild/linux-ia32": "0.19.12",
        "@esbuild/linux-loong64": "0.19.12",
        "@esbuild/linux-mips64el": "0.19.12",
        "@esbuild/linux-ppc64": "0.19.12",
        "@esbuild/linux-riscv64": "0.19.12",
        "@esbuild/linux-s390x": "0.19.12",
        "@esbuild/linux-x64": "0.19.12",
        "@esbuild/netbsd-x64": "0.19.12",
        "@esbuild/openbsd-x64": "0.19.12",
        "@esbuild/sunos-x64": "0.19.12",
        "@esbuild/win32-arm64": "0.19.12",
        "@esbuild/win32-ia32": "0.19.12",
        "@esbuild/win32-x64": "0.19.12"
      }
    },
    "node_modules/fsevents": {
      "version": "2.3.3",
      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
      "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
      "dev": true, "hasInstallScript": true, "optional": true, "os": ["darwin"],
      "engines": { "node": "^8.16.0 || ^10.6.0 || >=11.0.0" }
    },
    "node_modules/get-tsconfig": {
      "version": "4.7.3",
      "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.3.tgz",
      "integrity": "sha512-ZvkrzoUA0PQZM6fy6+/Hce561s+faD1rsNwhnO5FelNjyy7EMGJ3Rz1AQ8GYDWjhRs/7dBLOEJvhK8MiEJOAFg==",
      "dev": true,
      "dependencies": { "resolve-pkg-maps": "^1.0.0" },
      "funding": { "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" }
    },
    "node_modules/hono": {
      "version": "4.1.5",
      "resolved": "https://registry.npmjs.org/hono/-/hono-4.1.5.tgz",
      "integrity": "sha512-3ChJiIoeCxvkt6vnkxJagplrt1YZg3NyNob7ssVeK2PUqEINp4q1F94HzFnvY9QE8asVmbW5kkTDlyWylfg2vg==",
      "engines": { "node": ">=16.0.0" }
    },
    "node_modules/resolve-pkg-maps": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
      "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
      "dev": true,
      "funding": { "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" }
    },
    "node_modules/tsx": {
      "version": "4.7.1",
      "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.7.1.tgz",
      "integrity": "sha512-8d6VuibXHtlN5E3zFkgY8u4DX7Y3Z27zvvPKVmLon/D4AjuKzarkUBTLDBgj9iTQ0hg5xM7c/mYiRVM+HETf0g==",
      "dev": true,
      "dependencies": { "esbuild": "~0.19.10", "get-tsconfig": "^4.7.2" },
      "bin": { "tsx": "dist/cli.mjs" },
      "engines": { "node": ">=18.0.0" },
      "optionalDependencies": { "fsevents": "~2.3.3" }
    },
    "node_modules/undici-types": {
      "version": "5.26.5",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
      "dev": true
    }
  }
}
litellm-js/spend-logs/package.json  (new file, 13 lines)
@@ -0,0 +1,13 @@
{
  "scripts": {
    "dev": "tsx watch src/index.ts"
  },
  "dependencies": {
    "@hono/node-server": "^1.9.0",
    "hono": "^4.1.5"
  },
  "devDependencies": {
    "@types/node": "^20.11.17",
    "tsx": "^4.7.1"
  }
}

litellm-js/spend-logs/schema.prisma  (new file, 29 lines)
@@ -0,0 +1,29 @@
generator client {
  provider = "prisma-client-js"
}

datasource client {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

model LiteLLM_SpendLogs {
  request_id         String   @id
  call_type          String
  api_key            String   @default("")
  spend              Float    @default(0.0)
  total_tokens       Int      @default(0)
  prompt_tokens      Int      @default(0)
  completion_tokens  Int      @default(0)
  startTime          DateTime
  endTime            DateTime
  model              String   @default("")
  api_base           String   @default("")
  user               String   @default("")
  metadata           Json     @default("{}")
  cache_hit          String   @default("")
  cache_key          String   @default("")
  request_tags       Json     @default("[]")
  team_id            String?
  end_user           String?
}

litellm-js/spend-logs/src/_types.ts  (new file, 32 lines)
@@ -0,0 +1,32 @@
export type LiteLLM_IncrementSpend = {
  key_transactions: Array<LiteLLM_IncrementObject>, // [{"key": spend},..]
  user_transactions: Array<LiteLLM_IncrementObject>,
  team_transactions: Array<LiteLLM_IncrementObject>,
  spend_logs_transactions: Array<LiteLLM_SpendLogs>
}

export type LiteLLM_IncrementObject = {
  key: string,
  spend: number
}

export type LiteLLM_SpendLogs = {
  request_id: string; // @id means it's a unique identifier
  call_type: string;
  api_key: string; // @default("") means it defaults to an empty string if not provided
  spend: number; // Float in Prisma corresponds to number in TypeScript
  total_tokens: number; // Int in Prisma corresponds to number in TypeScript
  prompt_tokens: number;
  completion_tokens: number;
  startTime: Date; // DateTime in Prisma corresponds to Date in TypeScript
  endTime: Date;
  model: string; // @default("") means it defaults to an empty string if not provided
  api_base: string;
  user: string;
  metadata: any; // Json type in Prisma is represented by any in TypeScript; could also use a more specific type if the structure of JSON is known
  cache_hit: string;
  cache_key: string;
  request_tags: any; // Similarly, this could be an array or a more specific type depending on the expected structure
  team_id?: string | null; // ? indicates it's optional and can be undefined, but could also be null if not provided
  end_user?: string | null;
};

litellm-js/spend-logs/src/index.ts  (new file, 84 lines)
@@ -0,0 +1,84 @@
import { serve } from '@hono/node-server'
import { Hono } from 'hono'
import { PrismaClient } from '@prisma/client'
import {LiteLLM_SpendLogs, LiteLLM_IncrementSpend, LiteLLM_IncrementObject} from './_types'

const app = new Hono()
const prisma = new PrismaClient()
// In-memory storage for logs
let spend_logs: LiteLLM_SpendLogs[] = [];
const key_logs: LiteLLM_IncrementObject[] = [];
const user_logs: LiteLLM_IncrementObject[] = [];
const transaction_logs: LiteLLM_IncrementObject[] = [];


app.get('/', (c) => {
  return c.text('Hello Hono!')
})

const MIN_LOGS = 1; // Minimum number of logs needed to initiate a flush
const FLUSH_INTERVAL = 5000; // Time in ms to wait before trying to flush again
const BATCH_SIZE = 100; // Preferred size of each batch to write to the database
const MAX_LOGS_PER_INTERVAL = 1000; // Maximum number of logs to flush in a single interval

const flushLogsToDb = async () => {
  if (spend_logs.length >= MIN_LOGS) {
    // Limit the logs to process in this interval to MAX_LOGS_PER_INTERVAL or less
    const logsToProcess = spend_logs.slice(0, MAX_LOGS_PER_INTERVAL);

    for (let i = 0; i < logsToProcess.length; i += BATCH_SIZE) {
      // Create subarray for current batch, ensuring it doesn't exceed the BATCH_SIZE
      const batch = logsToProcess.slice(i, i + BATCH_SIZE);

      // Convert datetime strings to Date objects
      const batchWithDates = batch.map(entry => ({
        ...entry,
        startTime: new Date(entry.startTime),
        endTime: new Date(entry.endTime),
        // Repeat for any other DateTime fields you may have
      }));

      await prisma.liteLLM_SpendLogs.createMany({
        data: batchWithDates,
      });

      console.log(`Flushed ${batch.length} logs to the DB.`);
    }

    // Remove the processed logs from spend_logs
    spend_logs = spend_logs.slice(logsToProcess.length);

    console.log(`${logsToProcess.length} logs processed. Remaining in queue: ${spend_logs.length}`);
  } else {
    // This will ensure it doesn't falsely claim "No logs to flush." when it's merely below the MIN_LOGS threshold.
    if(spend_logs.length > 0) {
      console.log(`Accumulating logs. Currently at ${spend_logs.length}, waiting for at least ${MIN_LOGS}.`);
    } else {
      console.log("No logs to flush.");
    }
  }
};

// Setup interval for attempting to flush the logs
setInterval(flushLogsToDb, FLUSH_INTERVAL);

// Route to receive log messages
app.post('/spend/update', async (c) => {
  const incomingLogs = await c.req.json<LiteLLM_SpendLogs[]>();

  spend_logs.push(...incomingLogs);

  console.log(`Received and stored ${incomingLogs.length} logs. Total logs in memory: ${spend_logs.length}`);

  return c.json({ message: `Successfully stored ${incomingLogs.length} logs` });
});


const port = 3000
console.log(`Server is running on port ${port}`)

serve({
  fetch: app.fetch,
  port
})
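The service above accepts batches of spend logs on `POST /spend/update` (port 3000) and flushes them to Postgres on a timer. A minimal client sketch, assuming the server is running locally; the field names follow the `LiteLLM_SpendLogs` type from `_types.ts`, while the sample values and the use of the `requests` library are purely illustrative:

```python
import datetime
import uuid

import requests  # assumes the spend-logs server above is running on localhost:3000

now = datetime.datetime.utcnow().isoformat()
log = {
    "request_id": str(uuid.uuid4()),
    "call_type": "acompletion",
    "api_key": "hashed-key-example",   # illustrative value
    "spend": 0.00042,
    "total_tokens": 120,
    "prompt_tokens": 100,
    "completion_tokens": 20,
    "startTime": now,
    "endTime": now,
    "model": "gpt-3.5-turbo",
    "api_base": "",
    "user": "",
    "metadata": {},
    "cache_hit": "False",
    "cache_key": "",
    "request_tags": [],
}

# The endpoint expects a JSON array; logs are buffered in memory and
# written to the DB in batches by flushLogsToDb.
resp = requests.post("http://localhost:3000/spend/update", json=[log])
print(resp.json())
```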
litellm-js/spend-logs/tsconfig.json  (new file, 13 lines)
@@ -0,0 +1,13 @@
{
  "compilerOptions": {
    "target": "ESNext",
    "module": "ESNext",
    "moduleResolution": "Bundler",
    "strict": true,
    "types": [
      "node"
    ],
    "jsx": "react-jsx",
    "jsxImportSource": "hono/jsx",
  }
}
@@ -1,6 +1,6 @@
 ### INIT VARIABLES ###
 import threading, requests, os
-from typing import Callable, List, Optional, Dict, Union, Any
+from typing import Callable, List, Optional, Dict, Union, Any, Literal
 from litellm.caching import Cache
 from litellm._logging import set_verbose, _turn_on_debug, verbose_logger
 from litellm.proxy._types import KeyManagementSystem, KeyManagementSettings

@@ -56,6 +56,7 @@ baseten_key: Optional[str] = None
 aleph_alpha_key: Optional[str] = None
 nlp_cloud_key: Optional[str] = None
 use_client: bool = False
+disable_streaming_logging: bool = False
 ### GUARDRAILS ###
 llamaguard_model_name: Optional[str] = None
 presidio_ad_hoc_recognizers: Optional[str] = None

@@ -63,6 +64,7 @@ google_moderation_confidence_threshold: Optional[float] = None
 llamaguard_unsafe_content_categories: Optional[str] = None
 blocked_user_list: Optional[Union[str, List]] = None
 banned_keywords_list: Optional[Union[str, List]] = None
+llm_guard_mode: Literal["all", "key-specific"] = "all"
 ##################
 logging: bool = True
 caching: bool = (

@@ -172,6 +174,7 @@ upperbound_key_generate_params: Optional[Dict] = None
 default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
+max_end_user_budget: Optional[float] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
 num_retries: Optional[int] = None  # per model endpoint
@@ -38,6 +38,9 @@ class BaseCache:
     async def async_get_cache(self, key, **kwargs):
         raise NotImplementedError

+    async def batch_cache_write(self, result, *args, **kwargs):
+        raise NotImplementedError
+
     async def disconnect(self):
         raise NotImplementedError

@@ -96,7 +99,9 @@ class InMemoryCache(BaseCache):
 class RedisCache(BaseCache):
     # if users don't provider one, use the default litellm cache

-    def __init__(self, host=None, port=None, password=None, **kwargs):
+    def __init__(
+        self, host=None, port=None, password=None, redis_flush_size=100, **kwargs
+    ):
         from ._redis import get_redis_client, get_redis_connection_pool

         redis_kwargs = {}

@@ -111,6 +116,10 @@ class RedisCache(BaseCache):
         self.redis_client = get_redis_client(**redis_kwargs)
         self.redis_kwargs = redis_kwargs
         self.async_redis_conn_pool = get_redis_connection_pool(**redis_kwargs)
+
+        # for high traffic, we store the redis results in memory and then batch write to redis
+        self.redis_batch_writing_buffer = []
+        self.redis_flush_size = redis_flush_size
         self.redis_version = "Unknown"
         try:
             self.redis_version = self.redis_client.info()["redis_version"]

@@ -161,8 +170,10 @@ class RedisCache(BaseCache):
             )
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
-            print_verbose(
-                f"LiteLLM Redis Caching: async set() - Got exception from REDIS : {str(e)}"
+            verbose_logger.error(
+                "LiteLLM Redis Caching: async set() - Got exception from REDIS %s, Writing value=%s",
+                str(e),
+                value,
             )
             traceback.print_exc()

@@ -191,7 +202,27 @@ class RedisCache(BaseCache):
             # Optionally, you could process 'results' to make sure that all set operations were successful.
             return results
         except Exception as e:
-            print_verbose(f"Error occurred in pipeline write - {str(e)}")
+            verbose_logger.error(
+                "LiteLLM Redis Caching: async set_cache_pipeline() - Got exception from REDIS %s, Writing value=%s",
+                str(e),
+                cache_value,
+            )
+            traceback.print_exc()
+
+    async def batch_cache_write(self, key, value, **kwargs):
+        print_verbose(
+            f"in batch cache writing for redis buffer size={len(self.redis_batch_writing_buffer)}",
+        )
+        self.redis_batch_writing_buffer.append((key, value))
+        if len(self.redis_batch_writing_buffer) >= self.redis_flush_size:
+            await self.flush_cache_buffer()
+
+    async def flush_cache_buffer(self):
+        print_verbose(
+            f"flushing to redis....reached size of buffer {len(self.redis_batch_writing_buffer)}"
+        )
+        await self.async_set_cache_pipeline(self.redis_batch_writing_buffer)
+        self.redis_batch_writing_buffer = []

     def _get_cache_logic(self, cached_response: Any):
         """

@@ -287,6 +318,9 @@ class RedisCache(BaseCache):
     def flush_cache(self):
         self.redis_client.flushall()

+    def flushall(self):
+        self.redis_client.flushall()
+
     async def disconnect(self):
         await self.async_redis_conn_pool.disconnect(inuse_connections=True)

@@ -874,6 +908,7 @@ class Cache:
         port: Optional[str] = None,
         password: Optional[str] = None,
         namespace: Optional[str] = None,
+        ttl: Optional[float] = None,
         similarity_threshold: Optional[float] = None,
         supported_call_types: Optional[
             List[

@@ -908,6 +943,7 @@ class Cache:
         s3_path: Optional[str] = None,
         redis_semantic_cache_use_async=False,
         redis_semantic_cache_embedding_model="text-embedding-ada-002",
+        redis_flush_size=None,
         **kwargs,
     ):
         """

@@ -930,7 +966,9 @@ class Cache:
             None. Cache is set as a litellm param
         """
         if type == "redis":
-            self.cache: BaseCache = RedisCache(host, port, password, **kwargs)
+            self.cache: BaseCache = RedisCache(
+                host, port, password, redis_flush_size, **kwargs
+            )
         elif type == "redis-semantic":
             self.cache = RedisSemanticCache(
                 host,

@@ -967,6 +1005,8 @@ class Cache:
         self.supported_call_types = supported_call_types  # default to ["completion", "acompletion", "embedding", "aembedding"]
         self.type = type
         self.namespace = namespace
+        self.redis_flush_size = redis_flush_size
+        self.ttl = ttl

     def get_cache_key(self, *args, **kwargs):
         """

@@ -1206,6 +1246,9 @@ class Cache:
         if isinstance(result, OpenAIObject):
             result = result.model_dump_json()
+
+        ## DEFAULT TTL ##
+        if self.ttl is not None:
+            kwargs["ttl"] = self.ttl
         ## Get Cache-Controls ##
         if kwargs.get("cache", None) is not None and isinstance(
             kwargs.get("cache"), dict

@@ -1213,6 +1256,7 @@ class Cache:
         for k, v in kwargs.get("cache").items():
             if k == "ttl":
                 kwargs["ttl"] = v
+
         cached_data = {"timestamp": time.time(), "response": result}
         return cache_key, cached_data, kwargs
     else:

@@ -1246,6 +1290,10 @@ class Cache:
         Async implementation of add_cache
         """
         try:
+            if self.type == "redis" and self.redis_flush_size is not None:
+                # high traffic - fill in results in memory and then flush
+                await self.batch_cache_write(result, *args, **kwargs)
+            else:
                 cache_key, cached_data, kwargs = self._add_cache_logic(
                     result=result, *args, **kwargs
                 )

@@ -1287,6 +1335,12 @@ class Cache:
             print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
             traceback.print_exc()

+    async def batch_cache_write(self, result, *args, **kwargs):
+        cache_key, cached_data, kwargs = self._add_cache_logic(
+            result=result, *args, **kwargs
+        )
+        await self.cache.batch_cache_write(cache_key, cached_data, **kwargs)
+
     async def ping(self):
         if hasattr(self.cache, "ping"):
             return await self.cache.ping()
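The caching hunks above add two new `Cache` options: `redis_flush_size` (buffer async writes in memory and flush them to Redis in one pipeline once the buffer fills) and a default `ttl`. A minimal usage sketch; the parameter names come from the diff, while the connection values and the `litellm.cache = Cache(...)` setup pattern are illustrative assumptions:

```python
import litellm
from litellm.caching import Cache

# Buffered Redis writes for high-traffic setups: cached responses are collected
# in memory and flushed via a Redis pipeline once 100 entries accumulate.
litellm.cache = Cache(
    type="redis",
    host="localhost",      # illustrative connection values
    port="6379",
    password="",
    redis_flush_size=100,  # flush buffer size (diff default for RedisCache is 100)
    ttl=600,               # default TTL in seconds, applied when no per-call ttl is given
)
```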
@@ -10,7 +10,7 @@ class AthinaLogger:
             "Content-Type": "application/json"
         }
         self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference"
-        self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response"]
+        self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response", "user_query"]

     def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
         import requests

@@ -32,8 +32,6 @@ class AthinaLogger:

         if "messages" in kwargs:
             data["prompt"] = kwargs.get("messages", None)
-            if kwargs.get("messages") and len(kwargs.get("messages")) > 0:
-                data["user_query"] = kwargs.get("messages")[0].get("content", None)

         # Directly add tools or functions if present
         optional_params = kwargs.get("optional_params", {})
@@ -72,7 +72,12 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
     ):
         pass

-    async def async_moderation_hook(self, data: dict):
+    async def async_moderation_hook(
+        self,
+        data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        call_type: Literal["completion", "embeddings", "image_generation"],
+    ):
         pass

     async def async_post_call_streaming_hook(
@@ -246,13 +246,13 @@ class LangFuseLogger:
         metadata_tags = metadata.get("tags", [])
         tags = metadata_tags

-        generation_name = metadata.get("generation_name", None)
-        if generation_name is None:
-            # just log `litellm-{call_type}` as the generation name
-            generation_name = f"litellm-{kwargs.get('call_type', 'completion')}"
+        trace_name = metadata.get("trace_name", None)
+        if trace_name is None:
+            # just log `litellm-{call_type}` as the trace name
+            trace_name = f"litellm-{kwargs.get('call_type', 'completion')}"

         trace_params = {
-            "name": generation_name,
+            "name": trace_name,
             "input": input,
             "user_id": metadata.get("trace_user_id", user_id),
             "id": metadata.get("trace_id", None),

@@ -311,6 +311,11 @@ class LangFuseLogger:
             "completion_tokens": response_obj["usage"]["completion_tokens"],
             "total_cost": cost if supports_costs else None,
         }
+        generation_name = metadata.get("generation_name", None)
+        if generation_name is None:
+            # just log `litellm-{call_type}` as the generation name
+            generation_name = f"litellm-{kwargs.get('call_type', 'completion')}"
+
         generation_params = {
             "name": generation_name,
             "id": metadata.get("generation_id", generation_id),
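With the Langfuse change above, `trace_name` in request metadata names the trace while `generation_name` names the generation inside it, instead of one key doing both. A hedged usage sketch; the `success_callback` setup and model choice are assumptions about typical LiteLLM + Langfuse usage, only the metadata keys come from the diff:

```python
import litellm

litellm.success_callback = ["langfuse"]  # assumes Langfuse credentials are configured

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    metadata={
        "trace_name": "my-app-request",     # labels the Langfuse trace
        "generation_name": "draft-answer",  # labels the generation within the trace
        "tags": ["example"],
    },
)
```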
@@ -131,18 +131,24 @@ def completion(
             )
         else:
             # Separate system prompt from rest of message
-            system_prompt_idx: Optional[int] = None
+            system_prompt_indices = []
+            system_prompt = ""
             for idx, message in enumerate(messages):
                 if message["role"] == "system":
-                    optional_params["system"] = message["content"]
-                    system_prompt_idx = idx
-                    break
-            if system_prompt_idx is not None:
-                messages.pop(system_prompt_idx)
+                    system_prompt += message["content"]
+                    system_prompt_indices.append(idx)
+            if len(system_prompt_indices) > 0:
+                for idx in reversed(system_prompt_indices):
+                    messages.pop(idx)
+            if len(system_prompt) > 0:
+                optional_params["system"] = system_prompt
             # Format rest of message according to anthropic guidelines
-            messages = prompt_factory(
-                model=model, messages=messages, custom_llm_provider="anthropic"
-            )
+            try:
+                messages = prompt_factory(
+                    model=model, messages=messages, custom_llm_provider="anthropic"
+                )
+            except Exception as e:
+                raise AnthropicError(status_code=400, message=str(e))

         ## Load Config
         config = litellm.AnthropicConfig.get_config()

@@ -295,7 +301,7 @@ def completion(
             )
             streaming_choice.delta = delta_obj
             streaming_model_response.choices = [streaming_choice]
-            completion_stream = model_response_iterator(
+            completion_stream = ModelResponseIterator(
                 model_response=streaming_model_response
             )
             print_verbose(

@@ -324,8 +330,30 @@ def completion(
         return model_response


-def model_response_iterator(model_response):
-    yield model_response
+class ModelResponseIterator:
+    def __init__(self, model_response):
+        self.model_response = model_response
+        self.is_done = False
+
+    # Sync iterator
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.is_done:
+            raise StopIteration
+        self.is_done = True
+        return self.model_response
+
+    # Async iterator
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        if self.is_done:
+            raise StopAsyncIteration
+        self.is_done = True
+        return self.model_response


 def embedding():
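The first hunk above changes Anthropic system-prompt handling: every `system` message is now concatenated into one `system` parameter and all of them are removed from `messages` (previously only the first was used). A standalone Python sketch of that exact logic with made-up message content, not the litellm function itself:

```python
messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "Hi"},
    {"role": "system", "content": " Answer in French."},
]

system_prompt = ""
system_prompt_indices = []
for idx, message in enumerate(messages):
    if message["role"] == "system":
        system_prompt += message["content"]
        system_prompt_indices.append(idx)

# Pop from the end so the earlier indices stay valid while removing.
for idx in reversed(system_prompt_indices):
    messages.pop(idx)

assert system_prompt == "You are terse. Answer in French."
assert messages == [{"role": "user", "content": "Hi"}]
```

Popping in reverse order is the reason the diff iterates `reversed(system_prompt_indices)`: removing index 0 first would shift every later system-message index and pop the wrong elements.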
@@ -11,6 +11,7 @@ from .prompt_templates.factory import (
     construct_tool_use_system_prompt,
     extract_between_tags,
     parse_xml_params,
+    contains_tag,
 )
 import httpx

@@ -78,11 +79,13 @@ class AmazonTitanConfig:

 class AmazonAnthropicClaude3Config:
     """
-    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
+    Reference:
+        https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
+        https://docs.anthropic.com/claude/docs/models-overview#model-comparison

     Supported Params for the Amazon / Anthropic Claude 3 models:

-    - `max_tokens` Required (integer) max tokens,
+    - `max_tokens` Required (integer) max tokens. Default is 4096
     - `anthropic_version` Required (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
     - `system` Optional (string) the system prompt, conversion from openai format to this is handled in factory.py
     - `temperature` Optional (float) The amount of randomness injected into the response

@@ -91,7 +94,7 @@ class AmazonAnthropicClaude3Config:
     - `stop_sequences` Optional (List[str]) Custom text sequences that cause the model to stop generating
     """

-    max_tokens: Optional[int] = litellm.max_tokens
+    max_tokens: Optional[int] = 4096  # Opus, Sonnet, and Haiku default
    anthropic_version: Optional[str] = "bedrock-2023-05-31"
     system: Optional[str] = None
     temperature: Optional[float] = None

@@ -128,7 +131,15 @@ class AmazonAnthropicClaude3Config:
     }

     def get_supported_openai_params(self):
-        return ["max_tokens", "tools", "tool_choice", "stream"]
+        return [
+            "max_tokens",
+            "tools",
+            "tool_choice",
+            "stream",
+            "stop",
+            "temperature",
+            "top_p",
+        ]

     def map_openai_params(self, non_default_params: dict, optional_params: dict):
         for param, value in non_default_params.items():
@ -679,6 +690,7 @@ def completion(
|
||||||
timeout=None,
|
timeout=None,
|
||||||
):
|
):
|
||||||
exception_mapping_worked = False
|
exception_mapping_worked = False
|
||||||
|
_is_function_call = False
|
||||||
try:
|
try:
|
||||||
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
||||||
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
|
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
|
||||||
|
@ -727,8 +739,10 @@ def completion(
|
||||||
system_messages.append(message["content"])
|
system_messages.append(message["content"])
|
||||||
system_prompt_idx.append(idx)
|
system_prompt_idx.append(idx)
|
||||||
if len(system_prompt_idx) > 0:
|
if len(system_prompt_idx) > 0:
|
||||||
inference_params["system"] = '\n'.join(system_messages)
|
inference_params["system"] = "\n".join(system_messages)
|
||||||
messages = [i for j, i in enumerate(messages) if j not in system_prompt_idx]
|
messages = [
|
||||||
|
i for j, i in enumerate(messages) if j not in system_prompt_idx
|
||||||
|
]
|
||||||
# Format rest of message according to anthropic guidelines
|
# Format rest of message according to anthropic guidelines
|
||||||
messages = prompt_factory(
|
messages = prompt_factory(
|
||||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
model=model, messages=messages, custom_llm_provider="anthropic"
|
||||||
|
@ -742,6 +756,7 @@ def completion(
|
||||||
inference_params[k] = v
|
inference_params[k] = v
|
||||||
## Handle Tool Calling
|
## Handle Tool Calling
|
||||||
if "tools" in inference_params:
|
if "tools" in inference_params:
|
||||||
|
_is_function_call = True
|
||||||
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
||||||
tools=inference_params["tools"]
|
tools=inference_params["tools"]
|
||||||
)
|
)
|
||||||
|
@ -823,7 +838,7 @@ def completion(
|
||||||
## COMPLETION CALL
|
## COMPLETION CALL
|
||||||
accept = "application/json"
|
accept = "application/json"
|
||||||
contentType = "application/json"
|
contentType = "application/json"
|
||||||
if stream == True:
|
if stream == True and _is_function_call == False:
|
||||||
if provider == "ai21":
|
if provider == "ai21":
|
||||||
## LOGGING
|
## LOGGING
|
||||||
request_str = f"""
|
request_str = f"""
|
||||||
|
@ -918,7 +933,9 @@ def completion(
|
||||||
elif provider == "anthropic":
|
elif provider == "anthropic":
|
||||||
if model.startswith("anthropic.claude-3"):
|
if model.startswith("anthropic.claude-3"):
|
||||||
outputText = response_body.get("content")[0].get("text", None)
|
outputText = response_body.get("content")[0].get("text", None)
|
||||||
if "<invoke>" in outputText: # OUTPUT PARSE FUNCTION CALL
|
if outputText is not None and contains_tag(
|
||||||
|
"invoke", outputText
|
||||||
|
): # OUTPUT PARSE FUNCTION CALL
|
||||||
function_name = extract_between_tags("tool_name", outputText)[0]
|
function_name = extract_between_tags("tool_name", outputText)[0]
|
||||||
function_arguments_str = extract_between_tags("invoke", outputText)[
|
function_arguments_str = extract_between_tags("invoke", outputText)[
|
||||||
0
|
0
|
||||||
|
@ -941,6 +958,56 @@ def completion(
|
||||||
content=None,
|
content=None,
|
||||||
)
|
)
|
||||||
model_response.choices[0].message = _message # type: ignore
|
model_response.choices[0].message = _message # type: ignore
|
||||||
|
if _is_function_call == True and stream is not None and stream == True:
|
||||||
|
print_verbose(
|
||||||
|
f"INSIDE BEDROCK STREAMING TOOL CALLING CONDITION BLOCK"
|
||||||
|
)
|
||||||
|
# return an iterator
|
||||||
|
streaming_model_response = ModelResponse(stream=True)
|
||||||
|
streaming_model_response.choices[0].finish_reason = (
|
||||||
|
model_response.choices[0].finish_reason
|
||||||
|
)
|
||||||
|
# streaming_model_response.choices = [litellm.utils.StreamingChoices()]
|
||||||
|
streaming_choice = litellm.utils.StreamingChoices()
|
||||||
|
streaming_choice.index = model_response.choices[0].index
|
||||||
|
_tool_calls = []
|
||||||
|
print_verbose(
|
||||||
|
f"type of model_response.choices[0]: {type(model_response.choices[0])}"
|
||||||
|
)
|
||||||
|
print_verbose(f"type of streaming_choice: {type(streaming_choice)}")
|
||||||
|
if isinstance(model_response.choices[0], litellm.Choices):
|
||||||
|
if getattr(
|
||||||
|
model_response.choices[0].message, "tool_calls", None
|
||||||
|
) is not None and isinstance(
|
||||||
|
model_response.choices[0].message.tool_calls, list
|
||||||
|
):
|
||||||
|
for tool_call in model_response.choices[
|
||||||
|
0
|
||||||
|
].message.tool_calls:
|
||||||
|
_tool_call = {**tool_call.dict(), "index": 0}
|
||||||
|
_tool_calls.append(_tool_call)
|
||||||
|
delta_obj = litellm.utils.Delta(
|
||||||
|
content=getattr(
|
||||||
|
model_response.choices[0].message, "content", None
|
||||||
|
),
|
||||||
|
role=model_response.choices[0].message.role,
|
||||||
|
tool_calls=_tool_calls,
|
||||||
|
)
|
||||||
|
streaming_choice.delta = delta_obj
|
||||||
|
streaming_model_response.choices = [streaming_choice]
|
||||||
|
completion_stream = model_response_iterator(
|
||||||
|
model_response=streaming_model_response
|
||||||
|
)
|
||||||
|
print_verbose(
|
||||||
|
f"Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
|
||||||
|
)
|
||||||
|
return litellm.CustomStreamWrapper(
|
||||||
|
completion_stream=completion_stream,
|
||||||
|
model=model,
|
||||||
|
custom_llm_provider="cached_response",
|
||||||
|
logging_obj=logging_obj,
|
||||||
|
)
|
||||||
|
|
||||||
model_response["finish_reason"] = response_body["stop_reason"]
|
model_response["finish_reason"] = response_body["stop_reason"]
|
||||||
_usage = litellm.Usage(
|
_usage = litellm.Usage(
|
||||||
prompt_tokens=response_body["usage"]["input_tokens"],
|
prompt_tokens=response_body["usage"]["input_tokens"],
|
||||||
|
@ -1029,6 +1096,10 @@ def completion(
|
||||||
raise BedrockError(status_code=500, message=traceback.format_exc())
|
raise BedrockError(status_code=500, message=traceback.format_exc())
|
||||||
|
|
||||||
|
|
||||||
|
async def model_response_iterator(model_response):
|
||||||
|
yield model_response
|
||||||
|
|
||||||
|
|
||||||
def _embedding_func_single(
|
def _embedding_func_single(
|
||||||
model: str,
|
model: str,
|
||||||
input: str,
|
input: str,
|
||||||
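With the expanded get_supported_openai_params list above, a Bedrock Claude 3 call can now pass stop, temperature, and top_p and have them mapped onto the Bedrock request body. A minimal usage sketch (assumes AWS credentials and region are configured in the environment; the model identifier follows the standard Bedrock naming):

import litellm

# temperature / top_p / stop are now listed as supported OpenAI params for the
# Amazon Anthropic Claude 3 config and get translated into the Bedrock payload
response = litellm.completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{"role": "user", "content": "Name three primary colors."}],
    temperature=0.2,
    top_p=0.9,
    stop=["\n\n"],
    max_tokens=256,
)
print(response.choices[0].message.content)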

litellm/llms/custom_httpx/httpx_handler.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+from typing import Optional
+import httpx
+
+
+class HTTPHandler:
+    def __init__(self, concurrent_limit=1000):
+        # Create a client with a connection pool
+        self.client = httpx.AsyncClient(
+            limits=httpx.Limits(
+                max_connections=concurrent_limit,
+                max_keepalive_connections=concurrent_limit,
+            )
+        )
+
+    async def close(self):
+        # Close the client when you're done with it
+        await self.client.aclose()
+
+    async def get(
+        self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None
+    ):
+        response = await self.client.get(url, params=params, headers=headers)
+        return response
+
+    async def post(
+        self,
+        url: str,
+        data: Optional[dict] = None,
+        params: Optional[dict] = None,
+        headers: Optional[dict] = None,
+    ):
+        try:
+            response = await self.client.post(
+                url, data=data, params=params, headers=headers
+            )
+            return response
+        except Exception as e:
+            raise e
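A minimal usage sketch of the new HTTPHandler wrapper (the URL is a placeholder; note that despite the plain method names, get() and post() are coroutines backed by httpx.AsyncClient and must be awaited):

import asyncio

from litellm.llms.custom_httpx.httpx_handler import HTTPHandler


async def main():
    handler = HTTPHandler(concurrent_limit=100)
    try:
        response = await handler.post(
            "https://example.com/api", data={"hello": "world"}
        )
        print(response.status_code)
    finally:
        await handler.close()  # release the pooled connections


asyncio.run(main())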
@@ -118,7 +118,7 @@ def completion(
     logger_fn=None,
 ):
     try:
-        import google.generativeai as genai
+        import google.generativeai as genai # type: ignore
     except:
         raise Exception(
             "Importing google.generativeai failed, please run 'pip install -q google-generativeai"

@@ -308,7 +308,7 @@ async def async_completion(
     messages,
     encoding,
 ):
-    import google.generativeai as genai
+    import google.generativeai as genai # type: ignore

     response = await _model.generate_content_async(
         contents=prompt,
@@ -68,9 +68,9 @@ class OllamaConfig:
     repeat_last_n: Optional[int] = None
     repeat_penalty: Optional[float] = None
     temperature: Optional[float] = None
-    stop: Optional[
-        list
-    ] = None # stop is a list based on this - https://github.com/jmorganca/ollama/pull/442
+    stop: Optional[list] = (
+        None # stop is a list based on this - https://github.com/jmorganca/ollama/pull/442
+    )
     tfs_z: Optional[float] = None
     num_predict: Optional[int] = None
     top_k: Optional[int] = None

@@ -344,9 +344,9 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):


 async def ollama_aembeddings(
-    api_base="http://localhost:11434",
-    model="llama2",
-    prompt="Why is the sky blue?",
+    api_base: str,
+    model: str,
+    prompts: list,
     optional_params=None,
     logging_obj=None,
     model_response=None,

@@ -365,6 +365,11 @@ async def ollama_aembeddings(
     ): # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
         optional_params[k] = v

+    total_input_tokens = 0
+    output_data = []
+    timeout = aiohttp.ClientTimeout(total=litellm.request_timeout) # 10 minutes
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+        for idx, prompt in enumerate(prompts):
             data = {
                 "model": model,
                 "prompt": prompt,

@@ -373,12 +378,14 @@ async def ollama_aembeddings(
             logging_obj.pre_call(
                 input=None,
                 api_key=None,
-                additional_args={"api_base": url, "complete_input_dict": data, "headers": {}},
+                additional_args={
+                    "api_base": url,
+                    "complete_input_dict": data,
+                    "headers": {},
+                },
             )
-    timeout = aiohttp.ClientTimeout(total=litellm.request_timeout) # 10 minutes
-    async with aiohttp.ClientSession(timeout=timeout) as session:
-        response = await session.post(url, json=data)

+            response = await session.post(url, json=data)
             if response.status != 200:
                 text = await response.text()
                 raise OllamaError(status_code=response.status, message=text)

@@ -395,21 +402,19 @@ async def ollama_aembeddings(
             )

             response_json = await response.json()
-            embeddings = response_json["embedding"]
-            ## RESPONSE OBJECT
-            output_data = []
-            for idx, embedding in enumerate(embeddings):
+            embeddings: list[float] = response_json["embedding"]
             output_data.append(
-                {"object": "embedding", "index": idx, "embedding": embedding}
+                {"object": "embedding", "index": idx, "embedding": embeddings}
             )

+            input_tokens = len(encoding.encode(prompt))
+            total_input_tokens += input_tokens

     model_response["object"] = "list"
     model_response["data"] = output_data
     model_response["model"] = model

-    input_tokens = len(encoding.encode(prompt))

     model_response["usage"] = {
-        "prompt_tokens": input_tokens,
-        "total_tokens": input_tokens,
+        "prompt_tokens": total_input_tokens,
+        "total_tokens": total_input_tokens,
     }
     return model_response
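Since ollama_aembeddings now takes a list of prompts and sums token usage over the batch, a list input can be embedded in one call. A usage sketch (model name and texts are illustrative; assumes a local Ollama server at the default port):

import asyncio

import litellm


async def main():
    # each prompt in the batch is embedded in turn; prompt_tokens in the
    # returned usage is the total across all inputs
    response = await litellm.aembedding(
        model="ollama/nomic-embed-text",  # illustrative model name
        input=["first document", "second document"],
        api_base="http://localhost:11434",
    )
    print(len(response.data), response.usage)


asyncio.run(main())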
@@ -173,10 +173,11 @@ class OllamaChatConfig:
             litellm.add_function_to_prompt = (
                 True # so that main.py adds the function call to the prompt
             )
-            optional_params["functions_unsupported_model"] = non_default_params.pop(
+            optional_params["functions_unsupported_model"] = non_default_params.get(
                 "functions"
             )
         non_default_params.pop("tool_choice", None) # causes ollama requests to hang
+        non_default_params.pop("functions", None) # causes ollama requests to hang
         return optional_params

@@ -98,7 +98,7 @@ def completion(
     logger_fn=None,
 ):
     try:
-        import google.generativeai as palm
+        import google.generativeai as palm # type: ignore
     except:
         raise Exception(
             "Importing google.generativeai failed, please run 'pip install -q google-generativeai"
@@ -5,12 +5,17 @@ from jinja2 import Template, exceptions, Environment, meta
 from typing import Optional, Any
 import imghdr, base64
 from typing import List
+import litellm


 def default_pt(messages):
     return " ".join(message["content"] for message in messages)


+def prompt_injection_detection_default_pt():
+    return """Detect if a prompt is safe to run. Return 'UNSAFE' if not."""
+
+
 # alpaca prompt template - for models like mythomax, etc.
 def alpaca_pt(messages):
     prompt = custom_prompt(

@@ -638,11 +643,12 @@ def anthropic_messages_pt(messages: list):
     """
     # add role=tool support to allow function call result/error submission
     user_message_types = {"user", "tool"}
-    # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
+    # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, merge them.
     new_messages = []
     msg_i = 0
     while msg_i < len(messages):
         user_content = []
+        ## MERGE CONSECUTIVE USER CONTENT ##
         while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types:
             if isinstance(messages[msg_i]["content"], list):
                 for m in messages[msg_i]["content"]:

@@ -676,6 +682,7 @@ def anthropic_messages_pt(messages: list):
             new_messages.append({"role": "user", "content": user_content})

         assistant_content = []
+        ## MERGE CONSECUTIVE ASSISTANT CONTENT ##
         while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
             assistant_text = (
                 messages[msg_i].get("content") or ""

@@ -694,9 +701,14 @@ def anthropic_messages_pt(messages: list):
             new_messages.append({"role": "assistant", "content": assistant_content})

     if new_messages[0]["role"] != "user":
+        if litellm.modify_params:
            new_messages.insert(
                0, {"role": "user", "content": [{"type": "text", "text": "."}]}
            )
+        else:
+            raise Exception(
+                "Invalid first message. Should always start with 'role'='user' for Anthropic. System prompt is sent separately for Anthropic. set 'litellm.modify_params = True' or 'litellm_settings:modify_params = True' on proxy, to insert a placeholder user message - '.' as the first message, "
+            )

     if new_messages[-1]["role"] == "assistant":
         for content in new_messages[-1]["content"]:
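A small illustration of the merge behaviour described by the updated comment, sketched under the assumption that anthropic_messages_pt is importable from litellm.llms.prompt_templates.factory:

import litellm
from litellm.llms.prompt_templates.factory import anthropic_messages_pt

litellm.modify_params = True  # allow the placeholder "." user turn to be inserted

messages = [
    {"role": "assistant", "content": "hello"},  # conversation starts with assistant
    {"role": "user", "content": "first"},
    {"role": "user", "content": "second"},      # two consecutive user turns get merged
]

print(anthropic_messages_pt(messages))
# Expected shape: user (".") / assistant ("hello") / user ("first" + "second"),
# i.e. strictly alternating roles as the Anthropic messages API requires.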
@@ -714,17 +726,23 @@ def extract_between_tags(tag: str, string: str, strip: bool = False) -> List[str
         ext_list = [e.strip() for e in ext_list]
     return ext_list


 def contains_tag(tag: str, string: str) -> bool:
     return bool(re.search(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL))


 def parse_xml_params(xml_content):
     root = ET.fromstring(xml_content)
     params = {}
     for child in root.findall(".//parameters/*"):
+        try:
+            # Attempt to decode the element's text as JSON
+            params[child.tag] = json.loads(child.text)
+        except json.JSONDecodeError:
+            # If JSON decoding fails, use the original text
            params[child.tag] = child.text
     return params


 ###


@@ -917,7 +935,7 @@ def gemini_text_image_pt(messages: list):
     }
     """
     try:
-        import google.generativeai as genai
+        import google.generativeai as genai # type: ignore
     except:
         raise Exception(
             "Importing google.generativeai failed, please run 'pip install -q google-generativeai"

@@ -958,9 +976,7 @@ def azure_text_pt(messages: list):

 # Function call template
 def function_call_prompt(messages: list, functions: list):
-    function_prompt = (
-        """Produce JSON OUTPUT ONLY! Adhere to this format {"name": "function_name", "arguments":{"argument_name": "argument_value"}} The following functions are available to you:"""
-    )
+    function_prompt = """Produce JSON OUTPUT ONLY! Adhere to this format {"name": "function_name", "arguments":{"argument_name": "argument_value"}} The following functions are available to you:"""
     for function in functions:
         function_prompt += f"""\n{function}\n"""

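The effect of the new JSON-decoding fallback in parse_xml_params, sketched with the same logic reproduced standalone (tag names and values are illustrative):

import json
import xml.etree.ElementTree as ET


def parse_xml_params(xml_content):
    # mirrors the patched helper: try JSON first, fall back to raw text
    root = ET.fromstring(xml_content)
    params = {}
    for child in root.findall(".//parameters/*"):
        try:
            params[child.tag] = json.loads(child.text)
        except json.JSONDecodeError:
            params[child.tag] = child.text
    return params


xml = '<invoke><parameters><count>3</count><tags>["a", "b"]</tags><name>Bob</name></parameters></invoke>'
print(parse_xml_params(xml))
# {'count': 3, 'tags': ['a', 'b'], 'name': 'Bob'}
# numbers and JSON arrays are decoded into Python types; plain text stays a string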
@@ -166,6 +166,7 @@ def completion(
     aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
     aws_access_key_id = optional_params.pop("aws_access_key_id", None)
     aws_region_name = optional_params.pop("aws_region_name", None)
+    model_id = optional_params.pop("model_id", None)

     if aws_access_key_id != None:
         # uses auth params passed to completion

@@ -245,15 +246,28 @@ def completion(
                 model=model,
                 logging_obj=logging_obj,
                 data=data,
+                model_id=model_id,
+                aws_secret_access_key=aws_secret_access_key,
+                aws_access_key_id=aws_access_key_id,
+                aws_region_name=aws_region_name,
             )
             return response

+        if model_id is not None:
+            response = client.invoke_endpoint_with_response_stream(
+                EndpointName=model,
+                InferenceComponentName=model_id,
+                ContentType="application/json",
+                Body=data,
+                CustomAttributes="accept_eula=true",
+            )
+        else:
            response = client.invoke_endpoint_with_response_stream(
                EndpointName=model,
                ContentType="application/json",
                Body=data,
                CustomAttributes="accept_eula=true",
            )

         return response["Body"]
     elif acompletion == True:
         _data = {"inputs": prompt, "parameters": inference_params}

@@ -264,10 +278,44 @@ def completion(
             model=model,
             logging_obj=logging_obj,
             data=_data,
+            model_id=model_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_access_key_id=aws_access_key_id,
+            aws_region_name=aws_region_name,
         )
     data = json.dumps({"inputs": prompt, "parameters": inference_params}).encode(
         "utf-8"
     )
+    ## COMPLETION CALL
+    try:
+        if model_id is not None:
+            ## LOGGING
+            request_str = f"""
+            response = client.invoke_endpoint(
+                EndpointName={model},
+                InferenceComponentName={model_id},
+                ContentType="application/json",
+                Body={data},
+                CustomAttributes="accept_eula=true",
+            )
+            """ # type: ignore
+            logging_obj.pre_call(
+                input=prompt,
+                api_key="",
+                additional_args={
+                    "complete_input_dict": data,
+                    "request_str": request_str,
+                    "hf_model_name": hf_model_name,
+                },
+            )
+            response = client.invoke_endpoint(
+                EndpointName=model,
+                InferenceComponentName=model_id,
+                ContentType="application/json",
+                Body=data,
+                CustomAttributes="accept_eula=true",
+            )
+        else:
            ## LOGGING
            request_str = f"""
            response = client.invoke_endpoint(

@@ -286,8 +334,6 @@ def completion(
                 "hf_model_name": hf_model_name,
             },
         )
-        ## COMPLETION CALL
-        try:
            response = client.invoke_endpoint(
                EndpointName=model,
                ContentType="application/json",

@@ -303,6 +349,8 @@ def completion(
         error_message = (
             getattr(e, "response", {}).get("Error", {}).get("Message", str(e))
         )
+        if "Inference Component Name header is required" in error_message:
+            error_message += "\n pass in via `litellm.completion(..., model_id={InferenceComponentName})`"
         raise SagemakerError(status_code=status_code, message=error_message)

     response = response["Body"].read().decode("utf8")

@@ -357,8 +405,12 @@ async def async_streaming(
     encoding,
     model_response: ModelResponse,
     model: str,
+    model_id: Optional[str],
     logging_obj: Any,
     data,
+    aws_secret_access_key: Optional[str],
+    aws_access_key_id: Optional[str],
+    aws_region_name: Optional[str],
 ):
     """
     Use aioboto3

@@ -367,11 +419,6 @@ async def async_streaming(

     session = aioboto3.Session()

-    # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
-    aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
-    aws_access_key_id = optional_params.pop("aws_access_key_id", None)
-    aws_region_name = optional_params.pop("aws_region_name", None)

     if aws_access_key_id != None:
         # uses auth params passed to completion
         # aws_access_key_id is not None, assume user is trying to auth using litellm.completion

@@ -398,6 +445,15 @@ async def async_streaming(

     async with _client as client:
         try:
+            if model_id is not None:
+                response = await client.invoke_endpoint_with_response_stream(
+                    EndpointName=model,
+                    InferenceComponentName=model_id,
+                    ContentType="application/json",
+                    Body=data,
+                    CustomAttributes="accept_eula=true",
+                )
+            else:
                response = await client.invoke_endpoint_with_response_stream(
                    EndpointName=model,
                    ContentType="application/json",

@@ -418,6 +474,10 @@ async def async_completion(
     model: str,
     logging_obj: Any,
     data: dict,
+    model_id: Optional[str],
+    aws_secret_access_key: Optional[str],
+    aws_access_key_id: Optional[str],
+    aws_region_name: Optional[str],
 ):
     """
     Use aioboto3

@@ -426,11 +486,6 @@ async def async_completion(

     session = aioboto3.Session()

-    # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
-    aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
-    aws_access_key_id = optional_params.pop("aws_access_key_id", None)
-    aws_region_name = optional_params.pop("aws_region_name", None)

     if aws_access_key_id != None:
         # uses auth params passed to completion
         # aws_access_key_id is not None, assume user is trying to auth using litellm.completion

@@ -456,6 +511,35 @@ async def async_completion(
     )

     async with _client as client:
+        encoded_data = json.dumps(data).encode("utf-8")
+        try:
+            if model_id is not None:
+                ## LOGGING
+                request_str = f"""
+                response = client.invoke_endpoint(
+                    EndpointName={model},
+                    InferenceComponentName={model_id},
+                    ContentType="application/json",
+                    Body={data},
+                    CustomAttributes="accept_eula=true",
+                )
+                """ # type: ignore
+                logging_obj.pre_call(
+                    input=data["inputs"],
+                    api_key="",
+                    additional_args={
+                        "complete_input_dict": data,
+                        "request_str": request_str,
+                    },
+                )
+                response = await client.invoke_endpoint(
+                    EndpointName=model,
+                    InferenceComponentName=model_id,
+                    ContentType="application/json",
+                    Body=encoded_data,
+                    CustomAttributes="accept_eula=true",
+                )
+            else:
                ## LOGGING
                request_str = f"""
                response = client.invoke_endpoint(

@@ -473,8 +557,6 @@ async def async_completion(
                 "request_str": request_str,
             },
         )
-        encoded_data = json.dumps(data).encode("utf-8")
-        try:
            response = await client.invoke_endpoint(
                EndpointName=model,
                ContentType="application/json",

@@ -482,7 +564,10 @@ async def async_completion(
                 CustomAttributes="accept_eula=true",
             )
         except Exception as e:
-            raise SagemakerError(status_code=500, message=f"{str(e)}")
+            error_message = f"{str(e)}"
+            if "Inference Component Name header is required" in error_message:
+                error_message += "\n pass in via `litellm.completion(..., model_id={InferenceComponentName})`"
+            raise SagemakerError(status_code=500, message=error_message)
         response = await response["Body"].read()
         response = response.decode("utf8")
         ## LOGGING
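A usage sketch of the new model_id parameter for SageMaker endpoints that host multiple inference components; the endpoint and component names below are placeholders, and AWS credentials are assumed to come from the environment:

import litellm

# EndpointName comes from the model string; the InferenceComponentName header
# is supplied via model_id, matching the hint added to the error message above
response = litellm.completion(
    model="sagemaker/my-llama2-endpoint",        # placeholder endpoint name
    model_id="my-llama2-inference-component",    # placeholder InferenceComponentName
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.choices[0].message.content)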
@@ -289,11 +289,11 @@ def completion(
             Part,
             GenerationConfig,
         )
-        from google.cloud import aiplatform
+        from google.cloud import aiplatform # type: ignore
         from google.protobuf import json_format # type: ignore
         from google.protobuf.struct_pb2 import Value # type: ignore
-        from google.cloud.aiplatform_v1beta1.types import content as gapic_content_types
-        import google.auth
+        from google.cloud.aiplatform_v1beta1.types import content as gapic_content_types # type: ignore
+        import google.auth # type: ignore

         ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
         print_verbose(

@@ -783,7 +783,7 @@ async def async_completion(
         """
         Vertex AI Model Garden
         """
-        from google.cloud import aiplatform
+        from google.cloud import aiplatform # type: ignore

         ## LOGGING
         logging_obj.pre_call(

@@ -969,7 +969,7 @@ async def async_streaming(
         )
         response = llm_model.predict_streaming_async(prompt, **optional_params)
     elif mode == "custom":
-        from google.cloud import aiplatform
+        from google.cloud import aiplatform # type: ignore

         stream = optional_params.pop("stream", None)

@@ -1059,7 +1059,7 @@ def embedding(
         )

     from vertexai.language_models import TextEmbeddingModel
-    import google.auth
+    import google.auth # type: ignore

     ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
     try:
@@ -115,27 +115,57 @@ class LiteLLM:
         default_headers: Optional[Mapping[str, str]] = None,
     ):
         self.params = locals()
-        self.chat = Chat(self.params)
+        self.chat = Chat(self.params, router_obj=None)


 class Chat:
-    def __init__(self, params):
+    def __init__(self, params, router_obj: Optional[Any]):
         self.params = params
-        self.completions = Completions(self.params)
+        if self.params.get("acompletion", False) == True:
+            self.params.pop("acompletion")
+            self.completions: Union[AsyncCompletions, Completions] = AsyncCompletions(
+                self.params, router_obj=router_obj
+            )
+        else:
+            self.completions = Completions(self.params, router_obj=router_obj)


 class Completions:
-    def __init__(self, params):
+    def __init__(self, params, router_obj: Optional[Any]):
         self.params = params
+        self.router_obj = router_obj

     def create(self, messages, model=None, **kwargs):
         for k, v in kwargs.items():
             self.params[k] = v
         model = model or self.params.get("model")
+        if self.router_obj is not None:
+            response = self.router_obj.completion(
+                model=model, messages=messages, **self.params
+            )
+        else:
            response = completion(model=model, messages=messages, **self.params)
         return response


+class AsyncCompletions:
+    def __init__(self, params, router_obj: Optional[Any]):
+        self.params = params
+        self.router_obj = router_obj
+
+    async def create(self, messages, model=None, **kwargs):
+        for k, v in kwargs.items():
+            self.params[k] = v
+        model = model or self.params.get("model")
+        if self.router_obj is not None:
+            response = await self.router_obj.acompletion(
+                model=model, messages=messages, **self.params
+            )
+        else:
+            response = await acompletion(model=model, messages=messages, **self.params)
+        return response
+
+
 @client
 async def acompletion(
     model: str,

@@ -571,6 +601,7 @@ def completion(
         "ttl",
         "cache",
         "no-log",
+        "base_model",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {

@@ -639,7 +670,7 @@ def completion(
         elif (
             input_cost_per_second is not None
         ): # time based pricing just needs cost in place
-            output_cost_per_second = output_cost_per_second or 0.0
+            output_cost_per_second = output_cost_per_second
             litellm.register_model(
                 {
                     f"{custom_llm_provider}/{model}": {

@@ -1752,7 +1783,11 @@ def completion(
                 timeout=timeout,
             )

-            if "stream" in optional_params and optional_params["stream"] == True:
+            if (
+                "stream" in optional_params
+                and optional_params["stream"] == True
+                and not isinstance(response, CustomStreamWrapper)
+            ):
                 # don't try to access stream object,
                 if "ai21" in model:
                     response = CustomStreamWrapper(

@@ -2754,28 +2789,25 @@ def embedding(
                 model_response=EmbeddingResponse(),
             )
         elif custom_llm_provider == "ollama":
-            ollama_input = None
-            if isinstance(input, list) and len(input) > 1:
-                raise litellm.BadRequestError(
-                    message=f"Ollama Embeddings don't support batch embeddings",
-                    model=model, # type: ignore
-                    llm_provider="ollama", # type: ignore
-                )
-            if isinstance(input, list) and len(input) == 1:
-                ollama_input = "".join(input[0])
-            elif isinstance(input, str):
-                ollama_input = input
-            else:
+            api_base = (
+                litellm.api_base
+                or api_base
+                or get_secret("OLLAMA_API_BASE")
+                or "http://localhost:11434"
+            )
+            if isinstance(input, str):
+                input = [input]
+            if not all(isinstance(item, str) for item in input):
                 raise litellm.BadRequestError(
                     message=f"Invalid input for ollama embeddings. input={input}",
                     model=model, # type: ignore
                     llm_provider="ollama", # type: ignore
                 )
-            if aembedding == True:
+            if aembedding:
                 response = ollama.ollama_aembeddings(
+                    api_base=api_base,
                     model=model,
-                    prompt=ollama_input,
+                    prompts=input,
                     encoding=encoding,
                     logging_obj=logging,
                     optional_params=optional_params,
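A minimal sketch of the updated client wrapper: when the Chat layer sees acompletion=True in its params it hands back AsyncCompletions, whose create() must be awaited. This assumes the LiteLLM constructor accepts an acompletion flag (it is only read from self.params in the patched Chat); the model name and key below are placeholders:

import asyncio

from litellm import LiteLLM


async def main():
    # acompletion=True is an assumption about the constructor surface;
    # the flag is what Chat.__init__ checks to choose AsyncCompletions
    client = LiteLLM(api_key="sk-...", acompletion=True)
    response = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)


asyncio.run(main())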
[Regenerated LiteLLM Admin UI build artifacts: the minified webpack runtime chunk, the prerendered index.html, and the RSC payload. The only substantive changes are the rebuilt asset references: CSS 68a21c6e6697f7ca.css -> f8da5a6a5b29d249.css, webpack-3b0d290a8fe6941d.js -> webpack-d1ad37b1875df240.js, fd9d1056-a85b2c176012d8e5.js -> fd9d1056-a507ee9e75a3be72.js, 69-e1b183dda365ec86.js -> 69-589b47e7a69d316f.js, page chunks 730-1411b729a1c79695.js / page-df9015da04018cc1.js -> 798-4baed68da0c5497d.js / page-a5a04da2a9356785.js, and buildId tXZFkeqtgh-goIRVbw_9q -> DptMjzo5xd96cx0b56k4u. Minified contents omitted.]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -1,21 +1,20 @@
|
||||||
model_list:
|
model_list:
|
||||||
- model_name: fake_openai
|
- model_name: fake-openai-endpoint
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: openai/my-fake-model
|
model: openai/my-fake-model
|
||||||
api_key: my-fake-key
|
api_key: my-fake-key
|
||||||
api_base: http://0.0.0.0:8080
|
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||||
- model_name: gpt-3.5-turbo
|
|
||||||
litellm_params:
|
|
||||||
model: gpt-3.5-turbo-1106
|
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
|
||||||
|
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
cache: true
|
max_budget: 600020
|
||||||
cache_params:
|
budget_duration: 30d
|
||||||
type: redis
|
|
||||||
callbacks: ["batch_redis_requests"]
|
|
||||||
# success_callbacks: ["langfuse"]
|
|
||||||
|
|
||||||
general_settings:
|
general_settings:
|
||||||
master_key: sk-1234
|
master_key: sk-1234
|
||||||
database_url: "postgresql://neondb_owner:hz8tyUlJ5ivV@ep-cool-sunset-a5ywubeh.us-east-2.aws.neon.tech/neondb?sslmode=require"
|
proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
|
||||||
|
enable_jwt_auth: True
|
||||||
|
alerting: ["slack"]
|
||||||
|
litellm_jwtauth:
|
||||||
|
admin_jwt_scope: "litellm_proxy_admin"
|
||||||
|
team_jwt_scope: "litellm_team"
|
||||||
|
public_key_ttl: 600
|
|
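The new litellm_jwtauth block under general_settings maps onto the LiteLLM_JWTAuth model added further down in this change. A minimal sketch of loading and validating it, assuming a config file path and a hypothetical load_jwt_auth_settings helper (not the proxy's actual startup code):

    # Hypothetical helper: validate general_settings.litellm_jwtauth against LiteLLM_JWTAuth.
    import yaml
    from litellm.proxy._types import LiteLLM_JWTAuth

    def load_jwt_auth_settings(config_path: str) -> LiteLLM_JWTAuth:
        with open(config_path) as f:
            config = yaml.safe_load(f)
        jwt_settings = config.get("general_settings", {}).get("litellm_jwtauth", {}) or {}
        # Unknown keys raise ValueError (see the LiteLLM_JWTAuth.__init__ added below).
        return LiteLLM_JWTAuth(**jwt_settings)

    # For this config: admin_jwt_scope="litellm_proxy_admin",
    # team_jwt_scope="litellm_team", public_key_ttl=600.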
@ -1,4 +1,5 @@
|
||||||
from pydantic import BaseModel, Extra, Field, root_validator, Json
|
from pydantic import BaseModel, Extra, Field, root_validator, Json, validator
|
||||||
|
from dataclasses import fields
|
||||||
import enum
|
import enum
|
||||||
from typing import Optional, List, Union, Dict, Literal, Any
|
from typing import Optional, List, Union, Dict, Literal, Any
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
@ -14,11 +15,6 @@ def hash_token(token: str):
|
||||||
return hashed_token
|
return hashed_token
|
||||||
|
|
||||||
|
|
||||||
class LiteLLMProxyRoles(enum.Enum):
|
|
||||||
PROXY_ADMIN = "litellm_proxy_admin"
|
|
||||||
USER = "litellm_user"
|
|
||||||
|
|
||||||
|
|
||||||
class LiteLLMBase(BaseModel):
|
class LiteLLMBase(BaseModel):
|
||||||
"""
|
"""
|
||||||
Implements default functions, all pydantic objects should have.
|
Implements default functions, all pydantic objects should have.
|
||||||
|
@ -42,6 +38,135 @@ class LiteLLMBase(BaseModel):
|
||||||
protected_namespaces = ()
|
protected_namespaces = ()
|
||||||
|
|
||||||
|
|
||||||
|
class LiteLLMRoutes(enum.Enum):
|
||||||
|
openai_routes: List = [ # chat completions
|
||||||
|
"/openai/deployments/{model}/chat/completions",
|
||||||
|
"/chat/completions",
|
||||||
|
"/v1/chat/completions",
|
||||||
|
# completions
|
||||||
|
"/openai/deployments/{model}/completions",
|
||||||
|
"/completions",
|
||||||
|
"/v1/completions",
|
||||||
|
# embeddings
|
||||||
|
"/openai/deployments/{model}/embeddings",
|
||||||
|
"/embeddings",
|
||||||
|
"/v1/embeddings",
|
||||||
|
# image generation
|
||||||
|
"/images/generations",
|
||||||
|
"/v1/images/generations",
|
||||||
|
# audio transcription
|
||||||
|
"/audio/transcriptions",
|
||||||
|
"/v1/audio/transcriptions",
|
||||||
|
# moderations
|
||||||
|
"/moderations",
|
||||||
|
"/v1/moderations",
|
||||||
|
# models
|
||||||
|
"/models",
|
||||||
|
"/v1/models",
|
||||||
|
]
|
||||||
|
|
||||||
|
info_routes: List = ["/key/info", "/team/info", "/user/info", "/model/info"]
|
||||||
|
|
||||||
|
management_routes: List = [ # key
|
||||||
|
"/key/generate",
|
||||||
|
"/key/update",
|
||||||
|
"/key/delete",
|
||||||
|
"/key/info",
|
||||||
|
# user
|
||||||
|
"/user/new",
|
||||||
|
"/user/update",
|
||||||
|
"/user/delete",
|
||||||
|
"/user/info",
|
||||||
|
# team
|
||||||
|
"/team/new",
|
||||||
|
"/team/update",
|
||||||
|
"/team/delete",
|
||||||
|
"/team/info",
|
||||||
|
"/team/block",
|
||||||
|
"/team/unblock",
|
||||||
|
# model
|
||||||
|
"/model/new",
|
||||||
|
"/model/update",
|
||||||
|
"/model/delete",
|
||||||
|
"/model/info",
|
||||||
|
]
|
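The LiteLLMRoutes groups above are plain lists hung off an Enum, so route checks read directly from .value. A small illustration (the incoming path is made up):

    from litellm.proxy._types import LiteLLMRoutes

    route = "/v1/chat/completions"  # example incoming request path
    if route in LiteLLMRoutes.openai_routes.value:
        print("LLM API route")
    elif route in LiteLLMRoutes.management_routes.value:
        print("management route")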
||||||
|
|
||||||
|
|
||||||
|
class LiteLLM_JWTAuth(LiteLLMBase):
|
||||||
|
"""
|
||||||
|
A class to define the roles and permissions for a LiteLLM Proxy w/ JWT Auth.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
- admin_jwt_scope: The JWT scope required for proxy admin roles.
|
||||||
|
- admin_allowed_routes: list of allowed routes for proxy admin roles.
|
||||||
|
- team_jwt_scope: The JWT scope required for proxy team roles.
|
||||||
|
- team_id_jwt_field: The field in the JWT token that stores the team ID. Default - `client_id`.
|
||||||
|
- team_allowed_routes: list of allowed routes for proxy team roles.
|
||||||
|
- end_user_id_jwt_field: Default - `sub`. The field in the JWT token that stores the end-user ID. Turn this off by setting to `None`. Enables end-user cost tracking.
|
||||||
|
- public_key_ttl: Default - 600s. TTL for caching public JWT keys.
|
||||||
|
|
||||||
|
See `auth_checks.py` for the specific routes
|
||||||
|
"""
|
||||||
|
|
||||||
|
admin_jwt_scope: str = "litellm_proxy_admin"
|
||||||
|
admin_allowed_routes: List[
|
||||||
|
Literal["openai_routes", "info_routes", "management_routes"]
|
||||||
|
] = ["management_routes"]
|
||||||
|
team_jwt_scope: str = "litellm_team"
|
||||||
|
team_id_jwt_field: str = "client_id"
|
||||||
|
team_allowed_routes: List[
|
||||||
|
Literal["openai_routes", "info_routes", "management_routes"]
|
||||||
|
] = ["openai_routes", "info_routes"]
|
||||||
|
end_user_id_jwt_field: Optional[str] = "sub"
|
||||||
|
public_key_ttl: float = 600
|
||||||
|
|
||||||
|
def __init__(self, **kwargs: Any) -> None:
|
||||||
|
# get the attribute names for this Pydantic model
|
||||||
|
allowed_keys = self.__annotations__.keys()
|
||||||
|
|
||||||
|
invalid_keys = set(kwargs.keys()) - allowed_keys
|
||||||
|
|
||||||
|
if invalid_keys:
|
||||||
|
raise ValueError(
|
||||||
|
f"Invalid arguments provided: {', '.join(invalid_keys)}. Allowed arguments are: {', '.join(allowed_keys)}."
|
||||||
|
)
|
||||||
|
|
||||||
|
super().__init__(**kwargs)
|
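Because __init__ compares the incoming keys against the model's annotated fields, a typo in the config's litellm_jwtauth block fails loudly instead of being silently dropped. A quick, self-contained illustration:

    from litellm.proxy._types import LiteLLM_JWTAuth

    cfg = LiteLLM_JWTAuth(team_id_jwt_field="azp", public_key_ttl=300)
    print(cfg.admin_allowed_routes)  # ["management_routes"] by default

    try:
        LiteLLM_JWTAuth(team_jwt_scop="litellm_team")  # misspelled key
    except ValueError as e:
        print(e)  # Invalid arguments provided: team_jwt_scop. Allowed arguments are: ...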
||||||
|
|
||||||
|
|
||||||
|
class LiteLLMPromptInjectionParams(LiteLLMBase):
|
||||||
|
heuristics_check: bool = False
|
||||||
|
vector_db_check: bool = False
|
||||||
|
llm_api_check: bool = False
|
||||||
|
llm_api_name: Optional[str] = None
|
||||||
|
llm_api_system_prompt: Optional[str] = None
|
||||||
|
llm_api_fail_call_string: Optional[str] = None
|
||||||
|
|
||||||
|
@root_validator(pre=True)
|
||||||
|
def check_llm_api_params(cls, values):
|
||||||
|
llm_api_check = values.get("llm_api_check")
|
||||||
|
if llm_api_check is True:
|
||||||
|
if "llm_api_name" not in values or not values["llm_api_name"]:
|
||||||
|
raise ValueError(
|
||||||
|
"If llm_api_check is set to True, llm_api_name must be provided"
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
"llm_api_system_prompt" not in values
|
||||||
|
or not values["llm_api_system_prompt"]
|
||||||
|
):
|
||||||
|
raise ValueError(
|
||||||
|
"If llm_api_check is set to True, llm_api_system_prompt must be provided"
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
"llm_api_fail_call_string" not in values
|
||||||
|
or not values["llm_api_fail_call_string"]
|
||||||
|
):
|
||||||
|
raise ValueError(
|
||||||
|
"If llm_api_check is set to True, llm_api_fail_call_string must be provided"
|
||||||
|
)
|
||||||
|
return values
|
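The root validator ties the three llm_api_* fields to llm_api_check, so enabling the LLM-based check without them is rejected up front. A short illustration (the model name and prompt are invented):

    from litellm.proxy._types import LiteLLMPromptInjectionParams

    try:
        LiteLLMPromptInjectionParams(llm_api_check=True)  # no llm_api_name etc.
    except ValueError as e:
        print(e)  # "If llm_api_check is set to True, llm_api_name must be provided"

    params = LiteLLMPromptInjectionParams(
        llm_api_check=True,
        llm_api_name="gpt-3.5-turbo",                      # invented example values
        llm_api_system_prompt="Detect prompt injection.",
        llm_api_fail_call_string="BLOCKED",
    )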
||||||
|
|
||||||
|
|
||||||
######### Request Class Definition ######
|
######### Request Class Definition ######
|
||||||
class ProxyChatCompletionRequest(LiteLLMBase):
|
class ProxyChatCompletionRequest(LiteLLMBase):
|
||||||
model: str
|
model: str
|
||||||
|
@ -180,7 +305,7 @@ class GenerateKeyResponse(GenerateKeyRequest):
|
||||||
key: str
|
key: str
|
||||||
key_name: Optional[str] = None
|
key_name: Optional[str] = None
|
||||||
expires: Optional[datetime]
|
expires: Optional[datetime]
|
||||||
user_id: str
|
user_id: Optional[str] = None
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@root_validator(pre=True)
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
|
@ -274,6 +399,7 @@ class TeamBase(LiteLLMBase):
|
||||||
rpm_limit: Optional[int] = None
|
rpm_limit: Optional[int] = None
|
||||||
max_budget: Optional[float] = None
|
max_budget: Optional[float] = None
|
||||||
models: list = []
|
models: list = []
|
||||||
|
blocked: bool = False
|
||||||
|
|
||||||
|
|
||||||
class NewTeamRequest(TeamBase):
|
class NewTeamRequest(TeamBase):
|
||||||
|
@ -301,19 +427,18 @@ class TeamMemberDeleteRequest(LiteLLMBase):
|
||||||
return values
|
return values
|
||||||
|
|
||||||
|
|
||||||
class UpdateTeamRequest(LiteLLMBase):
|
class UpdateTeamRequest(TeamBase):
|
||||||
team_id: str # required
|
team_id: str # required
|
||||||
team_alias: Optional[str] = None
|
|
||||||
admins: Optional[list] = None
|
|
||||||
members: Optional[list] = None
|
|
||||||
members_with_roles: Optional[List[Member]] = None
|
|
||||||
metadata: Optional[dict] = None
|
|
||||||
|
|
||||||
|
|
||||||
class DeleteTeamRequest(LiteLLMBase):
|
class DeleteTeamRequest(LiteLLMBase):
|
||||||
team_ids: List[str] # required
|
team_ids: List[str] # required
|
||||||
|
|
||||||
|
|
||||||
|
class BlockTeamRequest(LiteLLMBase):
|
||||||
|
team_id: str # required
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_TeamTable(TeamBase):
|
class LiteLLM_TeamTable(TeamBase):
|
||||||
spend: Optional[float] = None
|
spend: Optional[float] = None
|
||||||
max_parallel_requests: Optional[int] = None
|
max_parallel_requests: Optional[int] = None
|
||||||
|
@ -498,6 +623,9 @@ class ConfigGeneralSettings(LiteLLMBase):
|
||||||
ui_access_mode: Optional[Literal["admin_only", "all"]] = Field(
|
ui_access_mode: Optional[Literal["admin_only", "all"]] = Field(
|
||||||
"all", description="Control access to the Proxy UI"
|
"all", description="Control access to the Proxy UI"
|
||||||
)
|
)
|
||||||
|
allowed_routes: Optional[List] = Field(
|
||||||
|
None, description="Proxy API Endpoints you want users to be able to access"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ConfigYAML(LiteLLMBase):
|
class ConfigYAML(LiteLLMBase):
|
||||||
|
@ -565,6 +693,8 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
|
||||||
team_tpm_limit: Optional[int] = None
|
team_tpm_limit: Optional[int] = None
|
||||||
team_rpm_limit: Optional[int] = None
|
team_rpm_limit: Optional[int] = None
|
||||||
team_max_budget: Optional[float] = None
|
team_max_budget: Optional[float] = None
|
||||||
|
team_models: List = []
|
||||||
|
team_blocked: bool = False
|
||||||
soft_budget: Optional[float] = None
|
soft_budget: Optional[float] = None
|
||||||
team_model_aliases: Optional[Dict] = None
|
team_model_aliases: Optional[Dict] = None
|
||||||
|
|
||||||
|
|
|
@ -8,45 +8,160 @@ Run checks for:
|
||||||
2. If user is in budget
|
2. If user is in budget
|
||||||
3. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
3. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||||
"""
|
"""
|
||||||
from litellm.proxy._types import LiteLLM_UserTable, LiteLLM_EndUserTable
|
from litellm.proxy._types import (
|
||||||
from typing import Optional
|
LiteLLM_UserTable,
|
||||||
|
LiteLLM_EndUserTable,
|
||||||
|
LiteLLM_JWTAuth,
|
||||||
|
LiteLLM_TeamTable,
|
||||||
|
LiteLLMRoutes,
|
||||||
|
)
|
||||||
|
from typing import Optional, Literal, Union
|
||||||
from litellm.proxy.utils import PrismaClient
|
from litellm.proxy.utils import PrismaClient
|
||||||
from litellm.caching import DualCache
|
from litellm.caching import DualCache
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes.value
|
||||||
|
|
||||||
|
|
||||||
def common_checks(
|
def common_checks(
|
||||||
request_body: dict,
|
request_body: dict,
|
||||||
user_object: LiteLLM_UserTable,
|
team_object: LiteLLM_TeamTable,
|
||||||
end_user_object: Optional[LiteLLM_EndUserTable],
|
end_user_object: Optional[LiteLLM_EndUserTable],
|
||||||
|
global_proxy_spend: Optional[float],
|
||||||
|
general_settings: dict,
|
||||||
|
route: str,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Common checks across jwt + key-based auth.
|
||||||
|
|
||||||
|
1. If team is blocked
|
||||||
|
2. If team can call model
|
||||||
|
3. If team is in budget
|
||||||
|
4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||||
|
5. [OPTIONAL] If 'enforce_end_user' enabled - did developer pass in 'user' param for openai endpoints
|
||||||
|
6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
|
||||||
|
"""
|
||||||
_model = request_body.get("model", None)
|
_model = request_body.get("model", None)
|
||||||
# 1. If user can call model
|
if team_object.blocked == True:
|
||||||
|
raise Exception(
|
||||||
|
f"Team={team_object.team_id} is blocked. Update via `/team/unblock` if your admin."
|
||||||
|
)
|
||||||
|
# 2. If user can call model
|
||||||
if (
|
if (
|
||||||
_model is not None
|
_model is not None
|
||||||
and len(user_object.models) > 0
|
and len(team_object.models) > 0
|
||||||
and _model not in user_object.models
|
and _model not in team_object.models
|
||||||
):
|
):
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"User={user_object.user_id} not allowed to call model={_model}. Allowed user models = {user_object.models}"
|
f"Team={team_object.team_id} not allowed to call model={_model}. Allowed team models = {team_object.models}"
|
||||||
)
|
)
|
||||||
# 2. If user is in budget
|
# 3. If team is in budget
|
||||||
if (
|
if (
|
||||||
user_object.max_budget is not None
|
team_object.max_budget is not None
|
||||||
and user_object.spend > user_object.max_budget
|
and team_object.spend is not None
|
||||||
|
and team_object.spend > team_object.max_budget
|
||||||
):
|
):
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"User={user_object.user_id} over budget. Spend={user_object.spend}, Budget={user_object.max_budget}"
|
f"Team={team_object.team_id} over budget. Spend={team_object.spend}, Budget={team_object.max_budget}"
|
||||||
)
|
)
|
||||||
# 3. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
# 4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||||
if end_user_object is not None and end_user_object.litellm_budget_table is not None:
|
if end_user_object is not None and end_user_object.litellm_budget_table is not None:
|
||||||
end_user_budget = end_user_object.litellm_budget_table.max_budget
|
end_user_budget = end_user_object.litellm_budget_table.max_budget
|
||||||
if end_user_budget is not None and end_user_object.spend > end_user_budget:
|
if end_user_budget is not None and end_user_object.spend > end_user_budget:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
|
f"ExceededBudget: End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
|
||||||
|
)
|
||||||
|
# 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
|
||||||
|
if (
|
||||||
|
general_settings.get("enforce_user_param", None) is not None
|
||||||
|
and general_settings["enforce_user_param"] == True
|
||||||
|
):
|
||||||
|
if route in LiteLLMRoutes.openai_routes.value and "user" not in request_body:
|
||||||
|
raise Exception(
|
||||||
|
f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
|
||||||
|
)
|
||||||
|
# 6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
|
||||||
|
if litellm.max_budget > 0 and global_proxy_spend is not None:
|
||||||
|
if global_proxy_spend > litellm.max_budget:
|
||||||
|
raise Exception(
|
||||||
|
f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
|
||||||
)
|
)
|
||||||
return True
|
return True
|
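common_checks now takes the team (not the user) plus the route, the general_settings dict, and the proxy-wide spend, and either returns True or raises with a descriptive message. A hedged usage sketch; the team object and request are fabricated, and the import path assumes auth_checks.py lives under litellm/proxy/auth/:

    from litellm.proxy._types import LiteLLM_TeamTable
    from litellm.proxy.auth.auth_checks import common_checks

    team = LiteLLM_TeamTable(
        team_id="team-1", models=["gpt-3.5-turbo"], max_budget=50.0, spend=12.5
    )

    common_checks(
        request_body={"model": "gpt-3.5-turbo", "user": "end-user-1"},
        team_object=team,
        end_user_object=None,            # no per-end-user budget to enforce
        global_proxy_spend=None,         # skips the proxy-wide budget check
        general_settings={"enforce_user_param": True},
        route="/v1/chat/completions",
    )  # True; raises if the team is blocked, over budget, or the model is not allowed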
||||||
|
|
||||||
|
|
||||||
|
def _allowed_routes_check(user_route: str, allowed_routes: list) -> bool:
|
||||||
|
for allowed_route in allowed_routes:
|
||||||
|
if (
|
||||||
|
allowed_route == LiteLLMRoutes.openai_routes.name
|
||||||
|
and user_route in LiteLLMRoutes.openai_routes.value
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
elif (
|
||||||
|
allowed_route == LiteLLMRoutes.info_routes.name
|
||||||
|
and user_route in LiteLLMRoutes.info_routes.value
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
elif (
|
||||||
|
allowed_route == LiteLLMRoutes.management_routes.name
|
||||||
|
and user_route in LiteLLMRoutes.management_routes.value
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
elif allowed_route == user_route:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_routes_check(
|
||||||
|
user_role: Literal["proxy_admin", "team"],
|
||||||
|
user_route: str,
|
||||||
|
litellm_proxy_roles: LiteLLM_JWTAuth,
|
||||||
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Check if user -> not admin - allowed to access these routes
|
||||||
|
"""
|
||||||
|
|
||||||
|
if user_role == "proxy_admin":
|
||||||
|
if litellm_proxy_roles.admin_allowed_routes is None:
|
||||||
|
is_allowed = _allowed_routes_check(
|
||||||
|
user_route=user_route, allowed_routes=["management_routes"]
|
||||||
|
)
|
||||||
|
return is_allowed
|
||||||
|
elif litellm_proxy_roles.admin_allowed_routes is not None:
|
||||||
|
is_allowed = _allowed_routes_check(
|
||||||
|
user_route=user_route,
|
||||||
|
allowed_routes=litellm_proxy_roles.admin_allowed_routes,
|
||||||
|
)
|
||||||
|
return is_allowed
|
||||||
|
|
||||||
|
elif user_role == "team":
|
||||||
|
if litellm_proxy_roles.team_allowed_routes is None:
|
||||||
|
"""
|
||||||
|
By default allow a team to call openai + info routes
|
||||||
|
"""
|
||||||
|
is_allowed = _allowed_routes_check(
|
||||||
|
user_route=user_route, allowed_routes=["openai_routes", "info_routes"]
|
||||||
|
)
|
||||||
|
return is_allowed
|
||||||
|
elif litellm_proxy_roles.team_allowed_routes is not None:
|
||||||
|
is_allowed = _allowed_routes_check(
|
||||||
|
user_route=user_route,
|
||||||
|
allowed_routes=litellm_proxy_roles.team_allowed_routes,
|
||||||
|
)
|
||||||
|
return is_allowed
|
||||||
|
return False
|
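allowed_routes_check resolves the JWT role against either named route groups or literal paths, falling back to sensible defaults when nothing is configured. With the default LiteLLM_JWTAuth settings (same import-path assumption as above):

    from litellm.proxy._types import LiteLLM_JWTAuth
    from litellm.proxy.auth.auth_checks import allowed_routes_check

    jwt_auth = LiteLLM_JWTAuth()  # team_allowed_routes -> ["openai_routes", "info_routes"]

    allowed_routes_check("team", "/v1/chat/completions", jwt_auth)   # True
    allowed_routes_check("team", "/key/generate", jwt_auth)          # False
    allowed_routes_check("proxy_admin", "/key/generate", jwt_auth)   # True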
||||||
|
|
||||||
|
|
||||||
|
def get_actual_routes(allowed_routes: list) -> list:
|
||||||
|
actual_routes: list = []
|
||||||
|
for route_name in allowed_routes:
|
||||||
|
try:
|
||||||
|
route_value = LiteLLMRoutes[route_name].value
|
||||||
|
actual_routes = actual_routes + route_value
|
||||||
|
except KeyError:
|
||||||
|
actual_routes.append(route_name)
|
||||||
|
return actual_routes
|
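get_actual_routes expands group names into their member paths and keeps unknown entries as literal routes, for example:

    from litellm.proxy.auth.auth_checks import get_actual_routes

    print(get_actual_routes(["info_routes", "/custom/health"]))
    # ["/key/info", "/team/info", "/user/info", "/model/info", "/custom/health"]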
||||||
|
|
||||||
|
|
||||||
async def get_end_user_object(
|
async def get_end_user_object(
|
||||||
end_user_id: Optional[str],
|
end_user_id: Optional[str],
|
||||||
prisma_client: Optional[PrismaClient],
|
prisma_client: Optional[PrismaClient],
|
||||||
|
@ -82,3 +197,75 @@ async def get_end_user_object(
|
||||||
return LiteLLM_EndUserTable(**response.dict())
|
return LiteLLM_EndUserTable(**response.dict())
|
||||||
except Exception as e: # if end-user not in db
|
except Exception as e: # if end-user not in db
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_user_object(self, user_id: str) -> LiteLLM_UserTable:
|
||||||
|
"""
|
||||||
|
- Check if user id in proxy User Table
|
||||||
|
- if valid, return LiteLLM_UserTable object with defined limits
|
||||||
|
- if not, then raise an error
|
||||||
|
"""
|
||||||
|
if self.prisma_client is None:
|
||||||
|
raise Exception(
|
||||||
|
"No DB Connected. See - https://docs.litellm.ai/docs/proxy/virtual_keys"
|
||||||
|
)
|
||||||
|
|
||||||
|
# check if in cache
|
||||||
|
cached_user_obj = self.user_api_key_cache.async_get_cache(key=user_id)
|
||||||
|
if cached_user_obj is not None:
|
||||||
|
if isinstance(cached_user_obj, dict):
|
||||||
|
return LiteLLM_UserTable(**cached_user_obj)
|
||||||
|
elif isinstance(cached_user_obj, LiteLLM_UserTable):
|
||||||
|
return cached_user_obj
|
||||||
|
# else, check db
|
||||||
|
try:
|
||||||
|
response = await self.prisma_client.db.litellm_usertable.find_unique(
|
||||||
|
where={"user_id": user_id}
|
||||||
|
)
|
||||||
|
|
||||||
|
if response is None:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
return LiteLLM_UserTable(**response.dict())
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(
|
||||||
|
f"User doesn't exist in db. User={user_id}. Create user via `/user/new` call."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_team_object(
|
||||||
|
team_id: str,
|
||||||
|
prisma_client: Optional[PrismaClient],
|
||||||
|
user_api_key_cache: DualCache,
|
||||||
|
) -> LiteLLM_TeamTable:
|
||||||
|
"""
|
||||||
|
- Check if team id in proxy Team Table
|
||||||
|
- if valid, return LiteLLM_TeamTable object with defined limits
|
||||||
|
- if not, then raise an error
|
||||||
|
"""
|
||||||
|
if prisma_client is None:
|
||||||
|
raise Exception(
|
||||||
|
"No DB Connected. See - https://docs.litellm.ai/docs/proxy/virtual_keys"
|
||||||
|
)
|
||||||
|
|
||||||
|
# check if in cache
|
||||||
|
cached_team_obj = user_api_key_cache.async_get_cache(key=team_id)
|
||||||
|
if cached_team_obj is not None:
|
||||||
|
if isinstance(cached_team_obj, dict):
|
||||||
|
return LiteLLM_TeamTable(**cached_team_obj)
|
||||||
|
elif isinstance(cached_team_obj, LiteLLM_TeamTable):
|
||||||
|
return cached_team_obj
|
||||||
|
# else, check db
|
||||||
|
try:
|
||||||
|
response = await prisma_client.db.litellm_teamtable.find_unique(
|
||||||
|
where={"team_id": team_id}
|
||||||
|
)
|
||||||
|
|
||||||
|
if response is None:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
return LiteLLM_TeamTable(**response.dict())
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(
|
||||||
|
f"Team doesn't exist in db. Team={team_id}. Create team via `/team/new` call."
|
||||||
|
)
|
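get_team_object is a coroutine that consults the in-memory cache before hitting Prisma and raises if the team is unknown. A sketch of calling it, where the prisma_client comes from proxy startup and is only referenced here (same import-path assumption as above):

    import asyncio
    from litellm.caching import DualCache
    from litellm.proxy.auth.auth_checks import get_team_object

    async def lookup_team(team_id: str, prisma_client):
        return await get_team_object(
            team_id=team_id,
            prisma_client=prisma_client,       # set up during proxy startup
            user_api_key_cache=DualCache(),
        )

    # asyncio.run(lookup_team("team-1", prisma_client))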
||||||
|
|
|
@ -6,50 +6,17 @@ Currently only supports admin.
|
||||||
JWT token must have 'litellm_proxy_admin' in scope.
|
JWT token must have 'litellm_proxy_admin' in scope.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import httpx
|
|
||||||
import jwt
|
import jwt
|
||||||
from jwt.algorithms import RSAAlgorithm
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from litellm.caching import DualCache
|
from litellm.caching import DualCache
|
||||||
from litellm.proxy._types import LiteLLMProxyRoles, LiteLLM_UserTable
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable
|
||||||
from litellm.proxy.utils import PrismaClient
|
from litellm.proxy.utils import PrismaClient
|
||||||
|
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
class HTTPHandler:
|
|
||||||
def __init__(self, concurrent_limit=1000):
|
|
||||||
# Create a client with a connection pool
|
|
||||||
self.client = httpx.AsyncClient(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=concurrent_limit,
|
|
||||||
max_keepalive_connections=concurrent_limit,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
async def close(self):
|
|
||||||
# Close the client when you're done with it
|
|
||||||
await self.client.aclose()
|
|
||||||
|
|
||||||
async def get(
|
|
||||||
self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None
|
|
||||||
):
|
|
||||||
response = await self.client.get(url, params=params, headers=headers)
|
|
||||||
return response
|
|
||||||
|
|
||||||
async def post(
|
|
||||||
self,
|
|
||||||
url: str,
|
|
||||||
data: Optional[dict] = None,
|
|
||||||
params: Optional[dict] = None,
|
|
||||||
headers: Optional[dict] = None,
|
|
||||||
):
|
|
||||||
response = await self.client.post(
|
|
||||||
url, data=data, params=params, headers=headers
|
|
||||||
)
|
|
||||||
return response
|
|
||||||
|
|
||||||
|
|
||||||
class JWTHandler:
|
class JWTHandler:
|
||||||
"""
|
"""
|
||||||
- treat the sub id passed in as the user id
|
- treat the sub id passed in as the user id
|
||||||
|
@ -67,105 +34,131 @@ class JWTHandler:
|
||||||
self.http_handler = HTTPHandler()
|
self.http_handler = HTTPHandler()
|
||||||
|
|
||||||
def update_environment(
|
def update_environment(
|
||||||
self, prisma_client: Optional[PrismaClient], user_api_key_cache: DualCache
|
self,
|
||||||
|
prisma_client: Optional[PrismaClient],
|
||||||
|
user_api_key_cache: DualCache,
|
||||||
|
litellm_jwtauth: LiteLLM_JWTAuth,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.prisma_client = prisma_client
|
self.prisma_client = prisma_client
|
||||||
self.user_api_key_cache = user_api_key_cache
|
self.user_api_key_cache = user_api_key_cache
|
||||||
|
self.litellm_jwtauth = litellm_jwtauth
|
||||||
|
|
||||||
def is_jwt(self, token: str):
|
def is_jwt(self, token: str):
|
||||||
parts = token.split(".")
|
parts = token.split(".")
|
||||||
return len(parts) == 3
|
return len(parts) == 3
|
||||||
|
|
||||||
def is_admin(self, scopes: list) -> bool:
|
def is_admin(self, scopes: list) -> bool:
|
||||||
if LiteLLMProxyRoles.PROXY_ADMIN.value in scopes:
|
if self.litellm_jwtauth.admin_jwt_scope in scopes:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_user_id(self, token: dict, default_value: str) -> str:
|
def is_team(self, scopes: list) -> bool:
|
||||||
|
if self.litellm_jwtauth.team_jwt_scope in scopes:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_end_user_id(self, token: dict, default_value: Optional[str]) -> str:
|
||||||
try:
|
try:
|
||||||
user_id = token["sub"]
|
if self.litellm_jwtauth.end_user_id_jwt_field is not None:
|
||||||
|
user_id = token[self.litellm_jwtauth.end_user_id_jwt_field]
|
||||||
|
else:
|
||||||
|
user_id = None
|
||||||
except KeyError:
|
except KeyError:
|
||||||
user_id = default_value
|
user_id = default_value
|
||||||
return user_id
|
return user_id
|
||||||
|
|
||||||
def get_team_id(self, token: dict, default_value: Optional[str]) -> Optional[str]:
|
def get_team_id(self, token: dict, default_value: Optional[str]) -> Optional[str]:
|
||||||
try:
|
try:
|
||||||
team_id = token["azp"]
|
team_id = token[self.litellm_jwtauth.team_id_jwt_field]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
team_id = default_value
|
team_id = default_value
|
||||||
return team_id
|
return team_id
|
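With the hard-coded 'sub'/'azp' claims replaced by configurable fields, claim extraction now follows litellm_jwtauth. A toy example with the defaults; the JWTHandler constructor arguments are not shown in this hunk, so a bare JWTHandler() is an assumption, and the import path assumes handle_jwt.py sits under litellm/proxy/auth/:

    from litellm.caching import DualCache
    from litellm.proxy._types import LiteLLM_JWTAuth
    from litellm.proxy.auth.handle_jwt import JWTHandler

    jwt_handler = JWTHandler()
    jwt_handler.update_environment(
        prisma_client=None,
        user_api_key_cache=DualCache(),
        litellm_jwtauth=LiteLLM_JWTAuth(),  # defaults: sub / client_id / litellm_team
    )

    claims = {"sub": "user-123", "client_id": "team-abc", "scope": "litellm_team"}
    print(jwt_handler.get_end_user_id(claims, default_value=None))  # user-123
    print(jwt_handler.get_team_id(claims, default_value=None))      # team-abc
    print(jwt_handler.is_team(jwt_handler.get_scopes(claims)))      # True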
||||||
|
|
||||||
async def get_user_object(self, user_id: str) -> LiteLLM_UserTable:
|
|
||||||
"""
|
|
||||||
- Check if user id in proxy User Table
|
|
||||||
- if valid, return LiteLLM_UserTable object with defined limits
|
|
||||||
- if not, then raise an error
|
|
||||||
"""
|
|
||||||
if self.prisma_client is None:
|
|
||||||
raise Exception(
|
|
||||||
"No DB Connected. See - https://docs.litellm.ai/docs/proxy/virtual_keys"
|
|
||||||
)
|
|
||||||
|
|
||||||
# check if in cache
|
|
||||||
cached_user_obj = self.user_api_key_cache.async_get_cache(key=user_id)
|
|
||||||
if cached_user_obj is not None:
|
|
||||||
if isinstance(cached_user_obj, dict):
|
|
||||||
return LiteLLM_UserTable(**cached_user_obj)
|
|
||||||
elif isinstance(cached_user_obj, LiteLLM_UserTable):
|
|
||||||
return cached_user_obj
|
|
||||||
# else, check db
|
|
||||||
try:
|
|
||||||
response = await self.prisma_client.db.litellm_usertable.find_unique(
|
|
||||||
where={"user_id": user_id}
|
|
||||||
)
|
|
||||||
|
|
||||||
if response is None:
|
|
||||||
raise Exception
|
|
||||||
|
|
||||||
return LiteLLM_UserTable(**response.dict())
|
|
||||||
except Exception as e:
|
|
||||||
raise Exception(
|
|
||||||
f"User doesn't exist in db. User={user_id}. Create user via `/user/new` call."
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_scopes(self, token: dict) -> list:
|
def get_scopes(self, token: dict) -> list:
|
||||||
try:
|
try:
|
||||||
|
if isinstance(token["scope"], str):
|
||||||
# Assuming the scopes are stored in 'scope' claim and are space-separated
|
# Assuming the scopes are stored in 'scope' claim and are space-separated
|
||||||
scopes = token["scope"].split()
|
scopes = token["scope"].split()
|
||||||
|
elif isinstance(token["scope"], list):
|
||||||
|
scopes = token["scope"]
|
||||||
|
else:
|
||||||
|
raise Exception(
|
||||||
|
f"Unmapped scope type - {type(token['scope'])}. Supported types - list, str."
|
||||||
|
)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
scopes = []
|
scopes = []
|
||||||
return scopes
|
return scopes
|
||||||
|
|
||||||
async def auth_jwt(self, token: str) -> dict:
|
async def get_public_key(self, kid: Optional[str]) -> dict:
|
||||||
keys_url = os.getenv("JWT_PUBLIC_KEY_URL")
|
keys_url = os.getenv("JWT_PUBLIC_KEY_URL")
|
||||||
|
|
||||||
if keys_url is None:
|
if keys_url is None:
|
||||||
raise Exception("Missing JWT Public Key URL from environment.")
|
raise Exception("Missing JWT Public Key URL from environment.")
|
||||||
|
|
||||||
|
cached_keys = await self.user_api_key_cache.async_get_cache(
|
||||||
|
"litellm_jwt_auth_keys"
|
||||||
|
)
|
||||||
|
if cached_keys is None:
|
||||||
response = await self.http_handler.get(keys_url)
|
response = await self.http_handler.get(keys_url)
|
||||||
|
|
||||||
keys = response.json()["keys"]
|
keys = response.json()["keys"]
|
||||||
|
|
||||||
header = jwt.get_unverified_header(token)
|
await self.user_api_key_cache.async_set_cache(
|
||||||
kid = header["kid"]
|
key="litellm_jwt_auth_keys",
|
||||||
|
value=keys,
|
||||||
|
ttl=self.litellm_jwtauth.public_key_ttl, # cache for 10 mins
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
keys = cached_keys
|
||||||
|
|
||||||
|
public_key: Optional[dict] = None
|
||||||
|
|
||||||
|
if len(keys) == 1:
|
||||||
|
if kid is None or key["kid"] == kid:
|
||||||
|
public_key = keys[0]
|
||||||
|
elif len(keys) > 1:
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if key["kid"] == kid:
|
if kid is not None and key["kid"] == kid:
|
||||||
jwk = {
|
public_key = key
|
||||||
"kty": key["kty"],
|
|
||||||
"kid": key["kid"],
|
if public_key is None:
|
||||||
"n": key["n"],
|
raise Exception(
|
||||||
"e": key["e"],
|
f"No matching public key found. kid={kid}, keys_url={keys_url}, cached_keys={cached_keys}"
|
||||||
}
|
)
|
||||||
public_key = RSAAlgorithm.from_jwk(json.dumps(jwk))
|
|
||||||
|
return public_key
|
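get_public_key now pulls the JWKS from the URL in the environment and caches the key set, so the identity provider is only re-queried after public_key_ttl expires. A brief sketch (the JWKS URL is a placeholder):

    import os

    # Any OIDC provider's JWKS endpoint works here; this URL is illustrative only.
    os.environ["JWT_PUBLIC_KEY_URL"] = "https://idp.example.com/realms/litellm/protocol/openid-connect/certs"

    # First call fetches the key set and caches it under "litellm_jwt_auth_keys"
    # for litellm_jwtauth.public_key_ttl seconds (600 by default); later calls hit the cache.
    # public_key = await jwt_handler.get_public_key(kid=None)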
||||||
|
|
||||||
|
async def auth_jwt(self, token: str) -> dict:
|
||||||
|
from jwt.algorithms import RSAAlgorithm
|
||||||
|
|
||||||
|
header = jwt.get_unverified_header(token)
|
||||||
|
|
||||||
|
verbose_proxy_logger.debug("header: %s", header)
|
||||||
|
|
||||||
|
kid = header.get("kid", None)
|
||||||
|
|
||||||
|
public_key = await self.get_public_key(kid=kid)
|
||||||
|
|
||||||
|
if public_key is not None and isinstance(public_key, dict):
|
||||||
|
jwk = {}
|
||||||
|
if "kty" in public_key:
|
||||||
|
jwk["kty"] = public_key["kty"]
|
||||||
|
if "kid" in public_key:
|
||||||
|
jwk["kid"] = public_key["kid"]
|
||||||
|
if "n" in public_key:
|
||||||
|
jwk["n"] = public_key["n"]
|
||||||
|
if "e" in public_key:
|
||||||
|
jwk["e"] = public_key["e"]
|
||||||
|
|
||||||
|
public_key_rsa = RSAAlgorithm.from_jwk(json.dumps(jwk))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# decode the token using the public key
|
# decode the token using the public key
|
||||||
payload = jwt.decode(
|
payload = jwt.decode(
|
||||||
token,
|
token,
|
||||||
public_key, # type: ignore
|
public_key_rsa, # type: ignore
|
||||||
algorithms=["RS256"],
|
algorithms=["RS256"],
|
||||||
audience="account",
|
options={"verify_aud": False},
|
||||||
)
|
)
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
|
|