forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_call_id_in_response
commit 72f1c9181d
119 changed files with 4737 additions and 1868 deletions
|
@ -243,7 +243,102 @@ jobs:
|
|||
command: |
|
||||
pwd
|
||||
ls
|
||||
python -m pytest -vv tests/ -x --junitxml=test-results/junit.xml --durations=5
|
||||
python -m pytest -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests
|
||||
no_output_timeout: 120m
|
||||
|
||||
# Store test results
|
||||
- store_test_results:
|
||||
path: test-results
|
||||
proxy_log_to_otel_tests:
|
||||
machine:
|
||||
image: ubuntu-2204:2023.10.1
|
||||
resource_class: xlarge
|
||||
working_directory: ~/project
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: Install Docker CLI (In case it's not already installed)
|
||||
command: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y docker-ce docker-ce-cli containerd.io
|
||||
- run:
|
||||
name: Install Python 3.9
|
||||
command: |
|
||||
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
|
||||
bash miniconda.sh -b -p $HOME/miniconda
|
||||
export PATH="$HOME/miniconda/bin:$PATH"
|
||||
conda init bash
|
||||
source ~/.bashrc
|
||||
conda create -n myenv python=3.9 -y
|
||||
conda activate myenv
|
||||
python --version
|
||||
- run:
|
||||
name: Install Dependencies
|
||||
command: |
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install aiohttp
|
||||
pip install openai
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install -r .circleci/requirements.txt
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-mock==3.12.0"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install mypy
|
||||
pip install pyarrow
|
||||
pip install numpydoc
|
||||
pip install prisma
|
||||
pip install fastapi
|
||||
pip install jsonschema
|
||||
pip install "httpx==0.24.1"
|
||||
pip install "anyio==3.7.1"
|
||||
pip install "asyncio==3.4.3"
|
||||
pip install "PyGithub==1.59.1"
|
||||
- run:
|
||||
name: Build Docker image
|
||||
command: docker build -t my-app:latest -f Dockerfile.database .
|
||||
- run:
|
||||
name: Run Docker container
|
||||
# intentionally give bad redis credentials here
|
||||
# the OTEL test - should get this as a trace
|
||||
command: |
|
||||
docker run -d \
|
||||
-p 4000:4000 \
|
||||
-e DATABASE_URL=$PROXY_DATABASE_URL \
|
||||
-e REDIS_HOST=$REDIS_HOST \
|
||||
-e REDIS_PASSWORD=$REDIS_PASSWORD \
|
||||
-e REDIS_PORT=$REDIS_PORT \
|
||||
-e LITELLM_MASTER_KEY="sk-1234" \
|
||||
-e OPENAI_API_KEY=$OPENAI_API_KEY \
|
||||
-e LITELLM_LICENSE=$LITELLM_LICENSE \
|
||||
-e OTEL_EXPORTER="in_memory" \
|
||||
--name my-app \
|
||||
-v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \
|
||||
my-app:latest \
|
||||
--config /app/config.yaml \
|
||||
--port 4000 \
|
||||
--detailed_debug \
|
||||
- run:
|
||||
name: Install curl and dockerize
|
||||
command: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y curl
|
||||
sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
|
||||
sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
|
||||
sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
|
||||
- run:
|
||||
name: Start outputting logs
|
||||
command: docker logs -f my-app
|
||||
background: true
|
||||
- run:
|
||||
name: Wait for app to be ready
|
||||
command: dockerize -wait http://localhost:4000 -timeout 5m
|
||||
- run:
|
||||
name: Run tests
|
||||
command: |
|
||||
pwd
|
||||
ls
|
||||
python -m pytest -vv tests/otel_tests/test_otel.py -x --junitxml=test-results/junit.xml --durations=5
|
||||
no_output_timeout: 120m
|
||||
|
||||
# Store test results
|
||||
|
@ -337,6 +432,12 @@ workflows:
|
|||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- proxy_log_to_otel_tests:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- main
|
||||
- /litellm_.*/
|
||||
- installing_litellm_on_python:
|
||||
filters:
|
||||
branches:
|
||||
|
@ -347,6 +448,7 @@ workflows:
|
|||
requires:
|
||||
- local_testing
|
||||
- build_and_test
|
||||
- proxy_log_to_otel_tests
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
|
|
|
@ -1,88 +0,0 @@
|
|||
apiVersion: v1
|
||||
entries:
|
||||
postgresql:
|
||||
- annotations:
|
||||
category: Database
|
||||
images: |
|
||||
- name: os-shell
|
||||
image: docker.io/bitnami/os-shell:12-debian-12-r16
|
||||
- name: postgres-exporter
|
||||
image: docker.io/bitnami/postgres-exporter:0.15.0-debian-12-r14
|
||||
- name: postgresql
|
||||
image: docker.io/bitnami/postgresql:16.2.0-debian-12-r6
|
||||
licenses: Apache-2.0
|
||||
apiVersion: v2
|
||||
appVersion: 16.2.0
|
||||
created: "2024-07-08T11:05:19.312515+08:00"
|
||||
dependencies:
|
||||
- name: common
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
tags:
|
||||
- bitnami-common
|
||||
version: 2.x.x
|
||||
description: PostgreSQL (Postgres) is an open source object-relational database
|
||||
known for reliability and data integrity. ACID-compliant, it supports foreign
|
||||
keys, joins, views, triggers and stored procedures.
|
||||
digest: 3c8125526b06833df32e2f626db34aeaedb29d38f03d15349db6604027d4a167
|
||||
home: https://bitnami.com
|
||||
icon: https://bitnami.com/assets/stacks/postgresql/img/postgresql-stack-220x234.png
|
||||
keywords:
|
||||
- postgresql
|
||||
- postgres
|
||||
- database
|
||||
- sql
|
||||
- replication
|
||||
- cluster
|
||||
maintainers:
|
||||
- name: VMware, Inc.
|
||||
url: https://github.com/bitnami/charts
|
||||
name: postgresql
|
||||
sources:
|
||||
- https://github.com/bitnami/charts/tree/main/bitnami/postgresql
|
||||
urls:
|
||||
- https://berriai.github.io/litellm/charts/postgresql-14.3.1.tgz
|
||||
version: 14.3.1
|
||||
redis:
|
||||
- annotations:
|
||||
category: Database
|
||||
images: |
|
||||
- name: kubectl
|
||||
image: docker.io/bitnami/kubectl:1.29.2-debian-12-r3
|
||||
- name: os-shell
|
||||
image: docker.io/bitnami/os-shell:12-debian-12-r16
|
||||
- name: redis
|
||||
image: docker.io/bitnami/redis:7.2.4-debian-12-r9
|
||||
- name: redis-exporter
|
||||
image: docker.io/bitnami/redis-exporter:1.58.0-debian-12-r4
|
||||
- name: redis-sentinel
|
||||
image: docker.io/bitnami/redis-sentinel:7.2.4-debian-12-r7
|
||||
licenses: Apache-2.0
|
||||
apiVersion: v2
|
||||
appVersion: 7.2.4
|
||||
created: "2024-07-08T11:05:19.317065+08:00"
|
||||
dependencies:
|
||||
- name: common
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
tags:
|
||||
- bitnami-common
|
||||
version: 2.x.x
|
||||
description: Redis(R) is an open source, advanced key-value store. It is often
|
||||
referred to as a data structure server since keys can contain strings, hashes,
|
||||
lists, sets and sorted sets.
|
||||
digest: b2fa1835f673a18002ca864c54fadac3c33789b26f6c5e58e2851b0b14a8f984
|
||||
home: https://bitnami.com
|
||||
icon: https://bitnami.com/assets/stacks/redis/img/redis-stack-220x234.png
|
||||
keywords:
|
||||
- redis
|
||||
- keyvalue
|
||||
- database
|
||||
maintainers:
|
||||
- name: VMware, Inc.
|
||||
url: https://github.com/bitnami/charts
|
||||
name: redis
|
||||
sources:
|
||||
- https://github.com/bitnami/charts/tree/main/bitnami/redis
|
||||
urls:
|
||||
- https://berriai.github.io/litellm/charts/redis-18.19.1.tgz
|
||||
version: 18.19.1
|
||||
generated: "2024-07-08T11:05:19.308028+08:00"
|
54
docs/my-website/docs/anthropic_completion.md
Normal file
|
@ -0,0 +1,54 @@
|
|||
# [BETA] Anthropic `/v1/messages`
|
||||
|
||||
Call 100+ LLMs in the Anthropic format.
|
||||
|
||||
|
||||
1. Setup config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: my-test-model
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo
|
||||
```
|
||||
|
||||
2. Start proxy
|
||||
|
||||
```bash
|
||||
litellm --config /path/to/config.yaml
|
||||
```
|
||||
|
||||
3. Test it!
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://0.0.0.0:4000/v1/messages' \
|
||||
-H 'x-api-key: sk-1234' \
|
||||
-H 'content-type: application/json' \
|
||||
-d '{
|
||||
"model": "my-test-model",
|
||||
"max_tokens": 1024,
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world"}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
## Test with Anthropic SDK
|
||||
|
||||
```python
|
||||
import os
|
||||
from anthropic import Anthropic
|
||||
|
||||
client = Anthropic(api_key="sk-1234", base_url="http://0.0.0.0:4000") # 👈 CONNECT TO PROXY
|
||||
|
||||
message = client.messages.create(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, Claude",
|
||||
}
|
||||
],
|
||||
model="my-test-model", # 👈 set 'model_name'
|
||||
)
|
||||
print(message.content)
|
||||
```
|
|
@ -26,6 +26,7 @@ Call an existing Assistant.
|
|||
|
||||
- Run the Assistant on the Thread to generate a response by calling the model and the tools.
|
||||
|
||||
### SDK + PROXY
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
||||
|
@ -281,3 +282,31 @@ curl -X POST 'http://0.0.0.0:4000/threads/{thread_id}/runs' \
|
|||
</Tabs>
|
||||
|
||||
## [👉 Proxy API Reference](https://litellm-api.up.railway.app/#/assistants)
|
||||
|
||||
## OpenAI-Compatible APIs
|
||||
|
||||
To call OpenAI-compatible Assistants APIs (e.g. the Astra Assistants API), just add `openai/` to the model name:
|
||||
|
||||
|
||||
**config**
|
||||
```yaml
|
||||
assistant_settings:
|
||||
custom_llm_provider: openai
|
||||
litellm_params:
|
||||
api_key: os.environ/ASTRA_API_KEY
|
||||
api_base: os.environ/ASTRA_API_BASE
|
||||
```
|
||||
|
||||
**curl**
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:4000/v1/assistants" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer sk-1234" \
|
||||
-d '{
|
||||
"instructions": "You are a personal math tutor. When asked a question, write and run Python code to answer the question.",
|
||||
"name": "Math Tutor",
|
||||
"tools": [{"type": "code_interpreter"}],
|
||||
"model": "openai/<my-astra-model-name>"
|
||||
}'
|
||||
```
|
34
docs/my-website/docs/data_security.md
Normal file
|
@ -0,0 +1,34 @@
|
|||
# Data Privacy and Security
|
||||
|
||||
## Security Measures
|
||||
|
||||
### LiteLLM Cloud
|
||||
|
||||
- We encrypt all stored data using your `LITELLM_MASTER_KEY`, and all data in transit using TLS.
|
||||
- Our database and application run on GCP and AWS infrastructure, partly managed by NeonDB.
|
||||
- US data region: Northern California (AWS/GCP `us-west-1`) & Virginia (AWS `us-east-1`)
|
||||
- EU data region: Germany/Frankfurt (AWS/GCP `eu-central-1`)
|
||||
- All users have access to SSO (Single Sign-On) through OAuth 2.0 with Google, Okta, Microsoft, KeyCloak.
|
||||
- Audit Logs with retention policy
|
||||
- Control Allowed IP Addresses that can access your Cloud LiteLLM Instance
|
||||
|
||||
For security inquiries, please contact us at support@berri.ai
|
||||
|
||||
### Supported data regions for LiteLLM Cloud
|
||||
|
||||
LiteLLM supports the following data regions:
|
||||
|
||||
- US, Northern California (AWS/GCP `us-west-1`)
|
||||
- Europe, Frankfurt, Germany (AWS/GCP `eu-central-1`)
|
||||
|
||||
All data, user accounts, and infrastructure are completely separated between these two regions.
|
||||
|
||||
### Security Vulnerability Reporting Guidelines
|
||||
|
||||
We value the security community's role in protecting our systems and users. To report a security vulnerability:
|
||||
|
||||
- Email support@berri.ai with details
|
||||
- Include steps to reproduce the issue
|
||||
- Provide any relevant additional information
|
||||
|
||||
We'll review all reports promptly. Note that we don't currently offer a bug bounty program.
|
|
@ -24,6 +24,7 @@ This covers:
|
|||
- ✅ [JWT-Auth](../docs/proxy/token_auth.md)
|
||||
- ✅ [Control available public, private routes](./proxy/enterprise#control-available-public-private-routes)
|
||||
- ✅ [[BETA] AWS Key Manager v2 - Key Decryption](./proxy/enterprise#beta-aws-key-manager---key-decryption)
|
||||
- ✅ IP address‑based access control lists
|
||||
- ✅ Track Request IP Address
|
||||
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](./proxy/pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
|
||||
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](./proxy/enterprise#enforce-required-params-for-llm-requests)
|
||||
|
|
|
@ -21,6 +21,14 @@ See our status page for [**live reliability**](https://status.litellm.ai/)
|
|||
- **Reliable**: Our hosted proxy is tested on 1k requests per second, making it reliable for high load.
|
||||
- **Secure**: LiteLLM is currently undergoing SOC-2 compliance to make sure your data is as secure as possible.
|
||||
|
||||
## Data Privacy & Security
|
||||
|
||||
You can find our [data privacy & security policy for cloud litellm here](../docs/data_security#litellm-cloud)
|
||||
|
||||
## Supported data regions for LiteLLM Cloud
|
||||
|
||||
You can find the [supported data regions for LiteLLM Cloud here](../docs/data_security#supported-data-regions-for-litellm-cloud)
|
||||
|
||||
### Pricing
|
||||
|
||||
Pricing is based on usage. We can figure out a price that works for your team, on the call.
|
||||
|
|
|
@ -18,6 +18,7 @@ Features:
|
|||
- ✅ [JWT-Auth](../docs/proxy/token_auth.md)
|
||||
- ✅ [Control available public, private routes](#control-available-public-private-routes)
|
||||
- ✅ [[BETA] AWS Key Manager v2 - Key Decryption](#beta-aws-key-manager---key-decryption)
|
||||
- ✅ IP address‑based access control lists
|
||||
- ✅ Track Request IP Address
|
||||
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
|
||||
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
|
||||
|
|
|
@ -112,37 +112,52 @@ model_list:
|
|||
mode: completion # 👈 ADD THIS
|
||||
```
|
||||
|
||||
### Speech to Text Models
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: whisper
|
||||
litellm_params:
|
||||
model: whisper-1
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
model_info:
|
||||
mode: audio_transcription
|
||||
```
|
||||
|
||||
|
||||
## `/health/readiness`
|
||||
|
||||
Unprotected endpoint for checking if proxy is ready to accept requests
|
||||
|
||||
Example Request:
|
||||
|
||||
```bash
|
||||
curl --location 'http://0.0.0.0:4000/health/readiness'
|
||||
```bash
|
||||
curl http://0.0.0.0:4000/health/readiness
|
||||
```
|
||||
|
||||
Example Response:
|
||||
|
||||
*If proxy connected to a database*
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "healthy",
|
||||
"db": "connected",
|
||||
"litellm_version":"1.19.2",
|
||||
"status": "connected",
|
||||
"db": "connected",
|
||||
"cache": null,
|
||||
"litellm_version": "1.40.21",
|
||||
"success_callbacks": [
|
||||
"langfuse",
|
||||
"_PROXY_track_cost_callback",
|
||||
"response_taking_too_long_callback",
|
||||
"_PROXY_MaxParallelRequestsHandler",
|
||||
"_PROXY_MaxBudgetLimiter",
|
||||
"_PROXY_CacheControlCheck",
|
||||
"ServiceLogging"
|
||||
],
|
||||
"last_updated": "2024-07-10T18:59:10.616968"
|
||||
}
|
||||
```
|
||||
|
||||
*If proxy not connected to a database*
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "healthy",
|
||||
"db": "Not connected",
|
||||
"litellm_version":"1.19.2",
|
||||
}
|
||||
```
|
||||
If the proxy is not connected to a database, then the `"db"` field will be `"Not connected"` instead of `"connected"`, and the `"last_updated"` field will not be present.
|
||||
|
||||
## `/health/liveliness`
|
||||
|
||||
|
|
|
@ -1,27 +1,19 @@
|
|||
# 🪢 Logging
|
||||
|
||||
Log Proxy input, output, and exceptions using:
|
||||
|
||||
- Langfuse
|
||||
- OpenTelemetry
|
||||
- Custom Callbacks
|
||||
- DataDog
|
||||
- DynamoDB
|
||||
- s3 Bucket
|
||||
- etc.
|
||||
|
||||
import Image from '@theme/IdealImage';
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
|
||||
# 🪢 Logging - Langfuse, OpenTelemetry, Custom Callbacks, DataDog, s3 Bucket, Sentry, Athina, Azure Content-Safety
|
||||
|
||||
Log Proxy Input, Output, Exceptions using Langfuse, OpenTelemetry, Custom Callbacks, DataDog, DynamoDB, s3 Bucket
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Logging to Langfuse](#logging-proxy-inputoutput---langfuse)
|
||||
- [Logging with OpenTelemetry](#logging-proxy-inputoutput-in-opentelemetry-format)
|
||||
- [Async Custom Callbacks](#custom-callback-class-async)
|
||||
- [Async Custom Callback APIs](#custom-callback-apis-async)
|
||||
- [Logging to Galileo](#logging-llm-io-to-galileo)
|
||||
- [Logging to OpenMeter](#logging-proxy-cost--usage---openmeter)
|
||||
- [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets)
|
||||
- [Logging to DataDog](#logging-proxy-inputoutput---datadog)
|
||||
- [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb)
|
||||
- [Logging to Sentry](#logging-proxy-inputoutput---sentry)
|
||||
- [Logging to Athina](#logging-proxy-inputoutput-athina)
|
||||
- [(BETA) Moderation with Azure Content-Safety](#moderation-with-azure-content-safety)
|
||||
|
||||
## Getting the LiteLLM Call ID
|
||||
|
||||
LiteLLM generates a unique `call_id` for each request. This `call_id` can be
|
||||
|
@ -56,6 +48,7 @@ A number of these headers could be useful for troubleshooting, but the
|
|||
components in your system, including in logging tools.
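A minimal sketch of pulling the call ID off a proxy response, assuming a locally running proxy with the example master key used elsewhere in these docs, and assuming the ID is surfaced in an `x-litellm-call-id` response header (the header name is an assumption here, not something this section confirms):

```python
import requests

# Illustrative values: local proxy URL, example master key, and a model name
# that must exist in your config.yaml.
resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "what llm are you"}],
    },
)

# Assumed header name; forward this ID to your logging/tracing tools.
print(resp.headers.get("x-litellm-call-id"))
```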
|
||||
|
||||
## Logging Proxy Input/Output - Langfuse
|
||||
|
||||
We will use the `--config` to set `litellm.success_callback = ["langfuse"]`. This will log all successful LLM calls to Langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment.
|
||||
|
||||
**Step 1** Install langfuse
|
||||
|
@ -65,6 +58,7 @@ pip install langfuse>=2.0.0
|
|||
```
|
||||
|
||||
**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
|
@ -75,6 +69,7 @@ litellm_settings:
|
|||
```
|
||||
|
||||
**Step 3**: Set required env variables for logging to langfuse
|
||||
|
||||
```shell
|
||||
export LANGFUSE_PUBLIC_KEY="pk_kk"
|
||||
export LANGFUSE_SECRET_KEY="sk_ss"
|
||||
|
@ -85,11 +80,13 @@ export LANGFUSE_HOST="https://xxx.langfuse.com"
|
|||
**Step 4**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
||||
```shell
|
||||
litellm --config config.yaml --debug
|
||||
```
|
||||
|
||||
Test Request
|
||||
|
||||
```
|
||||
litellm --test
|
||||
```
|
||||
|
@ -100,7 +97,6 @@ Expected output on Langfuse
|
|||
|
||||
### Logging Metadata to Langfuse
|
||||
|
||||
|
||||
<Tabs>
|
||||
|
||||
<TabItem value="Curl" label="Curl Request">
|
||||
|
@ -126,6 +122,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
}
|
||||
}'
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="openai" label="OpenAI v1.0.0+">
|
||||
|
||||
|
@ -159,6 +156,7 @@ response = client.chat.completions.create(
|
|||
|
||||
print(response)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="langchain" label="Langchain">
|
||||
|
||||
|
@ -201,7 +199,6 @@ print(response)
|
|||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
|
||||
### Team based Logging to Langfuse
|
||||
|
||||
**Example:**
|
||||
|
@ -290,6 +287,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
}
|
||||
}'
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="openai" label="OpenAI v1.0.0+">
|
||||
|
||||
|
@ -320,6 +318,7 @@ response = client.chat.completions.create(
|
|||
|
||||
print(response)
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="langchain" label="Langchain">
|
||||
|
||||
|
@ -365,7 +364,6 @@ You will see `raw_request` in your Langfuse Metadata. This is the RAW CURL comma
|
|||
|
||||
<Image img={require('../../img/debug_langfuse.png')} />
|
||||
|
||||
|
||||
## Logging Proxy Input/Output in OpenTelemetry format
|
||||
|
||||
:::info
|
||||
|
@ -381,10 +379,8 @@ OTEL_SERVICE_NAME=<your-service-name>` # default="litellm"
|
|||
|
||||
<Tabs>
|
||||
|
||||
|
||||
<TabItem value="Console Exporter" label="Log to console">
|
||||
|
||||
|
||||
**Step 1:** Set callbacks and env vars
|
||||
|
||||
Add the following to your env
|
||||
|
@ -400,7 +396,6 @@ litellm_settings:
|
|||
callbacks: ["otel"]
|
||||
```
|
||||
|
||||
|
||||
**Step 2**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
@ -460,7 +455,6 @@ This is the Span from OTEL Logging
|
|||
|
||||
</TabItem>
|
||||
|
||||
|
||||
<TabItem value="Honeycomb" label="Log to Honeycomb">
|
||||
|
||||
#### Quick Start - Log to Honeycomb
|
||||
|
@ -482,7 +476,6 @@ litellm_settings:
|
|||
callbacks: ["otel"]
|
||||
```
|
||||
|
||||
|
||||
**Step 2**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
@ -507,10 +500,8 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
}'
|
||||
```
|
||||
|
||||
|
||||
</TabItem>
|
||||
|
||||
|
||||
<TabItem value="otel-col" label="Log to OTEL HTTP Collector">
|
||||
|
||||
#### Quick Start - Log to OTEL Collector
|
||||
|
@ -532,7 +523,6 @@ litellm_settings:
|
|||
callbacks: ["otel"]
|
||||
```
|
||||
|
||||
|
||||
**Step 2**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
@ -559,7 +549,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
|
||||
</TabItem>
|
||||
|
||||
|
||||
<TabItem value="otel-col-grpc" label="Log to OTEL GRPC Collector">
|
||||
|
||||
#### Quick Start - Log to OTEL GRPC Collector
|
||||
|
@ -581,7 +570,6 @@ litellm_settings:
|
|||
callbacks: ["otel"]
|
||||
```
|
||||
|
||||
|
||||
**Step 2**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
@ -606,7 +594,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
}'
|
||||
```
|
||||
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="traceloop" label="Log to Traceloop Cloud">
|
||||
|
@ -629,7 +616,6 @@ environment_variables:
|
|||
TRACELOOP_API_KEY: "XXXXX"
|
||||
```
|
||||
|
||||
|
||||
**Step 3**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
@ -665,11 +651,15 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
❓ Use this when you want to **pass information about the incoming request in a distributed tracing system**
|
||||
|
||||
✅ Key change: Pass the **`traceparent` header** in your requests. [Read more about traceparent headers here](https://uptrace.dev/opentelemetry/opentelemetry-traceparent.html#what-is-traceparent-header)
|
||||
|
||||
```curl
|
||||
traceparent: 00-80e1afed08e019fc1110464cfa66635c-7a085853722dc6d2-01
|
||||
```
|
||||
|
||||
Example Usage
|
||||
|
||||
1. Make Request to LiteLLM Proxy with `traceparent` header
|
||||
|
||||
```python
|
||||
import openai
|
||||
import uuid
|
||||
|
@ -693,7 +683,6 @@ response = client.chat.completions.create(
|
|||
)
|
||||
|
||||
print(response)
|
||||
|
||||
```
|
||||
|
||||
```shell
|
||||
|
@ -707,12 +696,12 @@ Search for Trace=`80e1afed08e019fc1110464cfa66635c` on your OTEL Collector
|
|||
|
||||
<Image img={require('../../img/otel_parent.png')} />
|
||||
|
||||
|
||||
|
||||
## Custom Callback Class [Async]
|
||||
|
||||
Use this when you want to run custom callbacks in `python`
|
||||
|
||||
#### Step 1 - Create your custom `litellm` callback class
|
||||
|
||||
We use `litellm.integrations.custom_logger` for this; **more details about litellm custom callbacks [here](https://docs.litellm.ai/docs/observability/custom_callback)**.
|
||||
|
||||
Define your custom callback class in a python file.
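A condensed sketch of such a class (the full example below defines more hooks; the method names come from `litellm.integrations.custom_logger.CustomLogger`):

```python
# custom_callbacks.py
from litellm.integrations.custom_logger import CustomLogger


class MyCustomHandler(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print("On Success")

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        print("On Async Success")

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print("On Async Failure")


# Instance name referenced from config.yaml in Step 2
proxy_handler_instance = MyCustomHandler()
```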
|
||||
|
@ -815,16 +804,17 @@ proxy_handler_instance = MyCustomHandler()
|
|||
```
|
||||
|
||||
#### Step 2 - Pass your custom callback class in `config.yaml`
|
||||
|
||||
We pass the custom callback class defined in **Step1** to the config.yaml.
|
||||
Set `callbacks` to `python_filename.logger_instance_name`
|
||||
|
||||
In the config below, we pass
|
||||
|
||||
- python_filename: `custom_callbacks.py`
|
||||
- logger_instance_name: `proxy_handler_instance`. This is defined in Step 1
|
||||
|
||||
`callbacks: custom_callbacks.proxy_handler_instance`
|
||||
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
|
@ -837,6 +827,7 @@ litellm_settings:
|
|||
```
|
||||
|
||||
#### Step 3 - Start proxy + test request
|
||||
|
||||
```shell
|
||||
litellm --config proxy_config.yaml
|
||||
```
|
||||
|
@ -858,6 +849,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
```
|
||||
|
||||
#### Resulting Log on Proxy
|
||||
|
||||
```shell
|
||||
On Success
|
||||
Model: gpt-3.5-turbo,
|
||||
|
@ -910,7 +902,6 @@ class MyCustomHandler(CustomLogger):
|
|||
"max_tokens": 10
|
||||
}
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
#### Logging `model_info` set in config.yaml
|
||||
|
@ -928,11 +919,13 @@ class MyCustomHandler(CustomLogger):
|
|||
```
|
||||
|
||||
**Expected Output**
|
||||
|
||||
```json
|
||||
{'mode': 'embedding', 'input_cost_per_token': 0.002}
|
||||
```
|
||||
|
||||
### Logging responses from proxy
|
||||
|
||||
Both `/chat/completions` and `/embeddings` responses are available as `response_obj`
|
||||
|
||||
**Note: for `/chat/completions`, both `stream=True` and `non stream` responses are available as `response_obj`**
|
||||
|
@ -946,6 +939,7 @@ class MyCustomHandler(CustomLogger):
|
|||
```
|
||||
|
||||
**Expected Output /chat/completion [for both `stream` and `non-stream` responses]**
|
||||
|
||||
```json
|
||||
ModelResponse(
|
||||
id='chatcmpl-8Tfu8GoMElwOZuj2JlHBhNHG01PPo',
|
||||
|
@ -972,6 +966,7 @@ ModelResponse(
|
|||
```
|
||||
|
||||
**Expected Output /embeddings**
|
||||
|
||||
```json
|
||||
{
|
||||
'model': 'ada',
|
||||
|
@ -991,7 +986,6 @@ ModelResponse(
|
|||
}
|
||||
```
|
||||
|
||||
|
||||
## Custom Callback APIs [Async]
|
||||
|
||||
:::info
|
||||
|
@ -1001,10 +995,12 @@ This is an Enterprise only feature [Get Started with Enterprise here](https://gi
|
|||
:::
|
||||
|
||||
Use this if you:
|
||||
|
||||
- Want to use custom callbacks written in a non Python programming language
|
||||
- Want your callbacks to run on a different microservice
|
||||
|
||||
#### Step 1. Create your generic logging API endpoint
|
||||
|
||||
Set up a generic API endpoint that can receive data in JSON format. The data will be included within a "data" field.
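A minimal sketch of such an endpoint, assuming FastAPI (the fuller example later in this section also runs a FastAPI app with uvicorn); the route name and port are illustrative:

```python
# generic_api.py - illustrative minimal logging endpoint
from fastapi import FastAPI, Request

app = FastAPI()


@app.post("/log-event")
async def log_event(request: Request):
    body = await request.json()
    # LiteLLM nests the logged payload under a "data" field
    print(body.get("data"))
    return {"status": "ok"}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=4000)
```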
|
||||
|
||||
Your server should support the following Request format:
|
||||
|
@ -1067,11 +1063,8 @@ async def log_event(request: Request):
|
|||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(app, host="127.0.0.1", port=4000)
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
#### Step 2. Set your `GENERIC_LOGGER_ENDPOINT` to the endpoint + route we should send callback logs to
|
||||
|
||||
```shell
|
||||
|
@ -1081,6 +1074,7 @@ os.environ["GENERIC_LOGGER_ENDPOINT"] = "http://localhost:4000/log-event"
|
|||
#### Step 3. Create a `config.yaml` file and set `litellm_settings`: `success_callback` = ["generic"]
|
||||
|
||||
Example litellm proxy config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
|
@ -1092,8 +1086,8 @@ litellm_settings:
|
|||
|
||||
Start the LiteLLM Proxy and make a test request to verify the logs reached your callback API
|
||||
|
||||
|
||||
## Logging LLM IO to Galileo
|
||||
|
||||
[BETA]
|
||||
|
||||
Log LLM I/O on [www.rungalileo.io](https://www.rungalileo.io/)
|
||||
|
@ -1116,6 +1110,7 @@ export GALILEO_PASSWORD=""
|
|||
### Quick Start
|
||||
|
||||
1. Add to Config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- litellm_params:
|
||||
|
@ -1151,7 +1146,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
'
|
||||
```
|
||||
|
||||
|
||||
🎉 That's it - Expect to see your Logs on your Galileo Dashboard
|
||||
|
||||
## Logging Proxy Cost + Usage - OpenMeter
|
||||
|
@ -1169,6 +1163,7 @@ export OPENMETER_API_KEY=""
|
|||
### Quick Start
|
||||
|
||||
1. Add to Config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- litellm_params:
|
||||
|
@ -1204,13 +1199,14 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
'
|
||||
```
|
||||
|
||||
|
||||
<Image img={require('../../img/openmeter_img_2.png')} />
|
||||
|
||||
## Logging Proxy Input/Output - DataDog
|
||||
|
||||
We will use the `--config` to set `litellm.success_callback = ["datadog"]`. This will log all successful LLM calls to DataDog.
|
||||
|
||||
**Step 1**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
|
@ -1230,6 +1226,7 @@ DD_SITE="us5.datadoghq.com" # your datadog base url
|
|||
**Step 3**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
||||
```shell
|
||||
litellm --config config.yaml --debug
|
||||
```
|
||||
|
@ -1257,10 +1254,10 @@ Expected output on Datadog
|
|||
|
||||
<Image img={require('../../img/dd_small1.png')} />
|
||||
|
||||
|
||||
## Logging Proxy Input/Output - s3 Buckets
|
||||
|
||||
We will use the `--config` to set
|
||||
|
||||
- `litellm.success_callback = ["s3"]`
|
||||
|
||||
This will log all successful LLM calls to the s3 Bucket.
|
||||
|
@ -1274,6 +1271,7 @@ AWS_REGION_NAME = ""
|
|||
```
|
||||
|
||||
**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
|
@ -1293,11 +1291,13 @@ litellm_settings:
|
|||
**Step 3**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
||||
```shell
|
||||
litellm --config config.yaml --debug
|
||||
```
|
||||
|
||||
Test Request
|
||||
|
||||
```shell
|
||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||
--header 'Content-Type: application/json' \
|
||||
|
@ -1317,6 +1317,7 @@ Your logs should be available on the specified s3 Bucket
|
|||
## Logging Proxy Input/Output - DynamoDB
|
||||
|
||||
We will use the `--config` to set
|
||||
|
||||
- `litellm.success_callback = ["dynamodb"]`
|
||||
- `litellm.dynamodb_table_name = "your-table-name"`
|
||||
|
||||
|
@ -1331,6 +1332,7 @@ AWS_REGION_NAME = ""
|
|||
```
|
||||
|
||||
**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
|
@ -1344,11 +1346,13 @@ litellm_settings:
|
|||
**Step 3**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
||||
```shell
|
||||
litellm --config config.yaml --debug
|
||||
```
|
||||
|
||||
Test Request
|
||||
|
||||
```shell
|
||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||
--header 'Content-Type: application/json' \
|
||||
|
@ -1436,19 +1440,18 @@ Your logs should be available on DynamoDB
|
|||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
## Logging Proxy Input/Output - Sentry
|
||||
|
||||
If API calls fail (LLM/database), you can log them to Sentry:
|
||||
|
||||
**Step 1** Install Sentry
|
||||
|
||||
```shell
|
||||
pip install --upgrade sentry-sdk
|
||||
```
|
||||
|
||||
**Step 2**: Save your Sentry_DSN and add `litellm_settings`: `failure_callback`
|
||||
|
||||
```shell
|
||||
export SENTRY_DSN="your-sentry-dsn"
|
||||
```
|
||||
|
@ -1468,11 +1471,13 @@ general_settings:
|
|||
**Step 3**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
||||
```shell
|
||||
litellm --config config.yaml --debug
|
||||
```
|
||||
|
||||
Test Request
|
||||
|
||||
```
|
||||
litellm --test
|
||||
```
|
||||
|
@ -1490,6 +1495,7 @@ ATHINA_API_KEY = "your-athina-api-key"
|
|||
```
|
||||
|
||||
**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
|
@ -1502,11 +1508,13 @@ litellm_settings:
|
|||
**Step 3**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
||||
```shell
|
||||
litellm --config config.yaml --debug
|
||||
```
|
||||
|
||||
Test Request
|
||||
|
||||
```
|
||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||
--header 'Content-Type: application/json' \
|
||||
|
@ -1538,6 +1546,7 @@ AZURE_CONTENT_SAFETY_KEY = "<your-azure-content-safety-key>"
|
|||
```
|
||||
|
||||
**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-3.5-turbo
|
||||
|
@ -1553,11 +1562,13 @@ litellm_settings:
|
|||
**Step 3**: Start the proxy, make a test request
|
||||
|
||||
Start proxy
|
||||
|
||||
```shell
|
||||
litellm --config config.yaml --debug
|
||||
```
|
||||
|
||||
Test Request
|
||||
|
||||
```
|
||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||
--header 'Content-Type: application/json' \
|
||||
|
@ -1573,7 +1584,8 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|||
```
|
||||
|
||||
An HTTP 400 error will be returned if the content is detected with a value greater than the threshold set in the `config.yaml`.
|
||||
The details of the response will describe :
|
||||
The details of the response will describe:
|
||||
|
||||
- The `source` : input text or llm generated text
|
||||
- The `category` : the category of the content that triggered the moderation
|
||||
- The `severity` : the severity from 0 to 10
|
||||
|
|
|
@ -15,9 +15,9 @@ model_list:
|
|||
metadata: "here's additional metadata on the model" # returned via GET /model/info
|
||||
```
|
||||
|
||||
## Get Model Information
|
||||
## Get Model Information - `/model/info`
|
||||
|
||||
Retrieve detailed information about each model listed in the `/models` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled the model_info you set and the litellm model cost map. Sensitive details like API keys are excluded for security purposes.
|
||||
Retrieve detailed information about each model listed in the `/model/info` endpoint, including descriptions from the `config.yaml` file, and additional model info (e.g. max tokens, cost per input token, etc.) pulled from the model_info you set and the litellm model cost map. Sensitive details like API keys are excluded for security purposes.
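As a quick sketch of querying it directly (assuming a locally running proxy with the example master key, and assuming the response wraps models in a top-level `data` list):

```python
import requests

# Illustrative proxy URL and key
resp = requests.get(
    "http://0.0.0.0:4000/model/info",
    headers={"Authorization": "Bearer sk-1234"},
)

# Assumed response shape: {"data": [{"model_name": ..., "model_info": ...}, ...]}
for entry in resp.json().get("data", []):
    print(entry.get("model_name"), entry.get("model_info"))
```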
|
||||
|
||||
<Tabs
|
||||
defaultValue="curl"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# [OLD PROXY 👉 [**NEW** proxy here](./simple_proxy)] Local OpenAI Proxy Server
|
||||
# [OLD PROXY 👉 [NEW proxy here](./simple_proxy)] Local OpenAI Proxy Server
|
||||
|
||||
A fast, and lightweight OpenAI-compatible server to call 100+ LLM APIs.
|
||||
|
||||
|
|
|
@ -117,6 +117,7 @@ const sidebars = {
|
|||
"text_to_speech",
|
||||
"assistants",
|
||||
"batches",
|
||||
"anthropic_completion"
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -237,6 +238,7 @@ const sidebars = {
|
|||
label: "Extras",
|
||||
items: [
|
||||
"extras/contributing",
|
||||
"data_security",
|
||||
"contributing",
|
||||
"rules",
|
||||
"proxy_server",
|
||||
|
|
29
index.yaml
|
@ -1,6 +1,25 @@
|
|||
apiVersion: v1
|
||||
entries:
|
||||
litellm-helm:
|
||||
- apiVersion: v2
|
||||
appVersion: v1.41.8
|
||||
created: "2024-07-10T00:59:11.1889+08:00"
|
||||
dependencies:
|
||||
- condition: db.deployStandalone
|
||||
name: postgresql
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
version: '>=13.3.0'
|
||||
- condition: redis.enabled
|
||||
name: redis
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
version: '>=18.0.0'
|
||||
description: Call all LLM APIs using the OpenAI format
|
||||
digest: eeff5e4e6cebb4c977cb7359c1ec6c773c66982f6aa39dbed94a674890144a43
|
||||
name: litellm-helm
|
||||
type: application
|
||||
urls:
|
||||
- https://berriai.github.io/litellm/litellm-helm-0.2.1.tgz
|
||||
version: 0.2.1
|
||||
- apiVersion: v2
|
||||
appVersion: v1.35.38
|
||||
created: "2024-05-06T10:22:24.384392-07:00"
|
||||
|
@ -33,7 +52,7 @@ entries:
|
|||
licenses: Apache-2.0
|
||||
apiVersion: v2
|
||||
appVersion: 16.2.0
|
||||
created: "2024-05-06T10:22:24.387717-07:00"
|
||||
created: "2024-07-10T00:59:11.191731+08:00"
|
||||
dependencies:
|
||||
- name: common
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
|
@ -60,7 +79,7 @@ entries:
|
|||
sources:
|
||||
- https://github.com/bitnami/charts/tree/main/bitnami/postgresql
|
||||
urls:
|
||||
- charts/postgresql-14.3.1.tgz
|
||||
- https://berriai.github.io/litellm/charts/postgresql-14.3.1.tgz
|
||||
version: 14.3.1
|
||||
redis:
|
||||
- annotations:
|
||||
|
@ -79,7 +98,7 @@ entries:
|
|||
licenses: Apache-2.0
|
||||
apiVersion: v2
|
||||
appVersion: 7.2.4
|
||||
created: "2024-05-06T10:22:24.391903-07:00"
|
||||
created: "2024-07-10T00:59:11.195667+08:00"
|
||||
dependencies:
|
||||
- name: common
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
|
@ -103,6 +122,6 @@ entries:
|
|||
sources:
|
||||
- https://github.com/bitnami/charts/tree/main/bitnami/redis
|
||||
urls:
|
||||
- charts/redis-18.19.1.tgz
|
||||
- https://berriai.github.io/litellm/charts/redis-18.19.1.tgz
|
||||
version: 18.19.1
|
||||
generated: "2024-05-06T10:22:24.375026-07:00"
|
||||
generated: "2024-07-10T00:59:11.179952+08:00"
|
||||
|
|
BIN
litellm-helm-0.2.1.tgz
Normal file
Binary file not shown.
|
@ -364,7 +364,7 @@ for key, value in model_cost.items():
|
|||
elif value.get("litellm_provider") == "mistral":
|
||||
mistral_chat_models.append(key)
|
||||
elif value.get("litellm_provider") == "anthropic":
|
||||
anthropic_models.append(key)
|
||||
anthropic_models.append(key)
|
||||
elif value.get("litellm_provider") == "empower":
|
||||
empower_models.append(key)
|
||||
elif value.get("litellm_provider") == "openrouter":
|
||||
|
@ -789,6 +789,7 @@ from .utils import (
|
|||
get_api_base,
|
||||
get_first_chars_messages,
|
||||
ModelResponse,
|
||||
EmbeddingResponse,
|
||||
ImageResponse,
|
||||
get_provider_fields,
|
||||
)
|
||||
|
@ -879,5 +880,11 @@ from .proxy.proxy_cli import run_server
|
|||
from .router import Router
|
||||
from .assistants.main import *
|
||||
from .batches.main import *
|
||||
from .files.main import *
|
||||
from .scheduler import *
|
||||
from .cost_calculator import response_cost_calculator, cost_per_token
|
||||
|
||||
### ADAPTERS ###
|
||||
from .types.adapter import AdapterItem
|
||||
|
||||
adapters: List[AdapterItem] = []
|
||||
|
|
50
litellm/adapters/anthropic_adapter.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
# What is this?
|
||||
## Translates OpenAI call to Anthropic `/v1/messages` format
|
||||
import json
|
||||
import os
|
||||
import traceback
|
||||
import uuid
|
||||
from typing import Literal, Optional
|
||||
|
||||
import dotenv
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
|
||||
import litellm
|
||||
from litellm import ChatCompletionRequest, verbose_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
|
||||
|
||||
|
||||
class AnthropicAdapter(CustomLogger):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
|
||||
def translate_completion_input_params(
|
||||
self, kwargs
|
||||
) -> Optional[ChatCompletionRequest]:
|
||||
"""
|
||||
- translate params, where needed
|
||||
- pass rest, as is
|
||||
"""
|
||||
request_body = AnthropicMessagesRequest(**kwargs) # type: ignore
|
||||
|
||||
translated_body = litellm.AnthropicConfig().translate_anthropic_to_openai(
|
||||
anthropic_message_request=request_body
|
||||
)
|
||||
|
||||
return translated_body
|
||||
|
||||
def translate_completion_output_params(
|
||||
self, response: litellm.ModelResponse
|
||||
) -> Optional[AnthropicResponse]:
|
||||
|
||||
return litellm.AnthropicConfig().translate_openai_response_to_anthropic(
|
||||
response=response
|
||||
)
|
||||
|
||||
def translate_completion_output_params_streaming(self) -> Optional[BaseModel]:
|
||||
return super().translate_completion_output_params_streaming()
|
||||
|
||||
|
||||
anthropic_adapter = AnthropicAdapter()
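# Hedged usage sketch (illustrative): exercises only the two translation hooks
# defined above plus litellm.completion. The request body is illustrative, and
# running it requires provider credentials (e.g. OPENAI_API_KEY) to be set.
if __name__ == "__main__":
    anthropic_style_request = {
        "model": "gpt-3.5-turbo",
        "max_tokens": 256,
        "messages": [{"role": "user", "content": "Hello, world"}],
    }

    # Anthropic /v1/messages body -> OpenAI-style ChatCompletionRequest
    openai_request = anthropic_adapter.translate_completion_input_params(
        anthropic_style_request
    )

    # Call any LiteLLM-supported model with the translated params ...
    response = litellm.completion(**openai_request)

    # ... then translate the response back into Anthropic format.
    print(anthropic_adapter.translate_completion_output_params(response))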
|
|
@ -10,296 +10,37 @@ https://platform.openai.com/docs/api-reference/batch
|
|||
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
from functools import partial
|
||||
import contextvars
|
||||
from typing import Literal, Optional, Dict, Coroutine, Any, Union
|
||||
import os
|
||||
from functools import partial
|
||||
from typing import Any, Coroutine, Dict, Literal, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm import client
|
||||
from litellm.utils import supports_httpx_timeout
|
||||
from ..types.router import *
|
||||
|
||||
from ..llms.openai import OpenAIBatchesAPI, OpenAIFilesAPI
|
||||
from ..types.llms.openai import (
|
||||
CreateBatchRequest,
|
||||
RetrieveBatchRequest,
|
||||
CancelBatchRequest,
|
||||
CreateFileRequest,
|
||||
FileTypes,
|
||||
FileObject,
|
||||
Batch,
|
||||
CancelBatchRequest,
|
||||
CreateBatchRequest,
|
||||
CreateFileRequest,
|
||||
FileContentRequest,
|
||||
FileObject,
|
||||
FileTypes,
|
||||
HttpxBinaryResponseContent,
|
||||
RetrieveBatchRequest,
|
||||
)
|
||||
from ..types.router import *
|
||||
|
||||
####### ENVIRONMENT VARIABLES ###################
|
||||
openai_batches_instance = OpenAIBatchesAPI()
|
||||
openai_files_instance = OpenAIFilesAPI()
|
||||
#################################################
|
||||
|
||||
|
||||
async def acreate_file(
|
||||
file: FileTypes,
|
||||
purpose: Literal["assistants", "batch", "fine-tune"],
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Coroutine[Any, Any, FileObject]:
|
||||
"""
|
||||
Async: Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
|
||||
|
||||
LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
kwargs["acreate_file"] = True
|
||||
|
||||
# Use a partial function to pass your keyword arguments
|
||||
func = partial(
|
||||
create_file,
|
||||
file,
|
||||
purpose,
|
||||
custom_llm_provider,
|
||||
extra_headers,
|
||||
extra_body,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Add the context to the function
|
||||
ctx = contextvars.copy_context()
|
||||
func_with_context = partial(ctx.run, func)
|
||||
init_response = await loop.run_in_executor(None, func_with_context)
|
||||
if asyncio.iscoroutine(init_response):
|
||||
response = await init_response
|
||||
else:
|
||||
response = init_response # type: ignore
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
def create_file(
|
||||
file: FileTypes,
|
||||
purpose: Literal["assistants", "batch", "fine-tune"],
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
|
||||
"""
|
||||
Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
|
||||
|
||||
LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
optional_params = GenericLiteLLMParams(**kwargs)
|
||||
if custom_llm_provider == "openai":
|
||||
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
|
||||
api_base = (
|
||||
optional_params.api_base
|
||||
or litellm.api_base
|
||||
or os.getenv("OPENAI_API_BASE")
|
||||
or "https://api.openai.com/v1"
|
||||
)
|
||||
organization = (
|
||||
optional_params.organization
|
||||
or litellm.organization
|
||||
or os.getenv("OPENAI_ORGANIZATION", None)
|
||||
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
|
||||
)
|
||||
# set API KEY
|
||||
api_key = (
|
||||
optional_params.api_key
|
||||
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
|
||||
or litellm.openai_key
|
||||
or os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
### TIMEOUT LOGIC ###
|
||||
timeout = (
|
||||
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
|
||||
)
|
||||
# set timeout for 10 minutes by default
|
||||
|
||||
if (
|
||||
timeout is not None
|
||||
and isinstance(timeout, httpx.Timeout)
|
||||
and supports_httpx_timeout(custom_llm_provider) == False
|
||||
):
|
||||
read_timeout = timeout.read or 600
|
||||
timeout = read_timeout # default 10 min timeout
|
||||
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
|
||||
timeout = float(timeout) # type: ignore
|
||||
elif timeout is None:
|
||||
timeout = 600.0
|
||||
|
||||
_create_file_request = CreateFileRequest(
|
||||
file=file,
|
||||
purpose=purpose,
|
||||
extra_headers=extra_headers,
|
||||
extra_body=extra_body,
|
||||
)
|
||||
|
||||
_is_async = kwargs.pop("acreate_file", False) is True
|
||||
|
||||
response = openai_files_instance.create_file(
|
||||
_is_async=_is_async,
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
timeout=timeout,
|
||||
max_retries=optional_params.max_retries,
|
||||
organization=organization,
|
||||
create_file_data=_create_file_request,
|
||||
)
|
||||
else:
|
||||
raise litellm.exceptions.BadRequestError(
|
||||
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
|
||||
custom_llm_provider
|
||||
),
|
||||
model="n/a",
|
||||
llm_provider=custom_llm_provider,
|
||||
response=httpx.Response(
|
||||
status_code=400,
|
||||
content="Unsupported provider",
|
||||
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
|
||||
),
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
async def afile_content(
|
||||
file_id: str,
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Coroutine[Any, Any, HttpxBinaryResponseContent]:
|
||||
"""
|
||||
Async: Get file contents
|
||||
|
||||
LiteLLM Equivalent of GET https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
kwargs["afile_content"] = True
|
||||
|
||||
# Use a partial function to pass your keyword arguments
|
||||
func = partial(
|
||||
file_content,
|
||||
file_id,
|
||||
custom_llm_provider,
|
||||
extra_headers,
|
||||
extra_body,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Add the context to the function
|
||||
ctx = contextvars.copy_context()
|
||||
func_with_context = partial(ctx.run, func)
|
||||
init_response = await loop.run_in_executor(None, func_with_context)
|
||||
if asyncio.iscoroutine(init_response):
|
||||
response = await init_response
|
||||
else:
|
||||
response = init_response # type: ignore
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
def file_content(
|
||||
file_id: str,
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Union[HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]]:
|
||||
"""
|
||||
Returns the contents of the specified file.
|
||||
|
||||
LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
optional_params = GenericLiteLLMParams(**kwargs)
|
||||
if custom_llm_provider == "openai":
|
||||
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
|
||||
api_base = (
|
||||
optional_params.api_base
|
||||
or litellm.api_base
|
||||
or os.getenv("OPENAI_API_BASE")
|
||||
or "https://api.openai.com/v1"
|
||||
)
|
||||
organization = (
|
||||
optional_params.organization
|
||||
or litellm.organization
|
||||
or os.getenv("OPENAI_ORGANIZATION", None)
|
||||
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
|
||||
)
|
||||
# set API KEY
|
||||
api_key = (
|
||||
optional_params.api_key
|
||||
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
|
||||
or litellm.openai_key
|
||||
or os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
### TIMEOUT LOGIC ###
|
||||
timeout = (
|
||||
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
|
||||
)
|
||||
# set timeout for 10 minutes by default
|
||||
|
||||
if (
|
||||
timeout is not None
|
||||
and isinstance(timeout, httpx.Timeout)
|
||||
and supports_httpx_timeout(custom_llm_provider) == False
|
||||
):
|
||||
read_timeout = timeout.read or 600
|
||||
timeout = read_timeout # default 10 min timeout
|
||||
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
|
||||
timeout = float(timeout) # type: ignore
|
||||
elif timeout is None:
|
||||
timeout = 600.0
|
||||
|
||||
_file_content_request = FileContentRequest(
|
||||
file_id=file_id,
|
||||
extra_headers=extra_headers,
|
||||
extra_body=extra_body,
|
||||
)
|
||||
|
||||
_is_async = kwargs.pop("afile_content", False) is True
|
||||
|
||||
response = openai_files_instance.file_content(
|
||||
_is_async=_is_async,
|
||||
file_content_request=_file_content_request,
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
timeout=timeout,
|
||||
max_retries=optional_params.max_retries,
|
||||
organization=organization,
|
||||
)
|
||||
else:
|
||||
raise litellm.exceptions.BadRequestError(
|
||||
message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
|
||||
custom_llm_provider
|
||||
),
|
||||
model="n/a",
|
||||
llm_provider=custom_llm_provider,
|
||||
response=httpx.Response(
|
||||
status_code=400,
|
||||
content="Unsupported provider",
|
||||
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
|
||||
),
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
async def acreate_batch(
|
||||
completion_window: Literal["24h"],
|
||||
endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"],
|
||||
|
|
|
@ -15,10 +15,12 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
|
|||
from litellm.litellm_core_utils.llm_cost_calc.google import (
|
||||
cost_per_token as google_cost_per_token,
|
||||
)
|
||||
from litellm.litellm_core_utils.llm_cost_calc.google import (
|
||||
cost_router as google_cost_router,
|
||||
)
|
||||
from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
|
||||
from litellm.types.llms.openai import HttpxBinaryResponseContent
|
||||
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
|
||||
|
||||
from litellm.utils import (
|
||||
CallTypes,
|
||||
CostPerToken,
|
||||
|
@ -160,22 +162,32 @@ def cost_per_token(
|
|||
|
||||
# see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
|
||||
print_verbose(f"Looking up model={model} in model_cost_map")
|
||||
if custom_llm_provider == "vertex_ai" and "claude" in model:
|
||||
return google_cost_per_token(
|
||||
model=model_without_prefix,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
)
|
||||
if custom_llm_provider == "vertex_ai":
|
||||
return google_cost_per_character(
|
||||
cost_router = google_cost_router(
|
||||
model=model_without_prefix,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
prompt_characters=prompt_characters,
|
||||
completion_characters=completion_characters,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
call_type=call_type,
|
||||
)
|
||||
if cost_router == "cost_per_character":
|
||||
return google_cost_per_character(
|
||||
model=model_without_prefix,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
prompt_characters=prompt_characters,
|
||||
completion_characters=completion_characters,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
)
|
||||
elif cost_router == "cost_per_token":
|
||||
return google_cost_per_token(
|
||||
model=model_without_prefix,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
)
|
||||
elif custom_llm_provider == "gemini":
|
||||
return google_cost_per_token(
|
||||
model=model_without_prefix,
|
||||
|
|
659
litellm/files/main.py
Normal file
|
@ -0,0 +1,659 @@
|
|||
"""
|
||||
Main File for Files API implementation
|
||||
|
||||
https://platform.openai.com/docs/api-reference/files
|
||||
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import os
|
||||
from functools import partial
|
||||
from typing import Any, Coroutine, Dict, Literal, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm import client
|
||||
from litellm.llms.openai import FileDeleted, FileObject, OpenAIFilesAPI
|
||||
from litellm.types.llms.openai import (
|
||||
Batch,
|
||||
CreateFileRequest,
|
||||
FileContentRequest,
|
||||
FileTypes,
|
||||
HttpxBinaryResponseContent,
|
||||
)
|
||||
from litellm.types.router import *
|
||||
from litellm.utils import supports_httpx_timeout
|
||||
|
||||
####### ENVIRONMENT VARIABLES ###################
|
||||
openai_files_instance = OpenAIFilesAPI()
|
||||
#################################################
|
||||
|
||||
|
||||
async def afile_retrieve(
|
||||
file_id: str,
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Coroutine[Any, Any, FileObject]:
|
||||
"""
|
||||
Async: Get file contents
|
||||
|
||||
LiteLLM Equivalent of GET https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
kwargs["is_async"] = True
|
||||
|
||||
# Use a partial function to pass your keyword arguments
|
||||
func = partial(
|
||||
file_retrieve,
|
||||
file_id,
|
||||
custom_llm_provider,
|
||||
extra_headers,
|
||||
extra_body,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Add the context to the function
|
||||
ctx = contextvars.copy_context()
|
||||
func_with_context = partial(ctx.run, func)
|
||||
init_response = await loop.run_in_executor(None, func_with_context)
|
||||
if asyncio.iscoroutine(init_response):
|
||||
response = await init_response
|
||||
else:
|
||||
response = init_response # type: ignore
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
def file_retrieve(
|
||||
file_id: str,
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> FileObject:
|
||||
"""
|
||||
Returns the contents of the specified file.
|
||||
|
||||
LiteLLM Equivalent of POST: POST https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
optional_params = GenericLiteLLMParams(**kwargs)
|
||||
if custom_llm_provider == "openai":
|
||||
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
|
||||
api_base = (
|
||||
optional_params.api_base
|
||||
or litellm.api_base
|
||||
or os.getenv("OPENAI_API_BASE")
|
||||
or "https://api.openai.com/v1"
|
||||
)
|
||||
organization = (
|
||||
optional_params.organization
|
||||
or litellm.organization
|
||||
or os.getenv("OPENAI_ORGANIZATION", None)
|
||||
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
|
||||
)
|
||||
# set API KEY
|
||||
api_key = (
|
||||
optional_params.api_key
|
||||
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
|
||||
or litellm.openai_key
|
||||
or os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
### TIMEOUT LOGIC ###
|
||||
timeout = (
|
||||
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
|
||||
)
|
||||
# set timeout for 10 minutes by default
|
||||
|
||||
if (
|
||||
timeout is not None
|
||||
and isinstance(timeout, httpx.Timeout)
|
||||
and supports_httpx_timeout(custom_llm_provider) == False
|
||||
):
|
||||
read_timeout = timeout.read or 600
|
||||
timeout = read_timeout # default 10 min timeout
|
||||
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
|
||||
timeout = float(timeout) # type: ignore
|
||||
elif timeout is None:
|
||||
timeout = 600.0
|
||||
|
||||
_is_async = kwargs.pop("is_async", False) is True
|
||||
|
||||
response = openai_files_instance.retrieve_file(
|
||||
file_id=file_id,
|
||||
_is_async=_is_async,
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
timeout=timeout,
|
||||
max_retries=optional_params.max_retries,
|
||||
organization=organization,
|
||||
)
|
||||
else:
|
||||
raise litellm.exceptions.BadRequestError(
|
||||
message="LiteLLM doesn't support {} for 'file_retrieve'. Only 'openai' is supported.".format(
|
||||
custom_llm_provider
|
||||
),
|
||||
model="n/a",
|
||||
llm_provider=custom_llm_provider,
|
||||
response=httpx.Response(
|
||||
status_code=400,
|
||||
content="Unsupported provider",
|
||||
request=httpx.Request(method="file_retrieve", url="https://github.com/BerriAI/litellm"), # type: ignore
|
||||
),
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
# Delete file
|
||||
async def afile_delete(
|
||||
file_id: str,
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Coroutine[Any, Any, FileDeleted]:
|
||||
"""
|
||||
Async: Delete file
|
||||
|
||||
LiteLLM Equivalent of DELETE https://api.openai.com/v1/files/{file_id}
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
kwargs["is_async"] = True
|
||||
|
||||
# Use a partial function to pass your keyword arguments
|
||||
func = partial(
|
||||
file_delete,
|
||||
file_id,
|
||||
custom_llm_provider,
|
||||
extra_headers,
|
||||
extra_body,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Add the context to the function
|
||||
ctx = contextvars.copy_context()
|
||||
func_with_context = partial(ctx.run, func)
|
||||
init_response = await loop.run_in_executor(None, func_with_context)
|
||||
if asyncio.iscoroutine(init_response):
|
||||
response = await init_response
|
||||
else:
|
||||
response = init_response # type: ignore
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
def file_delete(
|
||||
file_id: str,
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> FileDeleted:
|
||||
"""
|
||||
Delete file
|
||||
|
||||
LiteLLM Equivalent of DELETE https://api.openai.com/v1/files/{file_id}
|
||||
"""
|
||||
try:
|
||||
optional_params = GenericLiteLLMParams(**kwargs)
|
||||
if custom_llm_provider == "openai":
|
||||
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
|
||||
api_base = (
|
||||
optional_params.api_base
|
||||
or litellm.api_base
|
||||
or os.getenv("OPENAI_API_BASE")
|
||||
or "https://api.openai.com/v1"
|
||||
)
|
||||
organization = (
|
||||
optional_params.organization
|
||||
or litellm.organization
|
||||
or os.getenv("OPENAI_ORGANIZATION", None)
|
||||
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
|
||||
)
|
||||
# set API KEY
|
||||
api_key = (
|
||||
optional_params.api_key
|
||||
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
|
||||
or litellm.openai_key
|
||||
or os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
### TIMEOUT LOGIC ###
|
||||
timeout = (
|
||||
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
|
||||
)
|
||||
# set timeout for 10 minutes by default
|
||||
|
||||
if (
|
||||
timeout is not None
|
||||
and isinstance(timeout, httpx.Timeout)
|
||||
and supports_httpx_timeout(custom_llm_provider) == False
|
||||
):
|
||||
read_timeout = timeout.read or 600
|
||||
timeout = read_timeout # default 10 min timeout
|
||||
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
|
||||
timeout = float(timeout) # type: ignore
|
||||
elif timeout is None:
|
||||
timeout = 600.0
|
||||
|
||||
_is_async = kwargs.pop("is_async", False) is True
|
||||
|
||||
response = openai_files_instance.delete_file(
|
||||
file_id=file_id,
|
||||
_is_async=_is_async,
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
timeout=timeout,
|
||||
max_retries=optional_params.max_retries,
|
||||
organization=organization,
|
||||
)
|
||||
else:
|
||||
raise litellm.exceptions.BadRequestError(
|
||||
message="LiteLLM doesn't support {} for 'file_delete'. Only 'openai' is supported.".format(
|
||||
custom_llm_provider
|
||||
),
|
||||
model="n/a",
|
||||
llm_provider=custom_llm_provider,
|
||||
response=httpx.Response(
|
||||
status_code=400,
|
||||
content="Unsupported provider",
|
||||
request=httpx.Request(method="file_delete", url="https://github.com/BerriAI/litellm"), # type: ignore
|
||||
),
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
# List files
|
||||
async def afile_list(
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
purpose: Optional[str] = None,
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Async: List files
|
||||
|
||||
LiteLLM Equivalent of GET https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
kwargs["is_async"] = True
|
||||
|
||||
# Use a partial function to pass your keyword arguments
|
||||
func = partial(
|
||||
file_list,
|
||||
custom_llm_provider,
|
||||
purpose,
|
||||
extra_headers,
|
||||
extra_body,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Add the context to the function
|
||||
ctx = contextvars.copy_context()
|
||||
func_with_context = partial(ctx.run, func)
|
||||
init_response = await loop.run_in_executor(None, func_with_context)
|
||||
if asyncio.iscoroutine(init_response):
|
||||
response = await init_response
|
||||
else:
|
||||
response = init_response # type: ignore
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
def file_list(
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
purpose: Optional[str] = None,
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
List files
|
||||
|
||||
LiteLLM Equivalent of GET https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
optional_params = GenericLiteLLMParams(**kwargs)
|
||||
if custom_llm_provider == "openai":
|
||||
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
|
||||
api_base = (
|
||||
optional_params.api_base
|
||||
or litellm.api_base
|
||||
or os.getenv("OPENAI_API_BASE")
|
||||
or "https://api.openai.com/v1"
|
||||
)
|
||||
organization = (
|
||||
optional_params.organization
|
||||
or litellm.organization
|
||||
or os.getenv("OPENAI_ORGANIZATION", None)
|
||||
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
|
||||
)
|
||||
# set API KEY
|
||||
api_key = (
|
||||
optional_params.api_key
|
||||
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
|
||||
or litellm.openai_key
|
||||
or os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
### TIMEOUT LOGIC ###
|
||||
timeout = (
|
||||
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
|
||||
)
|
||||
# set timeout for 10 minutes by default
|
||||
|
||||
if (
|
||||
timeout is not None
|
||||
and isinstance(timeout, httpx.Timeout)
|
||||
and supports_httpx_timeout(custom_llm_provider) == False
|
||||
):
|
||||
read_timeout = timeout.read or 600
|
||||
timeout = read_timeout # default 10 min timeout
|
||||
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
|
||||
timeout = float(timeout) # type: ignore
|
||||
elif timeout is None:
|
||||
timeout = 600.0
|
||||
|
||||
_is_async = kwargs.pop("is_async", False) is True
|
||||
|
||||
response = openai_files_instance.list_files(
|
||||
purpose=purpose,
|
||||
_is_async=_is_async,
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
timeout=timeout,
|
||||
max_retries=optional_params.max_retries,
|
||||
organization=organization,
|
||||
)
|
||||
else:
|
||||
raise litellm.exceptions.BadRequestError(
|
||||
message="LiteLLM doesn't support {} for 'file_list'. Only 'openai' is supported.".format(
|
||||
custom_llm_provider
|
||||
),
|
||||
model="n/a",
|
||||
llm_provider=custom_llm_provider,
|
||||
response=httpx.Response(
|
||||
status_code=400,
|
||||
content="Unsupported provider",
|
||||
request=httpx.Request(method="file_list", url="https://github.com/BerriAI/litellm"), # type: ignore
|
||||
),
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
async def acreate_file(
|
||||
file: FileTypes,
|
||||
purpose: Literal["assistants", "batch", "fine-tune"],
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Coroutine[Any, Any, FileObject]:
|
||||
"""
|
||||
Async: Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
|
||||
|
||||
LiteLLM Equivalent of POST https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
kwargs["acreate_file"] = True
|
||||
|
||||
# Use a partial function to pass your keyword arguments
|
||||
func = partial(
|
||||
create_file,
|
||||
file,
|
||||
purpose,
|
||||
custom_llm_provider,
|
||||
extra_headers,
|
||||
extra_body,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Add the context to the function
|
||||
ctx = contextvars.copy_context()
|
||||
func_with_context = partial(ctx.run, func)
|
||||
init_response = await loop.run_in_executor(None, func_with_context)
|
||||
if asyncio.iscoroutine(init_response):
|
||||
response = await init_response
|
||||
else:
|
||||
response = init_response # type: ignore
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
def create_file(
|
||||
file: FileTypes,
|
||||
purpose: Literal["assistants", "batch", "fine-tune"],
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
|
||||
"""
|
||||
Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
|
||||
|
||||
LiteLLM Equivalent of POST https://api.openai.com/v1/files
|
||||
"""
|
||||
try:
|
||||
optional_params = GenericLiteLLMParams(**kwargs)
|
||||
if custom_llm_provider == "openai":
|
||||
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
|
||||
api_base = (
|
||||
optional_params.api_base
|
||||
or litellm.api_base
|
||||
or os.getenv("OPENAI_API_BASE")
|
||||
or "https://api.openai.com/v1"
|
||||
)
|
||||
organization = (
|
||||
optional_params.organization
|
||||
or litellm.organization
|
||||
or os.getenv("OPENAI_ORGANIZATION", None)
|
||||
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
|
||||
)
|
||||
# set API KEY
|
||||
api_key = (
|
||||
optional_params.api_key
|
||||
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
|
||||
or litellm.openai_key
|
||||
or os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
### TIMEOUT LOGIC ###
|
||||
timeout = (
|
||||
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
|
||||
)
|
||||
# set timeout for 10 minutes by default
|
||||
|
||||
if (
|
||||
timeout is not None
|
||||
and isinstance(timeout, httpx.Timeout)
|
||||
and supports_httpx_timeout(custom_llm_provider) == False
|
||||
):
|
||||
read_timeout = timeout.read or 600
|
||||
timeout = read_timeout # default 10 min timeout
|
||||
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
|
||||
timeout = float(timeout) # type: ignore
|
||||
elif timeout is None:
|
||||
timeout = 600.0
|
||||
|
||||
_create_file_request = CreateFileRequest(
|
||||
file=file,
|
||||
purpose=purpose,
|
||||
extra_headers=extra_headers,
|
||||
extra_body=extra_body,
|
||||
)
|
||||
|
||||
_is_async = kwargs.pop("acreate_file", False) is True
|
||||
|
||||
response = openai_files_instance.create_file(
|
||||
_is_async=_is_async,
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
timeout=timeout,
|
||||
max_retries=optional_params.max_retries,
|
||||
organization=organization,
|
||||
create_file_data=_create_file_request,
|
||||
)
|
||||
else:
|
||||
raise litellm.exceptions.BadRequestError(
|
||||
message="LiteLLM doesn't support {} for 'create_file'. Only 'openai' is supported.".format(
|
||||
custom_llm_provider
|
||||
),
|
||||
model="n/a",
|
||||
llm_provider=custom_llm_provider,
|
||||
response=httpx.Response(
|
||||
status_code=400,
|
||||
content="Unsupported provider",
|
||||
request=httpx.Request(method="create_file", url="https://github.com/BerriAI/litellm"), # type: ignore
|
||||
),
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
async def afile_content(
|
||||
file_id: str,
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Coroutine[Any, Any, HttpxBinaryResponseContent]:
|
||||
"""
|
||||
Async: Get file contents
|
||||
|
||||
LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
|
||||
"""
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
kwargs["afile_content"] = True
|
||||
|
||||
# Use a partial function to pass your keyword arguments
|
||||
func = partial(
|
||||
file_content,
|
||||
file_id,
|
||||
custom_llm_provider,
|
||||
extra_headers,
|
||||
extra_body,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
# Add the context to the function
|
||||
ctx = contextvars.copy_context()
|
||||
func_with_context = partial(ctx.run, func)
|
||||
init_response = await loop.run_in_executor(None, func_with_context)
|
||||
if asyncio.iscoroutine(init_response):
|
||||
response = await init_response
|
||||
else:
|
||||
response = init_response # type: ignore
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
def file_content(
|
||||
file_id: str,
|
||||
custom_llm_provider: Literal["openai"] = "openai",
|
||||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Union[HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]]:
|
||||
"""
|
||||
Returns the contents of the specified file.
|
||||
|
||||
LiteLLM Equivalent of GET https://api.openai.com/v1/files/{file_id}/content
|
||||
"""
|
||||
try:
|
||||
optional_params = GenericLiteLLMParams(**kwargs)
|
||||
if custom_llm_provider == "openai":
|
||||
# for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there
|
||||
api_base = (
|
||||
optional_params.api_base
|
||||
or litellm.api_base
|
||||
or os.getenv("OPENAI_API_BASE")
|
||||
or "https://api.openai.com/v1"
|
||||
)
|
||||
organization = (
|
||||
optional_params.organization
|
||||
or litellm.organization
|
||||
or os.getenv("OPENAI_ORGANIZATION", None)
|
||||
or None # default - https://github.com/openai/openai-python/blob/284c1799070c723c6a553337134148a7ab088dd8/openai/util.py#L105
|
||||
)
|
||||
# set API KEY
|
||||
api_key = (
|
||||
optional_params.api_key
|
||||
or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there
|
||||
or litellm.openai_key
|
||||
or os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
### TIMEOUT LOGIC ###
|
||||
timeout = (
|
||||
optional_params.timeout or kwargs.get("request_timeout", 600) or 600
|
||||
)
|
||||
# set timeout for 10 minutes by default
|
||||
|
||||
if (
|
||||
timeout is not None
|
||||
and isinstance(timeout, httpx.Timeout)
|
||||
and supports_httpx_timeout(custom_llm_provider) == False
|
||||
):
|
||||
read_timeout = timeout.read or 600
|
||||
timeout = read_timeout # default 10 min timeout
|
||||
elif timeout is not None and not isinstance(timeout, httpx.Timeout):
|
||||
timeout = float(timeout) # type: ignore
|
||||
elif timeout is None:
|
||||
timeout = 600.0
|
||||
|
||||
_file_content_request = FileContentRequest(
|
||||
file_id=file_id,
|
||||
extra_headers=extra_headers,
|
||||
extra_body=extra_body,
|
||||
)
|
||||
|
||||
_is_async = kwargs.pop("afile_content", False) is True
|
||||
|
||||
response = openai_files_instance.file_content(
|
||||
_is_async=_is_async,
|
||||
file_content_request=_file_content_request,
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
timeout=timeout,
|
||||
max_retries=optional_params.max_retries,
|
||||
organization=organization,
|
||||
)
|
||||
else:
|
||||
raise litellm.exceptions.BadRequestError(
|
||||
message="LiteLLM doesn't support {} for 'file_content'. Only 'openai' is supported.".format(
|
||||
custom_llm_provider
|
||||
),
|
||||
model="n/a",
|
||||
llm_provider=custom_llm_provider,
|
||||
response=httpx.Response(
|
||||
status_code=400,
|
||||
content="Unsupported provider",
|
||||
request=httpx.Request(method="file_content", url="https://github.com/BerriAI/litellm"), # type: ignore
|
||||
),
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
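# --- Illustrative sketch (not part of this commit): end-to-end use of the helpers above.
# --- Assumes OPENAI_API_KEY is set and that "example_batch_input.jsonl" exists locally;
# --- attribute names on the returned objects follow the OpenAI SDK response types.
async def _example_files_roundtrip() -> None:
    created = await acreate_file(
        file=open("example_batch_input.jsonl", "rb"),
        purpose="batch",
        custom_llm_provider="openai",
    )

    # metadata lookup, then raw byte download
    info = await afile_retrieve(file_id=created.id)
    raw = await afile_content(file_id=created.id)
    print(info.filename, len(raw.content))

    # clean up
    await afile_delete(file_id=created.id)


# usage: asyncio.run(_example_files_roundtrip())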
|
|
@ -5,9 +5,12 @@ import traceback
|
|||
from typing import Literal, Optional, Union
|
||||
|
||||
import dotenv
|
||||
from pydantic import BaseModel
|
||||
|
||||
from litellm.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.types.llms.openai import ChatCompletionRequest
|
||||
from litellm.types.utils import ModelResponse
|
||||
|
||||
|
||||
class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
|
||||
|
@ -55,6 +58,30 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
|
|||
def pre_call_check(self, deployment: dict) -> Optional[dict]:
|
||||
pass
|
||||
|
||||
#### ADAPTERS #### Allow calling 100+ LLMs in custom format - https://github.com/BerriAI/litellm/pulls
|
||||
|
||||
def translate_completion_input_params(
|
||||
self, kwargs
|
||||
) -> Optional[ChatCompletionRequest]:
|
||||
"""
|
||||
Translates the input params, from the provider's native format to the litellm.completion() format.
|
||||
"""
|
||||
pass
|
||||
|
||||
def translate_completion_output_params(
|
||||
self, response: ModelResponse
|
||||
) -> Optional[BaseModel]:
|
||||
"""
|
||||
Translates the output params, from the OpenAI format to the custom format.
|
||||
"""
|
||||
pass
|
||||
|
||||
def translate_completion_output_params_streaming(self) -> Optional[BaseModel]:
|
||||
"""
|
||||
Translates the streaming chunk, from the OpenAI format to the custom format.
|
||||
"""
|
||||
pass
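# --- Illustrative sketch (not part of this commit): a hypothetical adapter that implements
# --- the three hooks above to expose a provider-native request/response format on top of
# --- litellm.completion(). `ExampleNativeResponse` and `ExampleAdapter` are made-up names.
class ExampleNativeResponse(BaseModel):
    text: str


class ExampleAdapter(CustomLogger):
    def translate_completion_input_params(
        self, kwargs
    ) -> Optional[ChatCompletionRequest]:
        # map a native {"model": ..., "prompt": ...} payload onto the OpenAI chat format
        return ChatCompletionRequest(
            model=kwargs.get("model", ""),
            messages=[{"role": "user", "content": kwargs.get("prompt", "")}],
        )

    def translate_completion_output_params(
        self, response: ModelResponse
    ) -> Optional[BaseModel]:
        # collapse the OpenAI-style response back into the native shape
        return ExampleNativeResponse(text=response.choices[0].message.content or "")

    def translate_completion_output_params_streaming(self) -> Optional[BaseModel]:
        # streaming translation is omitted from this sketch
        return None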
|
||||
|
||||
#### CALL HOOKS - proxy only ####
|
||||
"""
|
||||
Control / modify the incoming and outgoing data before calling the model
|
||||
|
|
|
@ -326,7 +326,12 @@ class LangFuseLogger:
|
|||
or isinstance(value, int)
|
||||
or isinstance(value, float)
|
||||
):
|
||||
new_metadata[key] = copy.deepcopy(value)
|
||||
try:
|
||||
new_metadata[key] = copy.deepcopy(value)
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
f"Langfuse [Non-blocking error] - error copying metadata: {str(e)}"
|
||||
)
|
||||
metadata = new_metadata
|
||||
|
||||
supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3")
|
||||
|
|
|
@ -52,6 +52,12 @@ class OpenTelemetryConfig:
|
|||
|
||||
OTEL_HEADERS gets sent as headers = {"x-honeycomb-team": "B85YgLm96******"}
|
||||
"""
|
||||
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
|
||||
InMemorySpanExporter,
|
||||
)
|
||||
|
||||
if os.getenv("OTEL_EXPORTER") == "in_memory":
|
||||
return cls(exporter=InMemorySpanExporter())
|
||||
return cls(
|
||||
exporter=os.getenv("OTEL_EXPORTER", "console"),
|
||||
endpoint=os.getenv("OTEL_ENDPOINT"),
|
||||
|
|
|
@ -675,7 +675,7 @@ class SlackAlerting(CustomLogger):
|
|||
async def failed_tracking_alert(self, error_message: str):
|
||||
"""Raise alert when tracking failed for specific model"""
|
||||
_cache: DualCache = self.internal_usage_cache
|
||||
message = "Failed Tracking Cost for" + error_message
|
||||
message = "Failed Tracking Cost for " + error_message
|
||||
_cache_key = "budget_alerts:failed_tracking:{}".format(message)
|
||||
result = await _cache.async_get_cache(key=_cache_key)
|
||||
if result is None:
|
||||
|
@ -1530,15 +1530,19 @@ Model Info:
|
|||
"""Log deployment latency"""
|
||||
try:
|
||||
if "daily_reports" in self.alert_types:
|
||||
model_id = (
|
||||
kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "")
|
||||
)
|
||||
litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||
model_info = litellm_params.get("model_info", {}) or {}
|
||||
model_id = model_info.get("id", "") or ""
|
||||
response_s: timedelta = end_time - start_time
|
||||
|
||||
final_value = response_s
|
||||
total_tokens = 0
|
||||
|
||||
if isinstance(response_obj, litellm.ModelResponse):
|
||||
if isinstance(response_obj, litellm.ModelResponse) and (
|
||||
hasattr(response_obj, "usage")
|
||||
and response_obj.usage is not None
|
||||
and hasattr(response_obj.usage, "completion_tokens")
|
||||
):
|
||||
completion_tokens = response_obj.usage.completion_tokens
|
||||
if completion_tokens is not None and completion_tokens > 0:
|
||||
final_value = float(
|
||||
|
@ -1557,8 +1561,7 @@ Model Info:
|
|||
)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
"[Non-Blocking Error] Slack Alerting: Got error in logging LLM deployment latency: ",
|
||||
e,
|
||||
f"[Non-Blocking Error] Slack Alerting: Got error in logging LLM deployment latency: {str(e)}"
|
||||
)
|
||||
pass
|
||||
|
||||
|
|
|
@ -1275,7 +1275,7 @@ class Logging:
|
|||
f"Model={self.model}; cost={self.model_call_details['response_cost']}"
|
||||
)
|
||||
except litellm.NotFoundError as e:
|
||||
verbose_logger.error(
|
||||
verbose_logger.warning(
|
||||
f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None"
|
||||
)
|
||||
self.model_call_details["response_cost"] = None
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# What is this?
|
||||
## Cost calculation for Google AI Studio / Vertex AI models
|
||||
import traceback
|
||||
from typing import List, Literal, Optional, Tuple
|
||||
from typing import List, Literal, Optional, Tuple, Union
|
||||
|
||||
import litellm
|
||||
from litellm import verbose_logger
|
||||
|
@ -29,6 +29,32 @@ def _is_above_128k(tokens: float) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def cost_router(
|
||||
model: str,
|
||||
custom_llm_provider: str,
|
||||
prompt_tokens: float,
|
||||
completion_tokens: float,
|
||||
prompt_characters: float,
|
||||
completion_characters: float,
|
||||
call_type: Union[Literal["embedding", "aembedding"], str],
|
||||
) -> Literal["cost_per_character", "cost_per_token"]:
|
||||
"""
|
||||
Route the cost calc to the right place, based on model/call_type/etc.
|
||||
|
||||
Returns
|
||||
- str, the specific google cost calc function it should route to.
|
||||
"""
|
||||
if custom_llm_provider == "vertex_ai" and "claude" in model:
|
||||
return "cost_per_token"
|
||||
elif custom_llm_provider == "gemini":
|
||||
return "cost_per_token"
|
||||
elif custom_llm_provider == "vertex_ai" and (
|
||||
call_type == "embedding" or call_type == "aembedding"
|
||||
):
|
||||
return "cost_per_token"
|
||||
return "cost_per_character"
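# --- Illustrative check (not part of this commit): how the router above picks a path.
# --- Values are made up; only model / custom_llm_provider / call_type affect the result.
def _example_cost_router_usage() -> None:
    common = dict(
        prompt_tokens=100.0,
        completion_tokens=50.0,
        prompt_characters=400.0,
        completion_characters=200.0,
        call_type="completion",
    )

    # Google AI Studio (gemini provider) is priced per token
    assert cost_router(model="gemini-1.5-pro", custom_llm_provider="gemini", **common) == "cost_per_token"

    # Claude on Vertex AI is priced per token
    assert cost_router(model="vertex_ai/claude-3-sonnet@20240229", custom_llm_provider="vertex_ai", **common) == "cost_per_token"

    # other Vertex AI chat models fall back to character-based pricing
    assert cost_router(model="gemini-1.0-pro", custom_llm_provider="vertex_ai", **common) == "cost_per_character"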
|
||||
|
||||
|
||||
def cost_per_character(
|
||||
model: str,
|
||||
custom_llm_provider: str,
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
import os, types, traceback
|
||||
import json
|
||||
import os
|
||||
import time # type: ignore
|
||||
import traceback
|
||||
import types
|
||||
from enum import Enum
|
||||
import requests # type: ignore
|
||||
import time, httpx # type: ignore
|
||||
from typing import Callable, Optional
|
||||
from litellm.utils import ModelResponse, Choices, Message
|
||||
|
||||
import httpx
|
||||
import requests # type: ignore
|
||||
|
||||
import litellm
|
||||
from litellm.utils import Choices, Message, ModelResponse
|
||||
|
||||
|
||||
class AI21Error(Exception):
|
||||
|
@ -185,7 +190,7 @@ def completion(
|
|||
message=message_obj,
|
||||
)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"] = choices_list
|
||||
model_response.choices = choices_list # type: ignore
|
||||
except Exception as e:
|
||||
raise AI21Error(
|
||||
message=traceback.format_exc(), status_code=response.status_code
|
||||
|
@ -197,13 +202,17 @@ def completion(
|
|||
encoding.encode(model_response["choices"][0]["message"].get("content"))
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
litellm.Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
),
|
||||
)
|
||||
return model_response
|
||||
|
||||
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
import os, types
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests # type: ignore
|
||||
import os
|
||||
import time
|
||||
import types
|
||||
from enum import Enum
|
||||
from typing import Callable, Optional
|
||||
import litellm
|
||||
from litellm.utils import ModelResponse, Choices, Message, Usage
|
||||
|
||||
import httpx # type: ignore
|
||||
import requests # type: ignore
|
||||
|
||||
import litellm
|
||||
from litellm.utils import Choices, Message, ModelResponse, Usage
|
||||
|
||||
|
||||
class AlephAlphaError(Exception):
|
||||
|
@ -275,7 +278,7 @@ def completion(
|
|||
message=message_obj,
|
||||
)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"] = choices_list
|
||||
model_response.choices = choices_list # type: ignore
|
||||
except:
|
||||
raise AlephAlphaError(
|
||||
message=json.dumps(completion_response),
|
||||
|
@ -291,8 +294,8 @@ def completion(
|
|||
)
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -20,19 +20,43 @@ from litellm.llms.custom_httpx.http_handler import (
|
|||
_get_httpx_client,
|
||||
)
|
||||
from litellm.types.llms.anthropic import (
|
||||
AnthopicMessagesAssistantMessageParam,
|
||||
AnthropicFinishReason,
|
||||
AnthropicMessagesRequest,
|
||||
AnthropicMessagesTool,
|
||||
AnthropicMessagesToolChoice,
|
||||
AnthropicMessagesUserMessageParam,
|
||||
AnthropicResponse,
|
||||
AnthropicResponseContentBlockText,
|
||||
AnthropicResponseContentBlockToolUse,
|
||||
AnthropicResponseUsageBlock,
|
||||
ContentBlockDelta,
|
||||
ContentBlockStart,
|
||||
MessageBlockDelta,
|
||||
MessageStartBlock,
|
||||
)
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
ChatCompletionAssistantMessage,
|
||||
ChatCompletionAssistantToolCall,
|
||||
ChatCompletionImageObject,
|
||||
ChatCompletionImageUrlObject,
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponseMessage,
|
||||
ChatCompletionSystemMessage,
|
||||
ChatCompletionTextObject,
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionToolCallFunctionChunk,
|
||||
ChatCompletionToolChoiceFunctionParam,
|
||||
ChatCompletionToolChoiceObjectParam,
|
||||
ChatCompletionToolChoiceValues,
|
||||
ChatCompletionToolMessage,
|
||||
ChatCompletionToolParam,
|
||||
ChatCompletionToolParamFunctionChunk,
|
||||
ChatCompletionUsageBlock,
|
||||
ChatCompletionUserMessage,
|
||||
)
|
||||
from litellm.types.utils import GenericStreamingChunk
|
||||
from litellm.types.utils import Choices, GenericStreamingChunk
|
||||
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
|
||||
|
||||
from .base import BaseLLM
|
||||
|
@ -168,6 +192,287 @@ class AnthropicConfig:
|
|||
optional_params["top_p"] = value
|
||||
return optional_params
|
||||
|
||||
### FOR [BETA] `/v1/messages` endpoint support
|
||||
|
||||
def translatable_anthropic_params(self) -> List:
|
||||
"""
|
||||
Which anthropic params, we need to translate to the openai format.
|
||||
"""
|
||||
return ["messages", "metadata", "system", "tool_choice", "tools"]
|
||||
|
||||
def translate_anthropic_messages_to_openai(
|
||||
self,
|
||||
messages: List[
|
||||
Union[
|
||||
AnthropicMessagesUserMessageParam,
|
||||
AnthopicMessagesAssistantMessageParam,
|
||||
]
|
||||
],
|
||||
) -> List:
|
||||
new_messages: List[AllMessageValues] = []
|
||||
for m in messages:
|
||||
user_message: Optional[ChatCompletionUserMessage] = None
|
||||
tool_message_list: List[ChatCompletionToolMessage] = []
|
||||
## USER MESSAGE ##
|
||||
if m["role"] == "user":
|
||||
## translate user message
|
||||
if isinstance(m["content"], str):
|
||||
user_message = ChatCompletionUserMessage(
|
||||
role="user", content=m["content"]
|
||||
)
|
||||
elif isinstance(m["content"], list):
|
||||
new_user_content_list: List[
|
||||
Union[ChatCompletionTextObject, ChatCompletionImageObject]
|
||||
] = []
|
||||
for content in m["content"]:
|
||||
if content["type"] == "text":
|
||||
text_obj = ChatCompletionTextObject(
|
||||
type="text", text=content["text"]
|
||||
)
|
||||
new_user_content_list.append(text_obj)
|
||||
elif content["type"] == "image":
|
||||
image_url = ChatCompletionImageUrlObject(
|
||||
url=f"data:{content['type']};base64,{content['source']}"
|
||||
)
|
||||
image_obj = ChatCompletionImageObject(
|
||||
type="image_url", image_url=image_url
|
||||
)
|
||||
|
||||
new_user_content_list.append(image_obj)
|
||||
elif content["type"] == "tool_result":
|
||||
if "content" not in content:
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content="",
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
elif isinstance(content["content"], str):
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content=content["content"],
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
elif isinstance(content["content"], list):
|
||||
for c in content["content"]:
|
||||
if c["type"] == "text":
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content=c["text"],
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
elif c["type"] == "image":
|
||||
image_str = (
|
||||
f"data:{c['type']};base64,{c['source']}"
|
||||
)
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content=image_str,
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
|
||||
if user_message is not None:
|
||||
new_messages.append(user_message)
|
||||
|
||||
if len(tool_message_list) > 0:
|
||||
new_messages.extend(tool_message_list)
|
||||
|
||||
## ASSISTANT MESSAGE ##
|
||||
assistant_message_str: Optional[str] = None
|
||||
tool_calls: List[ChatCompletionAssistantToolCall] = []
|
||||
if m["role"] == "assistant":
|
||||
if isinstance(m["content"], str):
|
||||
assistant_message_str = m["content"]
|
||||
elif isinstance(m["content"], list):
|
||||
for content in m["content"]:
|
||||
if content["type"] == "text":
|
||||
if assistant_message_str is None:
|
||||
assistant_message_str = content["text"]
|
||||
else:
|
||||
assistant_message_str += content["text"]
|
||||
elif content["type"] == "tool_use":
|
||||
function_chunk = ChatCompletionToolCallFunctionChunk(
|
||||
name=content["name"],
|
||||
arguments=json.dumps(content["input"]),
|
||||
)
|
||||
|
||||
tool_calls.append(
|
||||
ChatCompletionAssistantToolCall(
|
||||
id=content["id"],
|
||||
type="function",
|
||||
function=function_chunk,
|
||||
)
|
||||
)
|
||||
|
||||
if assistant_message_str is not None or len(tool_calls) > 0:
|
||||
assistant_message = ChatCompletionAssistantMessage(
|
||||
role="assistant",
|
||||
content=assistant_message_str,
|
||||
)
|
||||
if len(tool_calls) > 0:
|
||||
assistant_message["tool_calls"] = tool_calls
|
||||
new_messages.append(assistant_message)
|
||||
|
||||
return new_messages
|
||||
|
||||
def translate_anthropic_tool_choice_to_openai(
|
||||
self, tool_choice: AnthropicMessagesToolChoice
|
||||
) -> ChatCompletionToolChoiceValues:
|
||||
if tool_choice["type"] == "any":
|
||||
return "required"
|
||||
elif tool_choice["type"] == "auto":
|
||||
return "auto"
|
||||
elif tool_choice["type"] == "tool":
|
||||
tc_function_param = ChatCompletionToolChoiceFunctionParam(
|
||||
name=tool_choice.get("name", "")
|
||||
)
|
||||
return ChatCompletionToolChoiceObjectParam(
|
||||
type="function", function=tc_function_param
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Incompatible tool choice param submitted - {}".format(tool_choice)
|
||||
)
|
||||
|
||||
def translate_anthropic_tools_to_openai(
|
||||
self, tools: List[AnthropicMessagesTool]
|
||||
) -> List[ChatCompletionToolParam]:
|
||||
new_tools: List[ChatCompletionToolParam] = []
|
||||
for tool in tools:
|
||||
function_chunk = ChatCompletionToolParamFunctionChunk(
|
||||
name=tool["name"],
|
||||
parameters=tool["input_schema"],
|
||||
)
|
||||
if "description" in tool:
|
||||
function_chunk["description"] = tool["description"]
|
||||
new_tools.append(
|
||||
ChatCompletionToolParam(type="function", function=function_chunk)
|
||||
)
|
||||
|
||||
return new_tools
|
||||
|
||||
def translate_anthropic_to_openai(
|
||||
self, anthropic_message_request: AnthropicMessagesRequest
|
||||
) -> ChatCompletionRequest:
|
||||
"""
|
||||
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
|
||||
"""
|
||||
new_messages: List[AllMessageValues] = []
|
||||
|
||||
## CONVERT ANTHROPIC MESSAGES TO OPENAI
|
||||
new_messages = self.translate_anthropic_messages_to_openai(
|
||||
messages=anthropic_message_request["messages"]
|
||||
)
|
||||
## ADD SYSTEM MESSAGE TO MESSAGES
|
||||
if "system" in anthropic_message_request:
|
||||
new_messages.insert(
|
||||
0,
|
||||
ChatCompletionSystemMessage(
|
||||
role="system", content=anthropic_message_request["system"]
|
||||
),
|
||||
)
|
||||
|
||||
new_kwargs: ChatCompletionRequest = {
|
||||
"model": anthropic_message_request["model"],
|
||||
"messages": new_messages,
|
||||
}
|
||||
## CONVERT METADATA (user_id)
|
||||
if "metadata" in anthropic_message_request:
|
||||
if "user_id" in anthropic_message_request["metadata"]:
|
||||
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
|
||||
|
||||
## CONVERT TOOL CHOICE
|
||||
if "tool_choice" in anthropic_message_request:
|
||||
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
|
||||
tool_choice=anthropic_message_request["tool_choice"]
|
||||
)
|
||||
## CONVERT TOOLS
|
||||
if "tools" in anthropic_message_request:
|
||||
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
|
||||
tools=anthropic_message_request["tools"]
|
||||
)
|
||||
|
||||
translatable_params = self.translatable_anthropic_params()
|
||||
for k, v in anthropic_message_request.items():
|
||||
if k not in translatable_params: # pass remaining params as is
|
||||
new_kwargs[k] = v # type: ignore
|
||||
|
||||
return new_kwargs
|
||||
|
||||
def _translate_openai_content_to_anthropic(
|
||||
self, choices: List[Choices]
|
||||
) -> List[
|
||||
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
|
||||
]:
|
||||
new_content: List[
|
||||
Union[
|
||||
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
|
||||
]
|
||||
] = []
|
||||
for choice in choices:
|
||||
if (
|
||||
choice.message.tool_calls is not None
|
||||
and len(choice.message.tool_calls) > 0
|
||||
):
|
||||
for tool_call in choice.message.tool_calls:
|
||||
new_content.append(
|
||||
AnthropicResponseContentBlockToolUse(
|
||||
type="tool_use",
|
||||
id=tool_call.id,
|
||||
name=tool_call.function.name or "",
|
||||
input=json.loads(tool_call.function.arguments),
|
||||
)
|
||||
)
|
||||
elif choice.message.content is not None:
|
||||
new_content.append(
|
||||
AnthropicResponseContentBlockText(
|
||||
type="text", text=choice.message.content
|
||||
)
|
||||
)
|
||||
|
||||
return new_content
|
||||
|
||||
def _translate_openai_finish_reason_to_anthropic(
|
||||
self, openai_finish_reason: str
|
||||
) -> AnthropicFinishReason:
|
||||
if openai_finish_reason == "stop":
|
||||
return "end_turn"
|
||||
elif openai_finish_reason == "length":
|
||||
return "max_tokens"
|
||||
elif openai_finish_reason == "tool_calls":
|
||||
return "tool_use"
|
||||
return "end_turn"
|
||||
|
||||
def translate_openai_response_to_anthropic(
|
||||
self, response: litellm.ModelResponse
|
||||
) -> AnthropicResponse:
|
||||
## translate content block
|
||||
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
|
||||
## extract finish reason
|
||||
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
|
||||
openai_finish_reason=response.choices[0].finish_reason # type: ignore
|
||||
)
|
||||
# extract usage
|
||||
usage: litellm.Usage = getattr(response, "usage")
|
||||
anthropic_usage = AnthropicResponseUsageBlock(
|
||||
input_tokens=usage.prompt_tokens, output_tokens=usage.completion_tokens
|
||||
)
|
||||
translated_obj = AnthropicResponse(
|
||||
id=response.id,
|
||||
type="message",
|
||||
role="assistant",
|
||||
model=response.model or "unknown-model",
|
||||
stop_sequence=None,
|
||||
usage=anthropic_usage,
|
||||
content=anthropic_content,
|
||||
stop_reason=anthropic_finish_reason,
|
||||
)
|
||||
|
||||
return translated_obj
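# --- Illustrative sketch (not part of this commit): round-tripping a /v1/messages payload
# --- through the adapter helpers above. The request literal is a minimal made-up example.
def _example_anthropic_adapter_roundtrip() -> None:
    config = AnthropicConfig()

    anthropic_request: AnthropicMessagesRequest = {
        "model": "claude-3-sonnet-20240229",
        "max_tokens": 256,
        "system": "You are a terse assistant.",
        "messages": [{"role": "user", "content": "Say hi"}],
    }

    # anthropic /v1/messages payload -> litellm.completion() kwargs
    openai_request = config.translate_anthropic_to_openai(
        anthropic_message_request=anthropic_request
    )
    assert openai_request["messages"][0]["role"] == "system"  # system prompt hoisted

    # after calling litellm.completion(**openai_request), the resulting ModelResponse can
    # be mapped back with config.translate_openai_response_to_anthropic(response=...)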
|
||||
|
||||
|
||||
# makes headers for API call
|
||||
def validate_environment(api_key, user_headers):
|
||||
|
@ -231,121 +536,6 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
|
||||
# def process_streaming_response(
|
||||
# self,
|
||||
# model: str,
|
||||
# response: Union[requests.Response, httpx.Response],
|
||||
# model_response: ModelResponse,
|
||||
# stream: bool,
|
||||
# logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
|
||||
# optional_params: dict,
|
||||
# api_key: str,
|
||||
# data: Union[dict, str],
|
||||
# messages: List,
|
||||
# print_verbose,
|
||||
# encoding,
|
||||
# ) -> CustomStreamWrapper:
|
||||
# """
|
||||
# Return stream object for tool-calling + streaming
|
||||
# """
|
||||
# ## LOGGING
|
||||
# logging_obj.post_call(
|
||||
# input=messages,
|
||||
# api_key=api_key,
|
||||
# original_response=response.text,
|
||||
# additional_args={"complete_input_dict": data},
|
||||
# )
|
||||
# print_verbose(f"raw model_response: {response.text}")
|
||||
# ## RESPONSE OBJECT
|
||||
# try:
|
||||
# completion_response = response.json()
|
||||
# except:
|
||||
# raise AnthropicError(
|
||||
# message=response.text, status_code=response.status_code
|
||||
# )
|
||||
# text_content = ""
|
||||
# tool_calls = []
|
||||
# for content in completion_response["content"]:
|
||||
# if content["type"] == "text":
|
||||
# text_content += content["text"]
|
||||
# ## TOOL CALLING
|
||||
# elif content["type"] == "tool_use":
|
||||
# tool_calls.append(
|
||||
# {
|
||||
# "id": content["id"],
|
||||
# "type": "function",
|
||||
# "function": {
|
||||
# "name": content["name"],
|
||||
# "arguments": json.dumps(content["input"]),
|
||||
# },
|
||||
# }
|
||||
# )
|
||||
# if "error" in completion_response:
|
||||
# raise AnthropicError(
|
||||
# message=str(completion_response["error"]),
|
||||
# status_code=response.status_code,
|
||||
# )
|
||||
# _message = litellm.Message(
|
||||
# tool_calls=tool_calls,
|
||||
# content=text_content or None,
|
||||
# )
|
||||
# model_response.choices[0].message = _message # type: ignore
|
||||
# model_response._hidden_params["original_response"] = completion_response[
|
||||
# "content"
|
||||
# ] # allow user to access raw anthropic tool calling response
|
||||
|
||||
# model_response.choices[0].finish_reason = map_finish_reason(
|
||||
# completion_response["stop_reason"]
|
||||
# )
|
||||
|
||||
# print_verbose("INSIDE ANTHROPIC STREAMING TOOL CALLING CONDITION BLOCK")
|
||||
# # return an iterator
|
||||
# streaming_model_response = ModelResponse(stream=True)
|
||||
# streaming_model_response.choices[0].finish_reason = model_response.choices[ # type: ignore
|
||||
# 0
|
||||
# ].finish_reason
|
||||
# # streaming_model_response.choices = [litellm.utils.StreamingChoices()]
|
||||
# streaming_choice = litellm.utils.StreamingChoices()
|
||||
# streaming_choice.index = model_response.choices[0].index
|
||||
# _tool_calls = []
|
||||
# print_verbose(
|
||||
# f"type of model_response.choices[0]: {type(model_response.choices[0])}"
|
||||
# )
|
||||
# print_verbose(f"type of streaming_choice: {type(streaming_choice)}")
|
||||
# if isinstance(model_response.choices[0], litellm.Choices):
|
||||
# if getattr(
|
||||
# model_response.choices[0].message, "tool_calls", None
|
||||
# ) is not None and isinstance(
|
||||
# model_response.choices[0].message.tool_calls, list
|
||||
# ):
|
||||
# for tool_call in model_response.choices[0].message.tool_calls:
|
||||
# _tool_call = {**tool_call.dict(), "index": 0}
|
||||
# _tool_calls.append(_tool_call)
|
||||
# delta_obj = litellm.utils.Delta(
|
||||
# content=getattr(model_response.choices[0].message, "content", None),
|
||||
# role=model_response.choices[0].message.role,
|
||||
# tool_calls=_tool_calls,
|
||||
# )
|
||||
# streaming_choice.delta = delta_obj
|
||||
# streaming_model_response.choices = [streaming_choice]
|
||||
# completion_stream = ModelResponseIterator(
|
||||
# model_response=streaming_model_response
|
||||
# )
|
||||
# print_verbose(
|
||||
# "Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
|
||||
# )
|
||||
# return CustomStreamWrapper(
|
||||
# completion_stream=completion_stream,
|
||||
# model=model,
|
||||
# custom_llm_provider="cached_response",
|
||||
# logging_obj=logging_obj,
|
||||
# )
|
||||
# else:
|
||||
# raise AnthropicError(
|
||||
# status_code=422,
|
||||
# message="Unprocessable response object - {}".format(response.text),
|
||||
# )
|
||||
|
||||
def process_response(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -417,8 +607,8 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
completion_tokens = completion_response["usage"]["output_tokens"]
|
||||
total_tokens = prompt_tokens + completion_tokens
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -1,15 +1,19 @@
|
|||
import os, types
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests
|
||||
import os
|
||||
import time
|
||||
import types
|
||||
from enum import Enum
|
||||
from typing import Callable, Optional
|
||||
from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
|
||||
import litellm
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
|
||||
import httpx
|
||||
from .base import BaseLLM
|
||||
import requests
|
||||
|
||||
import litellm
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
|
||||
|
||||
from .base import BaseLLM
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class AnthropicConstants(Enum):
|
||||
|
@ -117,9 +121,9 @@ class AnthropicTextCompletion(BaseLLM):
|
|||
)
|
||||
else:
|
||||
if len(completion_response["completion"]) > 0:
|
||||
model_response["choices"][0]["message"]["content"] = (
|
||||
completion_response["completion"]
|
||||
)
|
||||
model_response.choices[0].message.content = completion_response[ # type: ignore
|
||||
"completion"
|
||||
]
|
||||
model_response.choices[0].finish_reason = completion_response["stop_reason"]
|
||||
|
||||
## CALCULATING USAGE
|
||||
|
@ -130,8 +134,8 @@ class AnthropicTextCompletion(BaseLLM):
|
|||
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
) ##[TODO] use the anthropic tokenizer here
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
import os
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests # type: ignore
|
||||
import os
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import Callable
|
||||
|
||||
import requests # type: ignore
|
||||
|
||||
from litellm.utils import ModelResponse, Usage
|
||||
|
||||
|
||||
|
@ -106,28 +108,32 @@ def completion(
|
|||
and "data" in completion_response["model_output"]
|
||||
and isinstance(completion_response["model_output"]["data"], list)
|
||||
):
|
||||
model_response["choices"][0]["message"]["content"] = (
|
||||
completion_response["model_output"]["data"][0]
|
||||
)
|
||||
model_response.choices[0].message.content = completion_response[ # type: ignore
|
||||
"model_output"
|
||||
][
|
||||
"data"
|
||||
][
|
||||
0
|
||||
]
|
||||
elif isinstance(completion_response["model_output"], str):
|
||||
model_response["choices"][0]["message"]["content"] = (
|
||||
completion_response["model_output"]
|
||||
)
|
||||
model_response.choices[0].message.content = completion_response[ # type: ignore
|
||||
"model_output"
|
||||
]
|
||||
elif "completion" in completion_response and isinstance(
|
||||
completion_response["completion"], str
|
||||
):
|
||||
model_response["choices"][0]["message"]["content"] = (
|
||||
completion_response["completion"]
|
||||
)
|
||||
model_response.choices[0].message.content = completion_response[ # type: ignore
|
||||
"completion"
|
||||
]
|
||||
elif isinstance(completion_response, list) and len(completion_response) > 0:
|
||||
if "generated_text" not in completion_response:
|
||||
raise BasetenError(
|
||||
message=f"Unable to parse response. Original response: {response.text}",
|
||||
status_code=response.status_code,
|
||||
)
|
||||
model_response["choices"][0]["message"]["content"] = (
|
||||
completion_response[0]["generated_text"]
|
||||
)
|
||||
model_response.choices[0].message.content = completion_response[0][ # type: ignore
|
||||
"generated_text"
|
||||
]
|
||||
## GETTING LOGPROBS
|
||||
if (
|
||||
"details" in completion_response[0]
|
||||
|
@ -139,7 +145,7 @@ def completion(
|
|||
sum_logprob = 0
|
||||
for token in completion_response[0]["details"]["tokens"]:
|
||||
sum_logprob += token["logprob"]
|
||||
model_response["choices"][0]["message"]._logprobs = sum_logprob
|
||||
model_response.choices[0].logprobs = sum_logprob
|
||||
else:
|
||||
raise BasetenError(
|
||||
message=f"Unable to parse response. Original response: {response.text}",
|
||||
|
@ -152,8 +158,8 @@ def completion(
|
|||
encoding.encode(model_response["choices"][0]["message"]["content"])
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -1122,7 +1122,7 @@ def completion(
|
|||
logging_obj=logging_obj,
|
||||
)
|
||||
|
||||
model_response["finish_reason"] = map_finish_reason(
|
||||
model_response.choices[0].finish_reason = map_finish_reason(
|
||||
response_body["stop_reason"]
|
||||
)
|
||||
_usage = litellm.Usage(
|
||||
|
@ -1134,14 +1134,16 @@ def completion(
|
|||
setattr(model_response, "usage", _usage)
|
||||
else:
|
||||
outputText = response_body["completion"]
|
||||
model_response["finish_reason"] = response_body["stop_reason"]
|
||||
model_response.choices[0].finish_reason = response_body["stop_reason"]
|
||||
elif provider == "cohere":
|
||||
outputText = response_body["generations"][0]["text"]
|
||||
elif provider == "meta":
|
||||
outputText = response_body["generation"]
|
||||
elif provider == "mistral":
|
||||
outputText = response_body["outputs"][0]["text"]
|
||||
model_response["finish_reason"] = response_body["outputs"][0]["stop_reason"]
|
||||
model_response.choices[0].finish_reason = response_body["outputs"][0][
|
||||
"stop_reason"
|
||||
]
|
||||
else: # amazon titan
|
||||
outputText = response_body.get("results")[0].get("outputText")
|
||||
|
||||
|
@ -1160,7 +1162,7 @@ def completion(
|
|||
and getattr(model_response.choices[0].message, "tool_calls", None)
|
||||
is None
|
||||
):
|
||||
model_response["choices"][0]["message"]["content"] = outputText
|
||||
model_response.choices[0].message.content = outputText
|
||||
elif (
|
||||
hasattr(model_response.choices[0], "message")
|
||||
and getattr(model_response.choices[0].message, "tool_calls", None)
|
||||
|
@ -1199,8 +1201,8 @@ def completion(
|
|||
)
|
||||
setattr(model_response, "usage", usage)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
|
||||
model_response._hidden_params["region_name"] = client.meta.region_name
|
||||
print_verbose(f"model_response._hidden_params: {model_response._hidden_params}")
|
||||
|
@ -1323,9 +1325,9 @@ def _embedding_func_single(
|
|||
def embedding(
|
||||
model: str,
|
||||
input: Union[list, str],
|
||||
model_response: litellm.EmbeddingResponse,
|
||||
api_key: Optional[str] = None,
|
||||
logging_obj=None,
|
||||
model_response=None,
|
||||
optional_params=None,
|
||||
encoding=None,
|
||||
):
|
||||
|
@ -1391,9 +1393,9 @@ def embedding(
|
|||
"embedding": embedding,
|
||||
}
|
||||
)
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = embedding_response
|
||||
model_response["model"] = model
|
||||
model_response.object = "list"
|
||||
model_response.data = embedding_response
|
||||
model_response.model = model
|
||||
input_tokens = 0
|
||||
|
||||
input_str = "".join(input)
|
||||
|
|
|
@ -521,7 +521,7 @@ class BedrockLLM(BaseLLM):
|
|||
outputText = completion_response["text"] # type: ignore
|
||||
elif "generations" in completion_response:
|
||||
outputText = completion_response["generations"][0]["text"]
|
||||
model_response["finish_reason"] = map_finish_reason(
|
||||
model_response.choices[0].finish_reason = map_finish_reason(
|
||||
completion_response["generations"][0]["finish_reason"]
|
||||
)
|
||||
elif provider == "anthropic":
|
||||
|
@ -625,7 +625,7 @@ class BedrockLLM(BaseLLM):
|
|||
logging_obj=logging_obj,
|
||||
)
|
||||
|
||||
model_response["finish_reason"] = map_finish_reason(
|
||||
model_response.choices[0].finish_reason = map_finish_reason(
|
||||
completion_response.get("stop_reason", "")
|
||||
)
|
||||
_usage = litellm.Usage(
|
||||
|
@ -638,7 +638,9 @@ class BedrockLLM(BaseLLM):
|
|||
else:
|
||||
outputText = completion_response["completion"]
|
||||
|
||||
model_response["finish_reason"] = completion_response["stop_reason"]
|
||||
model_response.choices[0].finish_reason = completion_response[
|
||||
"stop_reason"
|
||||
]
|
||||
elif provider == "ai21":
|
||||
outputText = (
|
||||
completion_response.get("completions")[0].get("data").get("text")
|
||||
|
@ -647,9 +649,9 @@ class BedrockLLM(BaseLLM):
|
|||
outputText = completion_response["generation"]
|
||||
elif provider == "mistral":
|
||||
outputText = completion_response["outputs"][0]["text"]
|
||||
model_response["finish_reason"] = completion_response["outputs"][0][
|
||||
"stop_reason"
|
||||
]
|
||||
model_response.choices[0].finish_reason = completion_response[
|
||||
"outputs"
|
||||
][0]["stop_reason"]
|
||||
else: # amazon titan
|
||||
outputText = completion_response.get("results")[0].get("outputText")
|
||||
except Exception as e:
|
||||
|
@ -667,7 +669,7 @@ class BedrockLLM(BaseLLM):
|
|||
and getattr(model_response.choices[0].message, "tool_calls", None)
|
||||
is None
|
||||
):
|
||||
model_response["choices"][0]["message"]["content"] = outputText
|
||||
model_response.choices[0].message.content = outputText
|
||||
elif (
|
||||
hasattr(model_response.choices[0], "message")
|
||||
and getattr(model_response.choices[0].message, "tool_calls", None)
|
||||
|
@ -723,8 +725,8 @@ class BedrockLLM(BaseLLM):
|
|||
)
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
@ -1066,7 +1068,7 @@ class BedrockLLM(BaseLLM):
|
|||
|
||||
if response.status_code != 200:
|
||||
raise BedrockError(
|
||||
status_code=response.status_code, message=response.text
|
||||
status_code=response.status_code, message=response.read()
|
||||
)
|
||||
|
||||
decoder = AWSEventStreamDecoder(model=model)
|
||||
|
@ -1446,8 +1448,8 @@ class BedrockConverseLLM(BaseLLM):
|
|||
message=litellm.Message(**chat_completion_message),
|
||||
)
|
||||
]
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=input_tokens,
|
||||
completion_tokens=output_tokens,
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
import os, types, traceback
|
||||
import json
|
||||
import requests
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
import types
|
||||
from typing import Callable, Optional
|
||||
from litellm.utils import ModelResponse, Usage, Choices, Message, CustomStreamWrapper
|
||||
import litellm
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
|
||||
import litellm
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage
|
||||
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class ClarifaiError(Exception):
|
||||
|
@ -87,7 +92,14 @@ def completions_to_model(payload):
|
|||
|
||||
|
||||
def process_response(
|
||||
model, prompt, response, model_response, api_key, data, encoding, logging_obj
|
||||
model,
|
||||
prompt,
|
||||
response,
|
||||
model_response: litellm.ModelResponse,
|
||||
api_key,
|
||||
data,
|
||||
encoding,
|
||||
logging_obj,
|
||||
):
|
||||
logging_obj.post_call(
|
||||
input=prompt,
|
||||
|
@ -116,7 +128,7 @@ def process_response(
|
|||
message=message_obj,
|
||||
)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"] = choices_list
|
||||
model_response.choices = choices_list # type: ignore
|
||||
|
||||
except Exception as e:
|
||||
raise ClarifaiError(
|
||||
|
@ -128,11 +140,15 @@ def process_response(
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content"))
)
model_response["model"] = model
model_response["usage"] = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
model_response.model = model
setattr(
model_response,
"usage",
Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
),
)
return model_response

@ -202,7 +218,7 @@ async def async_completion(
message=message_obj,
)
choices_list.append(choice_obj)
model_response["choices"] = choices_list
model_response.choices = choices_list # type: ignore

except Exception as e:
raise ClarifaiError(
@ -214,11 +230,15 @@ async def async_completion(
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content"))
)
model_response["model"] = model
model_response["usage"] = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
model_response.model = model
setattr(
model_response,
"usage",
Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
),
)
return model_response

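The clarifai hunks above repeat the pattern used across this commit: dict-style writes on the response object are replaced with attribute access, and `usage` is attached via `setattr`. A minimal sketch of why that direction matters, assuming the response object behaves like a Pydantic model; `FakeModelResponse` and `FakeUsage` below are stand-ins for illustration, not litellm's actual classes:

# Illustrative only -- a plain Pydantic model rejects item assignment,
# so field writes go through attributes (or setattr for dynamic names).
from typing import Optional

from pydantic import BaseModel


class FakeUsage(BaseModel):
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0


class FakeModelResponse(BaseModel):
    model: Optional[str] = None
    usage: Optional[FakeUsage] = None


resp = FakeModelResponse()

# Old style -- raises TypeError on a plain BaseModel:
# resp["model"] = "clarifai/foo"

# New style, matching the diff above:
resp.model = "clarifai/foo"
setattr(
    resp,
    "usage",
    FakeUsage(prompt_tokens=10, completion_tokens=5, total_tokens=15),
)
print(resp.model, resp.usage.total_tokens)

Note that litellm's own ModelResponse may still accept `["..."]` access for reads, which would explain why read sites such as `model_response["choices"][0]["message"].get("content")` are left unchanged in several hunks.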
@ -1,13 +1,17 @@
import os, types
import json
from enum import Enum
import requests # type: ignore
import os
import time
import types
from enum import Enum
from typing import Callable, Optional
import litellm

import httpx # type: ignore
import requests # type: ignore

import litellm
from litellm.utils import ModelResponse, Usage
from .prompt_templates.factory import prompt_factory, custom_prompt

from .prompt_templates.factory import custom_prompt, prompt_factory


class CloudflareError(Exception):
@ -147,9 +151,9 @@ def completion(
)
completion_response = response.json()

model_response["choices"][0]["message"]["content"] = completion_response[
"result"
]["response"]
model_response.choices[0].message.content = completion_response["result"][ # type: ignore
"response"
]

## CALCULATING USAGE
print_verbose(
@ -160,8 +164,8 @@ def completion(
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
)

model_response["created"] = int(time.time())
model_response["model"] = "cloudflare/" + model
model_response.created = int(time.time())
model_response.model = "cloudflare/" + model
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
@ -1,12 +1,16 @@
import os, types
import json
import os
import time
import traceback
import types
from enum import Enum
import requests # type: ignore
import time, traceback
from typing import Callable, Optional
from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm

import httpx # type: ignore
import requests # type: ignore

import litellm
from litellm.utils import Choices, Message, ModelResponse, Usage


class CohereError(Exception):
@ -117,7 +121,7 @@ class CohereConfig:

def validate_environment(api_key):
headers = {
"Request-Source":"unspecified:litellm",
"Request-Source": "unspecified:litellm",
"accept": "application/json",
"content-type": "application/json",
}
@ -219,7 +223,7 @@ def completion(
message=message_obj,
)
choices_list.append(choice_obj)
model_response["choices"] = choices_list
model_response.choices = choices_list # type: ignore
except Exception as e:
raise CohereError(
message=response.text, status_code=response.status_code
@ -231,8 +235,8 @@ def completion(
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
)

model_response["created"] = int(time.time())
model_response["model"] = model
model_response.created = int(time.time())
model_response.model = model
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
@ -245,9 +249,9 @@ def completion(
def embedding(
model: str,
input: list,
model_response: litellm.EmbeddingResponse,
api_key: Optional[str] = None,
logging_obj=None,
model_response=None,
encoding=None,
optional_params=None,
):
@ -294,14 +298,18 @@ def embedding(
output_data.append(
{"object": "embedding", "index": idx, "embedding": embedding}
)
model_response["object"] = "list"
model_response["data"] = output_data
model_response["model"] = model
model_response.object = "list"
model_response.data = output_data
model_response.model = model
input_tokens = 0
for text in input:
input_tokens += len(encoding.encode(text))

model_response["usage"] = Usage(
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
setattr(
model_response,
"usage",
Usage(
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
),
)
return model_response

@ -305,8 +305,8 @@ def completion(
prompt_tokens = billed_units.get("input_tokens", 0)
completion_tokens = billed_units.get("output_tokens", 0)

model_response["created"] = int(time.time())
model_response["model"] = model
model_response.created = int(time.time())
model_response.model = model
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
@ -1,26 +1,26 @@
# What is this?
## Handler file for databricks API https://docs.databricks.com/en/machine-learning/foundation-models/api-reference.html#chat-request
from functools import partial
import os, types
import copy
import json
from enum import Enum
import requests, copy # type: ignore
import os
import time
from typing import Callable, Optional, List, Union, Tuple, Literal
from litellm.utils import (
ModelResponse,
Usage,
CustomStreamWrapper,
EmbeddingResponse,
)
from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from .base import BaseLLM
import types
from enum import Enum
from functools import partial
from typing import Callable, List, Literal, Optional, Tuple, Union

import httpx # type: ignore
import requests # type: ignore

import litellm
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.llms.databricks import GenericStreamingChunk
from litellm.types.utils import ProviderField
from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage

from .base import BaseLLM
from .prompt_templates.factory import custom_prompt, prompt_factory


class DatabricksError(Exception):
@ -354,8 +354,8 @@ class DatabricksChatCompletion(BaseLLM):
completion_tokens = completion_response["usage"]["output_tokens"]
total_tokens = prompt_tokens + completion_tokens

model_response["created"] = int(time.time())
model_response["model"] = model
model_response.created = int(time.time())
model_response.model = model
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
@ -1,7 +1,7 @@
|
|||
####################################
|
||||
######### DEPRECATED FILE ##########
|
||||
####################################
|
||||
# logic moved to `vertex_httpx.py` #
|
||||
# ####################################
|
||||
# ######### DEPRECATED FILE ##########
|
||||
# ####################################
|
||||
# # logic moved to `vertex_httpx.py` #
|
||||
|
||||
import copy
|
||||
import time
|
||||
|
@ -92,332 +92,332 @@ class GeminiConfig:
|
|||
}
|
||||
|
||||
|
||||
class TextStreamer:
|
||||
"""
|
||||
A class designed to return an async stream from AsyncGenerateContentResponse object.
|
||||
"""
|
||||
# class TextStreamer:
|
||||
# """
|
||||
# A class designed to return an async stream from AsyncGenerateContentResponse object.
|
||||
# """
|
||||
|
||||
def __init__(self, response):
|
||||
self.response = response
|
||||
self._aiter = self.response.__aiter__()
|
||||
# def __init__(self, response):
|
||||
# self.response = response
|
||||
# self._aiter = self.response.__aiter__()
|
||||
|
||||
async def __aiter__(self):
|
||||
while True:
|
||||
try:
|
||||
# This will manually advance the async iterator.
|
||||
# In the case the next object doesn't exists, __anext__() will simply raise a StopAsyncIteration exception
|
||||
next_object = await self._aiter.__anext__()
|
||||
yield next_object
|
||||
except StopAsyncIteration:
|
||||
# After getting all items from the async iterator, stop iterating
|
||||
break
|
||||
# async def __aiter__(self):
|
||||
# while True:
|
||||
# try:
|
||||
# # This will manually advance the async iterator.
|
||||
# # In the case the next object doesn't exists, __anext__() will simply raise a StopAsyncIteration exception
|
||||
# next_object = await self._aiter.__anext__()
|
||||
# yield next_object
|
||||
# except StopAsyncIteration:
|
||||
# # After getting all items from the async iterator, stop iterating
|
||||
# break
|
||||
|
||||
|
||||
def supports_system_instruction():
|
||||
import google.generativeai as genai
|
||||
# def supports_system_instruction():
|
||||
# import google.generativeai as genai
|
||||
|
||||
gemini_pkg_version = Version(genai.__version__)
|
||||
return gemini_pkg_version >= Version("0.5.0")
|
||||
# gemini_pkg_version = Version(genai.__version__)
|
||||
# return gemini_pkg_version >= Version("0.5.0")
|
||||
|
||||
|
||||
def completion(
|
||||
model: str,
|
||||
messages: list,
|
||||
model_response: ModelResponse,
|
||||
print_verbose: Callable,
|
||||
api_key,
|
||||
encoding,
|
||||
logging_obj,
|
||||
custom_prompt_dict: dict,
|
||||
acompletion: bool = False,
|
||||
optional_params=None,
|
||||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
):
|
||||
try:
|
||||
import google.generativeai as genai # type: ignore
|
||||
except:
|
||||
raise Exception(
|
||||
"Importing google.generativeai failed, please run 'pip install -q google-generativeai"
|
||||
)
|
||||
genai.configure(api_key=api_key)
|
||||
system_prompt = ""
|
||||
if model in custom_prompt_dict:
|
||||
# check if the model has a registered custom prompt
|
||||
model_prompt_details = custom_prompt_dict[model]
|
||||
prompt = custom_prompt(
|
||||
role_dict=model_prompt_details["roles"],
|
||||
initial_prompt_value=model_prompt_details["initial_prompt_value"],
|
||||
final_prompt_value=model_prompt_details["final_prompt_value"],
|
||||
messages=messages,
|
||||
)
|
||||
else:
|
||||
system_prompt, messages = get_system_prompt(messages=messages)
|
||||
prompt = prompt_factory(
|
||||
model=model, messages=messages, custom_llm_provider="gemini"
|
||||
)
|
||||
# def completion(
|
||||
# model: str,
|
||||
# messages: list,
|
||||
# model_response: ModelResponse,
|
||||
# print_verbose: Callable,
|
||||
# api_key,
|
||||
# encoding,
|
||||
# logging_obj,
|
||||
# custom_prompt_dict: dict,
|
||||
# acompletion: bool = False,
|
||||
# optional_params=None,
|
||||
# litellm_params=None,
|
||||
# logger_fn=None,
|
||||
# ):
|
||||
# try:
|
||||
# import google.generativeai as genai # type: ignore
|
||||
# except:
|
||||
# raise Exception(
|
||||
# "Importing google.generativeai failed, please run 'pip install -q google-generativeai"
|
||||
# )
|
||||
# genai.configure(api_key=api_key)
|
||||
# system_prompt = ""
|
||||
# if model in custom_prompt_dict:
|
||||
# # check if the model has a registered custom prompt
|
||||
# model_prompt_details = custom_prompt_dict[model]
|
||||
# prompt = custom_prompt(
|
||||
# role_dict=model_prompt_details["roles"],
|
||||
# initial_prompt_value=model_prompt_details["initial_prompt_value"],
|
||||
# final_prompt_value=model_prompt_details["final_prompt_value"],
|
||||
# messages=messages,
|
||||
# )
|
||||
# else:
|
||||
# system_prompt, messages = get_system_prompt(messages=messages)
|
||||
# prompt = prompt_factory(
|
||||
# model=model, messages=messages, custom_llm_provider="gemini"
|
||||
# )
|
||||
|
||||
## Load Config
|
||||
inference_params = copy.deepcopy(optional_params)
|
||||
stream = inference_params.pop("stream", None)
|
||||
# ## Load Config
|
||||
# inference_params = copy.deepcopy(optional_params)
|
||||
# stream = inference_params.pop("stream", None)
|
||||
|
||||
# Handle safety settings
|
||||
safety_settings_param = inference_params.pop("safety_settings", None)
|
||||
safety_settings = None
|
||||
if safety_settings_param:
|
||||
safety_settings = [
|
||||
genai.types.SafetySettingDict(x) for x in safety_settings_param
|
||||
]
|
||||
# # Handle safety settings
|
||||
# safety_settings_param = inference_params.pop("safety_settings", None)
|
||||
# safety_settings = None
|
||||
# if safety_settings_param:
|
||||
# safety_settings = [
|
||||
# genai.types.SafetySettingDict(x) for x in safety_settings_param
|
||||
# ]
|
||||
|
||||
config = litellm.GeminiConfig.get_config()
|
||||
for k, v in config.items():
|
||||
if (
|
||||
k not in inference_params
|
||||
): # completion(top_k=3) > gemini_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||
inference_params[k] = v
|
||||
# config = litellm.GeminiConfig.get_config()
|
||||
# for k, v in config.items():
|
||||
# if (
|
||||
# k not in inference_params
|
||||
# ): # completion(top_k=3) > gemini_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||
# inference_params[k] = v
|
||||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=prompt,
|
||||
api_key="",
|
||||
additional_args={
|
||||
"complete_input_dict": {
|
||||
"inference_params": inference_params,
|
||||
"system_prompt": system_prompt,
|
||||
}
|
||||
},
|
||||
)
|
||||
## COMPLETION CALL
|
||||
try:
|
||||
_params = {"model_name": "models/{}".format(model)}
|
||||
_system_instruction = supports_system_instruction()
|
||||
if _system_instruction and len(system_prompt) > 0:
|
||||
_params["system_instruction"] = system_prompt
|
||||
_model = genai.GenerativeModel(**_params)
|
||||
if stream is True:
|
||||
if acompletion is True:
|
||||
# ## LOGGING
|
||||
# logging_obj.pre_call(
|
||||
# input=prompt,
|
||||
# api_key="",
|
||||
# additional_args={
|
||||
# "complete_input_dict": {
|
||||
# "inference_params": inference_params,
|
||||
# "system_prompt": system_prompt,
|
||||
# }
|
||||
# },
|
||||
# )
|
||||
# ## COMPLETION CALL
|
||||
# try:
|
||||
# _params = {"model_name": "models/{}".format(model)}
|
||||
# _system_instruction = supports_system_instruction()
|
||||
# if _system_instruction and len(system_prompt) > 0:
|
||||
# _params["system_instruction"] = system_prompt
|
||||
# _model = genai.GenerativeModel(**_params)
|
||||
# if stream is True:
|
||||
# if acompletion is True:
|
||||
|
||||
async def async_streaming():
|
||||
try:
|
||||
response = await _model.generate_content_async(
|
||||
contents=prompt,
|
||||
generation_config=genai.types.GenerationConfig(
|
||||
**inference_params
|
||||
),
|
||||
safety_settings=safety_settings,
|
||||
stream=True,
|
||||
)
|
||||
# async def async_streaming():
|
||||
# try:
|
||||
# response = await _model.generate_content_async(
|
||||
# contents=prompt,
|
||||
# generation_config=genai.types.GenerationConfig(
|
||||
# **inference_params
|
||||
# ),
|
||||
# safety_settings=safety_settings,
|
||||
# stream=True,
|
||||
# )
|
||||
|
||||
response = litellm.CustomStreamWrapper(
|
||||
TextStreamer(response),
|
||||
model,
|
||||
custom_llm_provider="gemini",
|
||||
logging_obj=logging_obj,
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
raise GeminiError(status_code=500, message=str(e))
|
||||
# response = litellm.CustomStreamWrapper(
|
||||
# TextStreamer(response),
|
||||
# model,
|
||||
# custom_llm_provider="gemini",
|
||||
# logging_obj=logging_obj,
|
||||
# )
|
||||
# return response
|
||||
# except Exception as e:
|
||||
# raise GeminiError(status_code=500, message=str(e))
|
||||
|
||||
return async_streaming()
|
||||
response = _model.generate_content(
|
||||
contents=prompt,
|
||||
generation_config=genai.types.GenerationConfig(**inference_params),
|
||||
safety_settings=safety_settings,
|
||||
stream=True,
|
||||
)
|
||||
return response
|
||||
elif acompletion == True:
|
||||
return async_completion(
|
||||
_model=_model,
|
||||
model=model,
|
||||
prompt=prompt,
|
||||
inference_params=inference_params,
|
||||
safety_settings=safety_settings,
|
||||
logging_obj=logging_obj,
|
||||
print_verbose=print_verbose,
|
||||
model_response=model_response,
|
||||
messages=messages,
|
||||
encoding=encoding,
|
||||
)
|
||||
else:
|
||||
params = {
|
||||
"contents": prompt,
|
||||
"generation_config": genai.types.GenerationConfig(**inference_params),
|
||||
"safety_settings": safety_settings,
|
||||
}
|
||||
response = _model.generate_content(**params)
|
||||
except Exception as e:
|
||||
raise GeminiError(
|
||||
message=str(e),
|
||||
status_code=500,
|
||||
)
|
||||
# return async_streaming()
|
||||
# response = _model.generate_content(
|
||||
# contents=prompt,
|
||||
# generation_config=genai.types.GenerationConfig(**inference_params),
|
||||
# safety_settings=safety_settings,
|
||||
# stream=True,
|
||||
# )
|
||||
# return response
|
||||
# elif acompletion == True:
|
||||
# return async_completion(
|
||||
# _model=_model,
|
||||
# model=model,
|
||||
# prompt=prompt,
|
||||
# inference_params=inference_params,
|
||||
# safety_settings=safety_settings,
|
||||
# logging_obj=logging_obj,
|
||||
# print_verbose=print_verbose,
|
||||
# model_response=model_response,
|
||||
# messages=messages,
|
||||
# encoding=encoding,
|
||||
# )
|
||||
# else:
|
||||
# params = {
|
||||
# "contents": prompt,
|
||||
# "generation_config": genai.types.GenerationConfig(**inference_params),
|
||||
# "safety_settings": safety_settings,
|
||||
# }
|
||||
# response = _model.generate_content(**params)
|
||||
# except Exception as e:
|
||||
# raise GeminiError(
|
||||
# message=str(e),
|
||||
# status_code=500,
|
||||
# )
|
||||
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=prompt,
|
||||
api_key="",
|
||||
original_response=response,
|
||||
additional_args={"complete_input_dict": {}},
|
||||
)
|
||||
print_verbose(f"raw model_response: {response}")
|
||||
## RESPONSE OBJECT
|
||||
completion_response = response
|
||||
try:
|
||||
choices_list = []
|
||||
for idx, item in enumerate(completion_response.candidates):
|
||||
if len(item.content.parts) > 0:
|
||||
message_obj = Message(content=item.content.parts[0].text)
|
||||
else:
|
||||
message_obj = Message(content=None)
|
||||
choice_obj = Choices(index=idx, message=message_obj)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"] = choices_list
|
||||
except Exception as e:
|
||||
verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
|
||||
verbose_logger.debug(traceback.format_exc())
|
||||
raise GeminiError(
|
||||
message=traceback.format_exc(), status_code=response.status_code
|
||||
)
|
||||
# ## LOGGING
|
||||
# logging_obj.post_call(
|
||||
# input=prompt,
|
||||
# api_key="",
|
||||
# original_response=response,
|
||||
# additional_args={"complete_input_dict": {}},
|
||||
# )
|
||||
# print_verbose(f"raw model_response: {response}")
|
||||
# ## RESPONSE OBJECT
|
||||
# completion_response = response
|
||||
# try:
|
||||
# choices_list = []
|
||||
# for idx, item in enumerate(completion_response.candidates):
|
||||
# if len(item.content.parts) > 0:
|
||||
# message_obj = Message(content=item.content.parts[0].text)
|
||||
# else:
|
||||
# message_obj = Message(content=None)
|
||||
# choice_obj = Choices(index=idx, message=message_obj)
|
||||
# choices_list.append(choice_obj)
|
||||
# model_response.choices = choices_list
|
||||
# except Exception as e:
|
||||
# verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
|
||||
# verbose_logger.debug(traceback.format_exc())
|
||||
# raise GeminiError(
|
||||
# message=traceback.format_exc(), status_code=response.status_code
|
||||
# )
|
||||
|
||||
try:
|
||||
completion_response = model_response["choices"][0]["message"].get("content")
|
||||
if completion_response is None:
|
||||
raise Exception
|
||||
except:
|
||||
original_response = f"response: {response}"
|
||||
if hasattr(response, "candidates"):
|
||||
original_response = f"response: {response.candidates}"
|
||||
if "SAFETY" in original_response:
|
||||
original_response += (
|
||||
"\nThe candidate content was flagged for safety reasons."
|
||||
)
|
||||
elif "RECITATION" in original_response:
|
||||
original_response += (
|
||||
"\nThe candidate content was flagged for recitation reasons."
|
||||
)
|
||||
raise GeminiError(
|
||||
status_code=400,
|
||||
message=f"No response received. Original response - {original_response}",
|
||||
)
|
||||
# try:
|
||||
# completion_response = model_response["choices"][0]["message"].get("content")
|
||||
# if completion_response is None:
|
||||
# raise Exception
|
||||
# except:
|
||||
# original_response = f"response: {response}"
|
||||
# if hasattr(response, "candidates"):
|
||||
# original_response = f"response: {response.candidates}"
|
||||
# if "SAFETY" in original_response:
|
||||
# original_response += (
|
||||
# "\nThe candidate content was flagged for safety reasons."
|
||||
# )
|
||||
# elif "RECITATION" in original_response:
|
||||
# original_response += (
|
||||
# "\nThe candidate content was flagged for recitation reasons."
|
||||
# )
|
||||
# raise GeminiError(
|
||||
# status_code=400,
|
||||
# message=f"No response received. Original response - {original_response}",
|
||||
# )
|
||||
|
||||
## CALCULATING USAGE
|
||||
prompt_str = ""
|
||||
for m in messages:
|
||||
if isinstance(m["content"], str):
|
||||
prompt_str += m["content"]
|
||||
elif isinstance(m["content"], list):
|
||||
for content in m["content"]:
|
||||
if content["type"] == "text":
|
||||
prompt_str += content["text"]
|
||||
prompt_tokens = len(encoding.encode(prompt_str))
|
||||
completion_tokens = len(
|
||||
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
)
|
||||
# ## CALCULATING USAGE
|
||||
# prompt_str = ""
|
||||
# for m in messages:
|
||||
# if isinstance(m["content"], str):
|
||||
# prompt_str += m["content"]
|
||||
# elif isinstance(m["content"], list):
|
||||
# for content in m["content"]:
|
||||
# if content["type"] == "text":
|
||||
# prompt_str += content["text"]
|
||||
# prompt_tokens = len(encoding.encode(prompt_str))
|
||||
# completion_tokens = len(
|
||||
# encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
# )
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = "gemini/" + model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
setattr(model_response, "usage", usage)
|
||||
return model_response
|
||||
# model_response.created = int(time.time())
|
||||
# model_response.model = "gemini/" + model
|
||||
# usage = Usage(
|
||||
# prompt_tokens=prompt_tokens,
|
||||
# completion_tokens=completion_tokens,
|
||||
# total_tokens=prompt_tokens + completion_tokens,
|
||||
# )
|
||||
# setattr(model_response, "usage", usage)
|
||||
# return model_response
|
||||
|
||||
|
||||
async def async_completion(
|
||||
_model,
|
||||
model,
|
||||
prompt,
|
||||
inference_params,
|
||||
safety_settings,
|
||||
logging_obj,
|
||||
print_verbose,
|
||||
model_response,
|
||||
messages,
|
||||
encoding,
|
||||
):
|
||||
import google.generativeai as genai # type: ignore
|
||||
# async def async_completion(
|
||||
# _model,
|
||||
# model,
|
||||
# prompt,
|
||||
# inference_params,
|
||||
# safety_settings,
|
||||
# logging_obj,
|
||||
# print_verbose,
|
||||
# model_response,
|
||||
# messages,
|
||||
# encoding,
|
||||
# ):
|
||||
# import google.generativeai as genai # type: ignore
|
||||
|
||||
response = await _model.generate_content_async(
|
||||
contents=prompt,
|
||||
generation_config=genai.types.GenerationConfig(**inference_params),
|
||||
safety_settings=safety_settings,
|
||||
)
|
||||
# response = await _model.generate_content_async(
|
||||
# contents=prompt,
|
||||
# generation_config=genai.types.GenerationConfig(**inference_params),
|
||||
# safety_settings=safety_settings,
|
||||
# )
|
||||
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=prompt,
|
||||
api_key="",
|
||||
original_response=response,
|
||||
additional_args={"complete_input_dict": {}},
|
||||
)
|
||||
print_verbose(f"raw model_response: {response}")
|
||||
## RESPONSE OBJECT
|
||||
completion_response = response
|
||||
try:
|
||||
choices_list = []
|
||||
for idx, item in enumerate(completion_response.candidates):
|
||||
if len(item.content.parts) > 0:
|
||||
message_obj = Message(content=item.content.parts[0].text)
|
||||
else:
|
||||
message_obj = Message(content=None)
|
||||
choice_obj = Choices(index=idx, message=message_obj)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"] = choices_list
|
||||
except Exception as e:
|
||||
verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
|
||||
verbose_logger.debug(traceback.format_exc())
|
||||
raise GeminiError(
|
||||
message=traceback.format_exc(), status_code=response.status_code
|
||||
)
|
||||
# ## LOGGING
|
||||
# logging_obj.post_call(
|
||||
# input=prompt,
|
||||
# api_key="",
|
||||
# original_response=response,
|
||||
# additional_args={"complete_input_dict": {}},
|
||||
# )
|
||||
# print_verbose(f"raw model_response: {response}")
|
||||
# ## RESPONSE OBJECT
|
||||
# completion_response = response
|
||||
# try:
|
||||
# choices_list = []
|
||||
# for idx, item in enumerate(completion_response.candidates):
|
||||
# if len(item.content.parts) > 0:
|
||||
# message_obj = Message(content=item.content.parts[0].text)
|
||||
# else:
|
||||
# message_obj = Message(content=None)
|
||||
# choice_obj = Choices(index=idx, message=message_obj)
|
||||
# choices_list.append(choice_obj)
|
||||
# model_response["choices"] = choices_list
|
||||
# except Exception as e:
|
||||
# verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
|
||||
# verbose_logger.debug(traceback.format_exc())
|
||||
# raise GeminiError(
|
||||
# message=traceback.format_exc(), status_code=response.status_code
|
||||
# )
|
||||
|
||||
try:
|
||||
completion_response = model_response["choices"][0]["message"].get("content")
|
||||
if completion_response is None:
|
||||
raise Exception
|
||||
except:
|
||||
original_response = f"response: {response}"
|
||||
if hasattr(response, "candidates"):
|
||||
original_response = f"response: {response.candidates}"
|
||||
if "SAFETY" in original_response:
|
||||
original_response += (
|
||||
"\nThe candidate content was flagged for safety reasons."
|
||||
)
|
||||
elif "RECITATION" in original_response:
|
||||
original_response += (
|
||||
"\nThe candidate content was flagged for recitation reasons."
|
||||
)
|
||||
raise GeminiError(
|
||||
status_code=400,
|
||||
message=f"No response received. Original response - {original_response}",
|
||||
)
|
||||
# try:
|
||||
# completion_response = model_response["choices"][0]["message"].get("content")
|
||||
# if completion_response is None:
|
||||
# raise Exception
|
||||
# except:
|
||||
# original_response = f"response: {response}"
|
||||
# if hasattr(response, "candidates"):
|
||||
# original_response = f"response: {response.candidates}"
|
||||
# if "SAFETY" in original_response:
|
||||
# original_response += (
|
||||
# "\nThe candidate content was flagged for safety reasons."
|
||||
# )
|
||||
# elif "RECITATION" in original_response:
|
||||
# original_response += (
|
||||
# "\nThe candidate content was flagged for recitation reasons."
|
||||
# )
|
||||
# raise GeminiError(
|
||||
# status_code=400,
|
||||
# message=f"No response received. Original response - {original_response}",
|
||||
# )
|
||||
|
||||
## CALCULATING USAGE
|
||||
prompt_str = ""
|
||||
for m in messages:
|
||||
if isinstance(m["content"], str):
|
||||
prompt_str += m["content"]
|
||||
elif isinstance(m["content"], list):
|
||||
for content in m["content"]:
|
||||
if content["type"] == "text":
|
||||
prompt_str += content["text"]
|
||||
prompt_tokens = len(encoding.encode(prompt_str))
|
||||
completion_tokens = len(
|
||||
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
)
|
||||
# ## CALCULATING USAGE
|
||||
# prompt_str = ""
|
||||
# for m in messages:
|
||||
# if isinstance(m["content"], str):
|
||||
# prompt_str += m["content"]
|
||||
# elif isinstance(m["content"], list):
|
||||
# for content in m["content"]:
|
||||
# if content["type"] == "text":
|
||||
# prompt_str += content["text"]
|
||||
# prompt_tokens = len(encoding.encode(prompt_str))
|
||||
# completion_tokens = len(
|
||||
# encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
# )
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = "gemini/" + model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
model_response.usage = usage
|
||||
return model_response
|
||||
# model_response["created"] = int(time.time())
|
||||
# model_response["model"] = "gemini/" + model
|
||||
# usage = Usage(
|
||||
# prompt_tokens=prompt_tokens,
|
||||
# completion_tokens=completion_tokens,
|
||||
# total_tokens=prompt_tokens + completion_tokens,
|
||||
# )
|
||||
# model_response.usage = usage
|
||||
# return model_response
|
||||
|
||||
|
||||
def embedding():
|
||||
# logic for parsing in - calling - parsing out model embedding calls
|
||||
pass
|
||||
# def embedding():
|
||||
# # logic for parsing in - calling - parsing out model embedding calls
|
||||
# pass
|
||||
|
|
|
@ -1,17 +1,22 @@
|
|||
## Uses the huggingface text generation inference API
|
||||
import os, copy, types
|
||||
import json
|
||||
from enum import Enum
|
||||
import httpx, requests
|
||||
from .base import BaseLLM
|
||||
import time
|
||||
import litellm
|
||||
from typing import Callable, Dict, List, Any, Literal, Tuple
|
||||
from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, Usage
|
||||
from typing import Optional
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
from litellm.types.completion import ChatCompletionMessageToolCallParam
|
||||
import copy
|
||||
import enum
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import types
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
|
||||
import litellm
|
||||
from litellm.types.completion import ChatCompletionMessageToolCallParam
|
||||
from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage
|
||||
|
||||
from .base import BaseLLM
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class HuggingfaceError(Exception):
|
||||
|
@ -269,7 +274,7 @@ class Huggingface(BaseLLM):
|
|||
def convert_to_model_response_object(
|
||||
self,
|
||||
completion_response,
|
||||
model_response,
|
||||
model_response: litellm.ModelResponse,
|
||||
task: hf_tasks,
|
||||
optional_params,
|
||||
encoding,
|
||||
|
@ -278,11 +283,9 @@ class Huggingface(BaseLLM):
|
|||
):
|
||||
if task == "conversational":
|
||||
if len(completion_response["generated_text"]) > 0: # type: ignore
|
||||
model_response["choices"][0]["message"][
|
||||
"content"
|
||||
] = completion_response[
|
||||
model_response.choices[0].message.content = completion_response[ # type: ignore
|
||||
"generated_text"
|
||||
] # type: ignore
|
||||
]
|
||||
elif task == "text-generation-inference":
|
||||
if (
|
||||
not isinstance(completion_response, list)
|
||||
|
@ -295,7 +298,7 @@ class Huggingface(BaseLLM):
|
|||
)
|
||||
|
||||
if len(completion_response[0]["generated_text"]) > 0:
|
||||
model_response["choices"][0]["message"]["content"] = output_parser(
|
||||
model_response.choices[0].message.content = output_parser( # type: ignore
|
||||
completion_response[0]["generated_text"]
|
||||
)
|
||||
## GETTING LOGPROBS + FINISH REASON
|
||||
|
@ -310,7 +313,7 @@ class Huggingface(BaseLLM):
|
|||
for token in completion_response[0]["details"]["tokens"]:
|
||||
if token["logprob"] != None:
|
||||
sum_logprob += token["logprob"]
|
||||
model_response["choices"][0]["message"]._logprob = sum_logprob
|
||||
setattr(model_response.choices[0].message, "_logprob", sum_logprob) # type: ignore
|
||||
if "best_of" in optional_params and optional_params["best_of"] > 1:
|
||||
if (
|
||||
"details" in completion_response[0]
|
||||
|
@ -337,14 +340,14 @@ class Huggingface(BaseLLM):
|
|||
message=message_obj,
|
||||
)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"].extend(choices_list)
|
||||
model_response.choices.extend(choices_list)
|
||||
elif task == "text-classification":
|
||||
model_response["choices"][0]["message"]["content"] = json.dumps(
|
||||
model_response.choices[0].message.content = json.dumps( # type: ignore
|
||||
completion_response
|
||||
)
|
||||
else:
|
||||
if len(completion_response[0]["generated_text"]) > 0:
|
||||
model_response["choices"][0]["message"]["content"] = output_parser(
|
||||
model_response.choices[0].message.content = output_parser( # type: ignore
|
||||
completion_response[0]["generated_text"]
|
||||
)
|
||||
## CALCULATING USAGE
|
||||
|
@ -371,14 +374,14 @@ class Huggingface(BaseLLM):
|
|||
else:
|
||||
completion_tokens = 0
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
model_response.usage = usage
|
||||
setattr(model_response, "usage", usage)
|
||||
model_response._hidden_params["original_response"] = completion_response
|
||||
return model_response
|
||||
|
||||
|
@ -763,10 +766,10 @@ class Huggingface(BaseLLM):
|
|||
self,
|
||||
model: str,
|
||||
input: list,
|
||||
model_response: litellm.EmbeddingResponse,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
logging_obj=None,
|
||||
model_response=None,
|
||||
encoding=None,
|
||||
):
|
||||
super().embedding()
|
||||
|
@ -867,15 +870,21 @@ class Huggingface(BaseLLM):
|
|||
], # flatten list returned from hf
|
||||
}
|
||||
)
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = output_data
|
||||
model_response["model"] = model
|
||||
model_response.object = "list"
|
||||
model_response.data = output_data
|
||||
model_response.model = model
|
||||
input_tokens = 0
|
||||
for text in input:
|
||||
input_tokens += len(encoding.encode(text))
|
||||
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": input_tokens,
|
||||
"total_tokens": input_tokens,
|
||||
}
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
litellm.Usage(
|
||||
**{
|
||||
"prompt_tokens": input_tokens,
|
||||
"total_tokens": input_tokens,
|
||||
}
|
||||
),
|
||||
)
|
||||
return model_response
|
||||
|
|
|
@ -1,11 +1,15 @@
|
|||
import os, types
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
import types
|
||||
from enum import Enum
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import requests # type: ignore
|
||||
import time, traceback
|
||||
from typing import Callable, Optional, List
|
||||
from litellm.utils import ModelResponse, Choices, Message, Usage
|
||||
|
||||
import litellm
|
||||
from litellm.utils import Choices, Message, ModelResponse, Usage
|
||||
|
||||
|
||||
class MaritalkError(Exception):
|
||||
|
@ -152,9 +156,9 @@ def completion(
|
|||
else:
|
||||
try:
|
||||
if len(completion_response["answer"]) > 0:
|
||||
model_response["choices"][0]["message"]["content"] = (
|
||||
completion_response["answer"]
|
||||
)
|
||||
model_response.choices[0].message.content = completion_response[ # type: ignore
|
||||
"answer"
|
||||
]
|
||||
except Exception as e:
|
||||
raise MaritalkError(
|
||||
message=response.text, status_code=response.status_code
|
||||
|
@ -167,8 +171,8 @@ def completion(
|
|||
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
import os, types
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests # type: ignore
|
||||
import os
|
||||
import time
|
||||
import types
|
||||
from enum import Enum
|
||||
from typing import Callable, Optional
|
||||
|
||||
import requests # type: ignore
|
||||
|
||||
import litellm
|
||||
from litellm.utils import ModelResponse, Usage
|
||||
|
||||
|
@ -185,7 +188,7 @@ def completion(
|
|||
else:
|
||||
try:
|
||||
if len(completion_response["generated_text"]) > 0:
|
||||
model_response["choices"][0]["message"]["content"] = (
|
||||
model_response.choices[0].message.content = ( # type: ignore
|
||||
completion_response["generated_text"]
|
||||
)
|
||||
except:
|
||||
|
@ -198,8 +201,8 @@ def completion(
|
|||
prompt_tokens = completion_response["nb_input_tokens"]
|
||||
completion_tokens = completion_response["nb_generated_tokens"]
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -1,13 +1,21 @@
|
|||
from itertools import chain
|
||||
import requests, types, time # type: ignore
|
||||
import json, uuid
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
import traceback
|
||||
from typing import Optional, List
|
||||
import types
|
||||
import uuid
|
||||
from itertools import chain
|
||||
from typing import List, Optional
|
||||
|
||||
import aiohttp
|
||||
import httpx # type: ignore
|
||||
import requests # type: ignore
|
||||
|
||||
import litellm
|
||||
from litellm.types.utils import ProviderField
|
||||
import httpx, aiohttp, asyncio # type: ignore
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
from litellm import verbose_logger
|
||||
from litellm.types.utils import ProviderField
|
||||
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class OllamaError(Exception):
|
||||
|
@ -138,7 +146,6 @@ class OllamaConfig:
|
|||
)
|
||||
]
|
||||
|
||||
|
||||
def get_supported_openai_params(
|
||||
self,
|
||||
):
|
||||
|
@ -157,7 +164,8 @@ class OllamaConfig:
|
|||
# ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
|
||||
# and convert to jpeg if necessary.
|
||||
def _convert_image(image):
|
||||
import base64, io
|
||||
import base64
|
||||
import io
|
||||
|
||||
try:
|
||||
from PIL import Image
|
||||
|
@ -183,13 +191,13 @@ def _convert_image(image):
|
|||
|
||||
# ollama implementation
|
||||
def get_ollama_response(
|
||||
model_response: litellm.ModelResponse,
|
||||
api_base="http://localhost:11434",
|
||||
model="llama2",
|
||||
prompt="Why is the sky blue?",
|
||||
optional_params=None,
|
||||
logging_obj=None,
|
||||
acompletion: bool = False,
|
||||
model_response=None,
|
||||
encoding=None,
|
||||
):
|
||||
if api_base.endswith("/api/generate"):
|
||||
|
@ -271,7 +279,7 @@ def get_ollama_response(
|
|||
response_json = response.json()
|
||||
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["finish_reason"] = "stop"
|
||||
model_response.choices[0].finish_reason = "stop"
|
||||
if data.get("format", "") == "json":
|
||||
function_call = json.loads(response_json["response"])
|
||||
message = litellm.Message(
|
||||
|
@ -287,20 +295,24 @@ def get_ollama_response(
|
|||
}
|
||||
],
|
||||
)
|
||||
model_response["choices"][0]["message"] = message
|
||||
model_response["choices"][0]["finish_reason"] = "tool_calls"
|
||||
model_response.choices[0].message = message # type: ignore
|
||||
model_response.choices[0].finish_reason = "tool_calls"
|
||||
else:
|
||||
model_response["choices"][0]["message"]["content"] = response_json["response"]
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = "ollama/" + model
|
||||
model_response.choices[0].message.content = response_json["response"] # type: ignore
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = "ollama/" + model
|
||||
prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=()))) # type: ignore
|
||||
completion_tokens = response_json.get(
|
||||
"eval_count", len(response_json.get("message", dict()).get("content", ""))
|
||||
)
|
||||
model_response["usage"] = litellm.Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
litellm.Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
),
|
||||
)
|
||||
return model_response
|
||||
|
||||
|
@ -346,8 +358,8 @@ def ollama_completion_stream(url, data, logging_obj):
|
|||
],
|
||||
)
|
||||
model_response = first_chunk
|
||||
model_response["choices"][0]["delta"] = delta
|
||||
model_response["choices"][0]["finish_reason"] = "tool_calls"
|
||||
model_response.choices[0].delta = delta # type: ignore
|
||||
model_response.choices[0].finish_reason = "tool_calls"
|
||||
yield model_response
|
||||
else:
|
||||
for transformed_chunk in streamwrapper:
|
||||
|
@ -401,8 +413,8 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
|
|||
],
|
||||
)
|
||||
model_response = first_chunk
|
||||
model_response["choices"][0]["delta"] = delta
|
||||
model_response["choices"][0]["finish_reason"] = "tool_calls"
|
||||
model_response.choices[0].delta = delta # type: ignore
|
||||
model_response.choices[0].finish_reason = "tool_calls"
|
||||
yield model_response
|
||||
else:
|
||||
async for transformed_chunk in streamwrapper:
|
||||
|
@ -418,7 +430,9 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
|
|||
raise e
|
||||
|
||||
|
||||
async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
|
||||
async def ollama_acompletion(
|
||||
url, data, model_response: litellm.ModelResponse, encoding, logging_obj
|
||||
):
|
||||
data["stream"] = False
|
||||
try:
|
||||
timeout = aiohttp.ClientTimeout(total=litellm.request_timeout) # 10 minutes
|
||||
|
@ -442,7 +456,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
|
|||
|
||||
response_json = await resp.json()
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["finish_reason"] = "stop"
|
||||
model_response.choices[0].finish_reason = "stop"
|
||||
if data.get("format", "") == "json":
|
||||
function_call = json.loads(response_json["response"])
|
||||
message = litellm.Message(
|
||||
|
@ -451,30 +465,34 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
|
|||
{
|
||||
"id": f"call_{str(uuid.uuid4())}",
|
||||
"function": {
|
||||
"name": function_call.get("name", function_call.get("function", None)),
|
||||
"name": function_call.get(
|
||||
"name", function_call.get("function", None)
|
||||
),
|
||||
"arguments": json.dumps(function_call["arguments"]),
|
||||
},
|
||||
"type": "function",
|
||||
}
|
||||
],
|
||||
)
|
||||
model_response["choices"][0]["message"] = message
|
||||
model_response["choices"][0]["finish_reason"] = "tool_calls"
|
||||
model_response.choices[0].message = message # type: ignore
|
||||
model_response.choices[0].finish_reason = "tool_calls"
|
||||
else:
|
||||
model_response["choices"][0]["message"]["content"] = response_json[
|
||||
"response"
|
||||
]
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = "ollama/" + data["model"]
|
||||
model_response.choices[0].message.content = response_json["response"] # type: ignore
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = "ollama/" + data["model"]
|
||||
prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=()))) # type: ignore
|
||||
completion_tokens = response_json.get(
|
||||
"eval_count",
|
||||
len(response_json.get("message", dict()).get("content", "")),
|
||||
)
|
||||
model_response["usage"] = litellm.Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
litellm.Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
),
|
||||
)
|
||||
return model_response
|
||||
except Exception as e:
|
||||
|
@ -491,9 +509,9 @@ async def ollama_aembeddings(
|
|||
api_base: str,
|
||||
model: str,
|
||||
prompts: list,
|
||||
model_response: litellm.EmbeddingResponse,
|
||||
optional_params=None,
|
||||
logging_obj=None,
|
||||
model_response=None,
|
||||
encoding=None,
|
||||
):
|
||||
if api_base.endswith("/api/embeddings"):
|
||||
|
@ -554,13 +572,19 @@ async def ollama_aembeddings(
|
|||
input_tokens = len(encoding.encode(prompt))
|
||||
total_input_tokens += input_tokens
|
||||
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = output_data
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": total_input_tokens,
|
||||
"total_tokens": total_input_tokens,
|
||||
}
|
||||
model_response.object = "list"
|
||||
model_response.data = output_data
|
||||
model_response.model = model
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
litellm.Usage(
|
||||
**{
|
||||
"prompt_tokens": total_input_tokens,
|
||||
"total_tokens": total_input_tokens,
|
||||
}
|
||||
),
|
||||
)
|
||||
return model_response
|
||||
|
||||
|
||||
|
|
|
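The ollama handlers above turn a JSON-mode completion into an OpenAI-style tool call before attaching it to the response. A rough standalone sketch of that conversion; the dict shape mirrors the `litellm.Message(... tool_calls=[...])` construction in the diff, while the function name and the `role`/`content` fields are illustrative assumptions:

import json
import uuid


def json_mode_to_tool_call(raw_response: str) -> dict:
    """Wrap a JSON-mode model output in an OpenAI-style tool-call message."""
    function_call = json.loads(raw_response)
    return {
        "role": "assistant",  # assumed; not shown in the hunks above
        "content": None,
        "tool_calls": [
            {
                "id": f"call_{str(uuid.uuid4())}",
                "function": {
                    # the diff accepts either a "name" or a "function" key
                    "name": function_call.get(
                        "name", function_call.get("function", None)
                    ),
                    "arguments": json.dumps(function_call["arguments"]),
                },
                "type": "function",
            }
        ],
    }


if __name__ == "__main__":
    sample = '{"name": "get_current_weather", "arguments": {"city": "Paris"}}'
    print(json_mode_to_tool_call(sample))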
@ -1,15 +1,17 @@
|
|||
from itertools import chain
|
||||
import requests
|
||||
import types
|
||||
import time
|
||||
import json
|
||||
import uuid
|
||||
import time
|
||||
import traceback
|
||||
import types
|
||||
import uuid
|
||||
from itertools import chain
|
||||
from typing import Optional
|
||||
from litellm import verbose_logger
|
||||
import litellm
|
||||
import httpx
|
||||
|
||||
import aiohttp
|
||||
import httpx
|
||||
import requests
|
||||
|
||||
import litellm
|
||||
from litellm import verbose_logger
|
||||
|
||||
|
||||
class OllamaError(Exception):
|
||||
|
@ -195,6 +197,7 @@ class OllamaChatConfig:
|
|||
|
||||
# ollama implementation
|
||||
def get_ollama_response(
|
||||
model_response: litellm.ModelResponse,
|
||||
api_base="http://localhost:11434",
|
||||
api_key: Optional[str] = None,
|
||||
model="llama2",
|
||||
|
@ -202,7 +205,6 @@ def get_ollama_response(
|
|||
optional_params=None,
|
||||
logging_obj=None,
|
||||
acompletion: bool = False,
|
||||
model_response=None,
|
||||
encoding=None,
|
||||
):
|
||||
if api_base.endswith("/api/chat"):
|
||||
|
@ -295,7 +297,7 @@ def get_ollama_response(
|
|||
response_json = response.json()
|
||||
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["finish_reason"] = "stop"
|
||||
model_response.choices[0].finish_reason = "stop"
|
||||
if data.get("format", "") == "json":
|
||||
function_call = json.loads(response_json["message"]["content"])
|
||||
message = litellm.Message(
|
||||
|
@ -311,22 +313,24 @@ def get_ollama_response(
|
|||
}
|
||||
],
|
||||
)
|
||||
model_response["choices"][0]["message"] = message
|
||||
model_response["choices"][0]["finish_reason"] = "tool_calls"
|
||||
model_response.choices[0].message = message # type: ignore
|
||||
model_response.choices[0].finish_reason = "tool_calls"
|
||||
else:
|
||||
model_response["choices"][0]["message"]["content"] = response_json["message"][
|
||||
"content"
|
||||
]
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = "ollama/" + model
|
||||
model_response.choices[0].message.content = response_json["message"]["content"] # type: ignore
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = "ollama/" + model
|
||||
prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages)) # type: ignore
|
||||
completion_tokens = response_json.get(
|
||||
"eval_count", litellm.token_counter(text=response_json["message"]["content"])
|
||||
)
|
||||
model_response["usage"] = litellm.Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
litellm.Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
),
|
||||
)
|
||||
return model_response
|
||||
|
||||
|
@ -379,8 +383,8 @@ def ollama_completion_stream(url, api_key, data, logging_obj):
|
|||
],
|
||||
)
|
||||
model_response = first_chunk
|
||||
model_response["choices"][0]["delta"] = delta
|
||||
model_response["choices"][0]["finish_reason"] = "tool_calls"
|
||||
model_response.choices[0].delta = delta # type: ignore
|
||||
model_response.choices[0].finish_reason = "tool_calls"
|
||||
yield model_response
|
||||
else:
|
||||
for transformed_chunk in streamwrapper:
|
||||
|
@ -434,7 +438,9 @@ async def ollama_async_streaming(
|
|||
{
|
||||
"id": f"call_{str(uuid.uuid4())}",
|
||||
"function": {
|
||||
"name": function_call.get("name", function_call.get("function", None)),
|
||||
"name": function_call.get(
|
||||
"name", function_call.get("function", None)
|
||||
),
|
||||
"arguments": json.dumps(function_call["arguments"]),
|
||||
},
|
||||
"type": "function",
|
||||
|
@ -442,8 +448,8 @@ async def ollama_async_streaming(
|
|||
],
|
||||
)
|
||||
model_response = first_chunk
|
||||
model_response["choices"][0]["delta"] = delta
|
||||
model_response["choices"][0]["finish_reason"] = "tool_calls"
|
||||
model_response.choices[0].delta = delta # type: ignore
|
||||
model_response.choices[0].finish_reason = "tool_calls"
|
||||
yield model_response
|
||||
else:
|
||||
async for transformed_chunk in streamwrapper:
|
||||
|
@ -457,7 +463,7 @@ async def ollama_acompletion(
|
|||
url,
|
||||
api_key: Optional[str],
|
||||
data,
|
||||
model_response,
|
||||
model_response: litellm.ModelResponse,
|
||||
encoding,
|
||||
logging_obj,
|
||||
function_name,
|
||||
|
@ -492,7 +498,7 @@ async def ollama_acompletion(
|
|||
)
|
||||
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["finish_reason"] = "stop"
|
||||
model_response.choices[0].finish_reason = "stop"
|
||||
if data.get("format", "") == "json":
|
||||
function_call = json.loads(response_json["message"]["content"])
|
||||
message = litellm.Message(
|
||||
|
@ -510,15 +516,17 @@ async def ollama_acompletion(
|
|||
}
|
||||
],
|
||||
)
|
||||
model_response["choices"][0]["message"] = message
|
||||
model_response["choices"][0]["finish_reason"] = "tool_calls"
|
||||
model_response.choices[0].message = message # type: ignore
|
||||
model_response.choices[0].finish_reason = "tool_calls"
|
||||
else:
|
||||
model_response["choices"][0]["message"]["content"] = response_json[
|
||||
model_response.choices[0].message.content = response_json[ # type: ignore
|
||||
"message"
|
||||
]["content"]
|
||||
][
|
||||
"content"
|
||||
]
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = "ollama_chat/" + data["model"]
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = "ollama_chat/" + data["model"]
|
||||
prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"])) # type: ignore
|
||||
completion_tokens = response_json.get(
|
||||
"eval_count",
|
||||
|
@ -526,10 +534,14 @@ async def ollama_acompletion(
|
|||
text=response_json["message"]["content"], count_response_tokens=True
|
||||
),
|
||||
)
|
||||
model_response["usage"] = litellm.Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
litellm.Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
),
|
||||
)
|
||||
return model_response
|
||||
except Exception as e:
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
import os
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests # type: ignore
|
||||
import os
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import Callable, Optional
|
||||
from litellm.utils import ModelResponse, Usage
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
|
||||
import requests # type: ignore
|
||||
|
||||
from litellm.utils import EmbeddingResponse, ModelResponse, Usage
|
||||
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class OobaboogaError(Exception):
|
||||
|
@ -99,17 +102,15 @@ def completion(
|
|||
)
|
||||
else:
|
||||
try:
|
||||
model_response["choices"][0]["message"]["content"] = (
|
||||
completion_response["choices"][0]["message"]["content"]
|
||||
)
|
||||
model_response.choices[0].message.content = completion_response["choices"][0]["message"]["content"] # type: ignore
|
||||
except:
|
||||
raise OobaboogaError(
|
||||
message=json.dumps(completion_response),
|
||||
status_code=response.status_code,
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=completion_response["usage"]["prompt_tokens"],
|
||||
completion_tokens=completion_response["usage"]["completion_tokens"],
|
||||
|
@ -122,10 +123,10 @@ def completion(
|
|||
def embedding(
|
||||
model: str,
|
||||
input: list,
|
||||
model_response: EmbeddingResponse,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
logging_obj=None,
|
||||
model_response=None,
|
||||
optional_params=None,
|
||||
encoding=None,
|
||||
):
|
||||
|
@ -166,7 +167,7 @@ def embedding(
|
|||
)
|
||||
|
||||
# Process response data
|
||||
model_response["data"] = [
|
||||
model_response.data = [
|
||||
{
|
||||
"embedding": completion_response["data"][0]["embedding"],
|
||||
"index": 0,
|
||||
|
@ -176,8 +177,12 @@ def embedding(
|
|||
|
||||
num_tokens = len(completion_response["data"][0]["embedding"])
|
||||
# Adding metadata to response
|
||||
model_response.usage = Usage(prompt_tokens=num_tokens, total_tokens=num_tokens)
|
||||
model_response["object"] = "list"
|
||||
model_response["model"] = model
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
Usage(prompt_tokens=num_tokens, total_tokens=num_tokens),
|
||||
)
|
||||
model_response.object = "list"
|
||||
model_response.model = model
|
||||
|
||||
return model_response
|
||||
|
|
|
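The embedding handlers in this diff now take a typed model_response: EmbeddingResponse and fill it via attributes and setattr. A hedged sketch of that response shape, reusing only fields the surrounding hunks set; the vectors and token count below are placeholders:

from litellm.utils import EmbeddingResponse, Usage

def build_embedding_response(model: str, vectors: list) -> EmbeddingResponse:
    model_response = EmbeddingResponse(model=model)
    # one entry per input, mirroring the {"object", "index", "embedding"} dicts built above
    model_response.data = [
        {"object": "embedding", "index": idx, "embedding": vec}
        for idx, vec in enumerate(vectors)
    ]
    model_response.object = "list"
    num_tokens = sum(len(vec) for vec in vectors)  # stand-in token count for illustration only
    setattr(
        model_response,
        "usage",
        Usage(prompt_tokens=num_tokens, completion_tokens=0, total_tokens=num_tokens),
    )
    return model_response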
@ -18,6 +18,7 @@ import httpx
|
|||
import openai
|
||||
from openai import AsyncOpenAI, OpenAI
|
||||
from openai.types.beta.assistant_deleted import AssistantDeleted
|
||||
from openai.types.file_deleted import FileDeleted
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import overload, override
|
||||
|
||||
|
@ -2064,6 +2065,151 @@ class OpenAIFilesAPI(BaseLLM):
|
|||
|
||||
return response
|
||||
|
||||
async def aretrieve_file(
|
||||
self,
|
||||
file_id: str,
|
||||
openai_client: AsyncOpenAI,
|
||||
) -> FileObject:
|
||||
response = await openai_client.files.retrieve(file_id=file_id)
|
||||
return response
|
||||
|
||||
def retrieve_file(
|
||||
self,
|
||||
_is_async: bool,
|
||||
file_id: str,
|
||||
api_base: str,
|
||||
api_key: Optional[str],
|
||||
timeout: Union[float, httpx.Timeout],
|
||||
max_retries: Optional[int],
|
||||
organization: Optional[str],
|
||||
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
|
||||
):
|
||||
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
client=client,
|
||||
_is_async=_is_async,
|
||||
)
|
||||
if openai_client is None:
|
||||
raise ValueError(
|
||||
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
|
||||
)
|
||||
|
||||
if _is_async is True:
|
||||
if not isinstance(openai_client, AsyncOpenAI):
|
||||
raise ValueError(
|
||||
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
|
||||
)
|
||||
return self.aretrieve_file( # type: ignore
|
||||
file_id=file_id,
|
||||
openai_client=openai_client,
|
||||
)
|
||||
response = openai_client.files.retrieve(file_id=file_id)
|
||||
|
||||
return response
|
||||
|
||||
async def adelete_file(
|
||||
self,
|
||||
file_id: str,
|
||||
openai_client: AsyncOpenAI,
|
||||
) -> FileDeleted:
|
||||
response = await openai_client.files.delete(file_id=file_id)
|
||||
return response
|
||||
|
||||
def delete_file(
|
||||
self,
|
||||
_is_async: bool,
|
||||
file_id: str,
|
||||
api_base: str,
|
||||
api_key: Optional[str],
|
||||
timeout: Union[float, httpx.Timeout],
|
||||
max_retries: Optional[int],
|
||||
organization: Optional[str],
|
||||
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
|
||||
):
|
||||
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
client=client,
|
||||
_is_async=_is_async,
|
||||
)
|
||||
if openai_client is None:
|
||||
raise ValueError(
|
||||
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
|
||||
)
|
||||
|
||||
if _is_async is True:
|
||||
if not isinstance(openai_client, AsyncOpenAI):
|
||||
raise ValueError(
|
||||
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
|
||||
)
|
||||
return self.adelete_file( # type: ignore
|
||||
file_id=file_id,
|
||||
openai_client=openai_client,
|
||||
)
|
||||
response = openai_client.files.delete(file_id=file_id)
|
||||
|
||||
return response
|
||||
|
||||
async def alist_files(
|
||||
self,
|
||||
openai_client: AsyncOpenAI,
|
||||
purpose: Optional[str] = None,
|
||||
):
|
||||
if isinstance(purpose, str):
|
||||
response = await openai_client.files.list(purpose=purpose)
|
||||
else:
|
||||
response = await openai_client.files.list()
|
||||
return response
|
||||
|
||||
def list_files(
|
||||
self,
|
||||
_is_async: bool,
|
||||
api_base: str,
|
||||
api_key: Optional[str],
|
||||
timeout: Union[float, httpx.Timeout],
|
||||
max_retries: Optional[int],
|
||||
organization: Optional[str],
|
||||
purpose: Optional[str] = None,
|
||||
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
|
||||
):
|
||||
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
client=client,
|
||||
_is_async=_is_async,
|
||||
)
|
||||
if openai_client is None:
|
||||
raise ValueError(
|
||||
"OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
|
||||
)
|
||||
|
||||
if _is_async is True:
|
||||
if not isinstance(openai_client, AsyncOpenAI):
|
||||
raise ValueError(
|
||||
"OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
|
||||
)
|
||||
return self.alist_files( # type: ignore
|
||||
purpose=purpose,
|
||||
openai_client=openai_client,
|
||||
)
|
||||
|
||||
if isinstance(purpose, str):
|
||||
response = openai_client.files.list(purpose=purpose)
|
||||
else:
|
||||
response = openai_client.files.list()
|
||||
|
||||
return response
|
||||
|
||||
|
||||
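The new retrieve_file, delete_file, and list_files wrappers above share one shape: resolve a client via get_openai_client, and when _is_async is True hand off to the a* coroutine with an AsyncOpenAI client, otherwise call the sync SDK directly. A hedged sketch of the underlying OpenAI SDK calls they delegate to; the file id and purpose value are placeholders:

import asyncio
from openai import AsyncOpenAI, OpenAI

def sync_files_example(api_key: str) -> None:
    client = OpenAI(api_key=api_key)
    retrieved = client.files.retrieve(file_id="file-abc123")   # hypothetical file id
    listing = client.files.list(purpose="fine-tune")           # purpose filter is optional
    deleted = client.files.delete(file_id="file-abc123")
    print(retrieved.id, len(listing.data), deleted.deleted)

async def async_files_example(api_key: str) -> None:
    client = AsyncOpenAI(api_key=api_key)
    retrieved = await client.files.retrieve(file_id="file-abc123")
    print(retrieved.id)

# asyncio.run(async_files_example("sk-..."))  # the async path mirrors what the _is_async branch does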
class OpenAIBatchesAPI(BaseLLM):
|
||||
"""
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
import types
|
||||
import traceback
|
||||
import copy
|
||||
import time
|
||||
import traceback
|
||||
import types
|
||||
from typing import Callable, Optional
|
||||
from litellm.utils import ModelResponse, Choices, Message, Usage
|
||||
import litellm
|
||||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm import verbose_logger
|
||||
from litellm.utils import Choices, Message, ModelResponse, Usage
|
||||
|
||||
|
||||
class PalmError(Exception):
|
||||
|
@ -164,7 +166,7 @@ def completion(
|
|||
message_obj = Message(content=None)
|
||||
choice_obj = Choices(index=idx + 1, message=message_obj)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"] = choices_list
|
||||
model_response.choices = choices_list # type: ignore
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
"litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
|
||||
|
@ -188,8 +190,8 @@ def completion(
|
|||
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = "palm/" + model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = "palm/" + model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -1,12 +1,16 @@
|
|||
import os, types
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests # type: ignore
|
||||
import os
|
||||
import time
|
||||
import types
|
||||
from enum import Enum
|
||||
from typing import Callable, Optional
|
||||
|
||||
import requests # type: ignore
|
||||
|
||||
import litellm
|
||||
from litellm.utils import ModelResponse, Usage
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class PetalsError(Exception):
|
||||
|
@ -151,8 +155,8 @@ def completion(
|
|||
else:
|
||||
try:
|
||||
import torch
|
||||
from transformers import AutoTokenizer
|
||||
from petals import AutoDistributedModelForCausalLM # type: ignore
|
||||
from transformers import AutoTokenizer
|
||||
except:
|
||||
raise Exception(
|
||||
"Importing torch, transformers, petals failed\nTry pip installing petals \npip install git+https://github.com/bigscience-workshop/petals"
|
||||
|
@ -189,15 +193,15 @@ def completion(
|
|||
output_text = tokenizer.decode(outputs[0])
|
||||
|
||||
if len(output_text) > 0:
|
||||
model_response["choices"][0]["message"]["content"] = output_text
|
||||
model_response.choices[0].message.content = output_text # type: ignore
|
||||
|
||||
prompt_tokens = len(encoding.encode(prompt))
|
||||
completion_tokens = len(
|
||||
encoding.encode(model_response["choices"][0]["message"].get("content"))
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -279,7 +279,7 @@ class PredibaseChatCompletion(BaseLLM):
|
|||
message=f"'generated_text' is not a key response dictionary - {completion_response}",
|
||||
)
|
||||
if len(completion_response["generated_text"]) > 0:
|
||||
model_response["choices"][0]["message"]["content"] = self.output_parser(
|
||||
model_response.choices[0].message.content = self.output_parser( # type: ignore
|
||||
completion_response["generated_text"]
|
||||
)
|
||||
## GETTING LOGPROBS + FINISH REASON
|
||||
|
@ -294,10 +294,10 @@ class PredibaseChatCompletion(BaseLLM):
|
|||
for token in completion_response["details"]["tokens"]:
|
||||
if token["logprob"] is not None:
|
||||
sum_logprob += token["logprob"]
|
||||
model_response["choices"][0][
|
||||
"message"
|
||||
]._logprob = (
|
||||
sum_logprob # [TODO] move this to using the actual logprobs
|
||||
setattr(
|
||||
model_response.choices[0].message, # type: ignore
|
||||
"_logprob",
|
||||
sum_logprob, # [TODO] move this to using the actual logprobs
|
||||
)
|
||||
if "best_of" in optional_params and optional_params["best_of"] > 1:
|
||||
if (
|
||||
|
@ -325,7 +325,7 @@ class PredibaseChatCompletion(BaseLLM):
|
|||
message=message_obj,
|
||||
)
|
||||
choices_list.append(choice_obj)
|
||||
model_response["choices"].extend(choices_list)
|
||||
model_response.choices.extend(choices_list)
|
||||
|
||||
## CALCULATING USAGE
|
||||
prompt_tokens = 0
|
||||
|
@ -351,8 +351,8 @@ class PredibaseChatCompletion(BaseLLM):
|
|||
|
||||
total_tokens = prompt_tokens + completion_tokens
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -388,7 +388,7 @@ def process_response(
|
|||
|
||||
## Building RESPONSE OBJECT
|
||||
if len(result) > 1:
|
||||
model_response["choices"][0]["message"]["content"] = result
|
||||
model_response.choices[0].message.content = result # type: ignore
|
||||
|
||||
# Calculate usage
|
||||
prompt_tokens = len(encoding.encode(prompt, disallowed_special=()))
|
||||
|
@ -398,7 +398,7 @@ def process_response(
|
|||
disallowed_special=(),
|
||||
)
|
||||
)
|
||||
model_response["model"] = "replicate/" + model
|
||||
model_response.model = "replicate/" + model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
@ -498,7 +498,7 @@ def completion(
|
|||
## Step1: Start Prediction: gets a prediction url
|
||||
## Step2: Poll prediction url for response
|
||||
## Step2: is handled with and without streaming
|
||||
model_response["created"] = int(
|
||||
model_response.created = int(
|
||||
time.time()
|
||||
) # for pricing this must remain right before calling api
|
||||
|
||||
|
|
|
@ -1,16 +1,21 @@
|
|||
import os, types, traceback
|
||||
from enum import Enum
|
||||
import json
|
||||
import requests # type: ignore
|
||||
import time
|
||||
from typing import Callable, Optional, Any
|
||||
import litellm
|
||||
from litellm.utils import ModelResponse, EmbeddingResponse, get_secret, Usage
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
import httpx # type: ignore
|
||||
import io
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import types
|
||||
from copy import deepcopy
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
import httpx # type: ignore
|
||||
import requests # type: ignore
|
||||
|
||||
import litellm
|
||||
from litellm.utils import EmbeddingResponse, ModelResponse, Usage, get_secret
|
||||
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class SagemakerError(Exception):
|
||||
|
@ -377,7 +382,7 @@ def completion(
|
|||
if completion_output.startswith(prompt) and "<s>" in prompt:
|
||||
completion_output = completion_output.replace(prompt, "", 1)
|
||||
|
||||
model_response["choices"][0]["message"]["content"] = completion_output
|
||||
model_response.choices[0].message.content = completion_output # type: ignore
|
||||
except:
|
||||
raise SagemakerError(
|
||||
message=f"LiteLLM Error: Unable to parse sagemaker RAW RESPONSE {json.dumps(completion_response)}",
|
||||
|
@ -390,8 +395,8 @@ def completion(
|
|||
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
@ -597,7 +602,7 @@ async def async_completion(
|
|||
if completion_output.startswith(data["inputs"]) and "<s>" in data["inputs"]:
|
||||
completion_output = completion_output.replace(data["inputs"], "", 1)
|
||||
|
||||
model_response["choices"][0]["message"]["content"] = completion_output
|
||||
model_response.choices[0].message.content = completion_output # type: ignore
|
||||
except:
|
||||
raise SagemakerError(
|
||||
message=f"LiteLLM Error: Unable to parse sagemaker RAW RESPONSE {json.dumps(completion_response)}",
|
||||
|
@ -610,8 +615,8 @@ async def async_completion(
|
|||
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
@ -741,16 +746,20 @@ def embedding(
|
|||
{"object": "embedding", "index": idx, "embedding": embedding}
|
||||
)
|
||||
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = output_data
|
||||
model_response["model"] = model
|
||||
model_response.object = "list"
|
||||
model_response.data = output_data
|
||||
model_response.model = model
|
||||
|
||||
input_tokens = 0
|
||||
for text in input:
|
||||
input_tokens += len(encoding.encode(text))
|
||||
|
||||
model_response["usage"] = Usage(
|
||||
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
|
||||
setattr(
|
||||
model_response,
|
||||
"usage",
|
||||
Usage(
|
||||
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
|
||||
),
|
||||
)
|
||||
|
||||
return model_response
|
||||
|
|
|
@ -3,16 +3,20 @@ Deprecated. We now do together ai calls via the openai client.
|
|||
Reference: https://docs.together.ai/docs/openai-api-compatibility
|
||||
"""
|
||||
|
||||
import os, types
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests # type: ignore
|
||||
import os
|
||||
import time
|
||||
import types
|
||||
from enum import Enum
|
||||
from typing import Callable, Optional
|
||||
import litellm
|
||||
|
||||
import httpx # type: ignore
|
||||
import requests # type: ignore
|
||||
|
||||
import litellm
|
||||
from litellm.utils import ModelResponse, Usage
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class TogetherAIError(Exception):
|
||||
|
@ -91,145 +95,145 @@ class TogetherAIConfig:
|
|||
}
|
||||
|
||||
|
||||
def validate_environment(api_key):
|
||||
if api_key is None:
|
||||
raise ValueError(
|
||||
"Missing TogetherAI API Key - A call is being made to together_ai but no key is set either in the environment variables or via params"
|
||||
)
|
||||
headers = {
|
||||
"accept": "application/json",
|
||||
"content-type": "application/json",
|
||||
"Authorization": "Bearer " + api_key,
|
||||
}
|
||||
return headers
|
||||
# def validate_environment(api_key):
|
||||
# if api_key is None:
|
||||
# raise ValueError(
|
||||
# "Missing TogetherAI API Key - A call is being made to together_ai but no key is set either in the environment variables or via params"
|
||||
# )
|
||||
# headers = {
|
||||
# "accept": "application/json",
|
||||
# "content-type": "application/json",
|
||||
# "Authorization": "Bearer " + api_key,
|
||||
# }
|
||||
# return headers
|
||||
|
||||
|
||||
def completion(
|
||||
model: str,
|
||||
messages: list,
|
||||
api_base: str,
|
||||
model_response: ModelResponse,
|
||||
print_verbose: Callable,
|
||||
encoding,
|
||||
api_key,
|
||||
logging_obj,
|
||||
custom_prompt_dict={},
|
||||
optional_params=None,
|
||||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
):
|
||||
headers = validate_environment(api_key)
|
||||
# def completion(
|
||||
# model: str,
|
||||
# messages: list,
|
||||
# api_base: str,
|
||||
# model_response: ModelResponse,
|
||||
# print_verbose: Callable,
|
||||
# encoding,
|
||||
# api_key,
|
||||
# logging_obj,
|
||||
# custom_prompt_dict={},
|
||||
# optional_params=None,
|
||||
# litellm_params=None,
|
||||
# logger_fn=None,
|
||||
# ):
|
||||
# headers = validate_environment(api_key)
|
||||
|
||||
## Load Config
|
||||
config = litellm.TogetherAIConfig.get_config()
|
||||
for k, v in config.items():
|
||||
if (
|
||||
k not in optional_params
|
||||
): # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||
optional_params[k] = v
|
||||
# ## Load Config
|
||||
# config = litellm.TogetherAIConfig.get_config()
|
||||
# for k, v in config.items():
|
||||
# if (
|
||||
# k not in optional_params
|
||||
# ): # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in
|
||||
# optional_params[k] = v
|
||||
|
||||
print_verbose(f"CUSTOM PROMPT DICT: {custom_prompt_dict}; model: {model}")
|
||||
if model in custom_prompt_dict:
|
||||
# check if the model has a registered custom prompt
|
||||
model_prompt_details = custom_prompt_dict[model]
|
||||
prompt = custom_prompt(
|
||||
role_dict=model_prompt_details.get("roles", {}),
|
||||
initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
|
||||
final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
|
||||
bos_token=model_prompt_details.get("bos_token", ""),
|
||||
eos_token=model_prompt_details.get("eos_token", ""),
|
||||
messages=messages,
|
||||
)
|
||||
else:
|
||||
prompt = prompt_factory(
|
||||
model=model,
|
||||
messages=messages,
|
||||
api_key=api_key,
|
||||
custom_llm_provider="together_ai",
|
||||
) # api key required to query together ai model list
|
||||
# print_verbose(f"CUSTOM PROMPT DICT: {custom_prompt_dict}; model: {model}")
|
||||
# if model in custom_prompt_dict:
|
||||
# # check if the model has a registered custom prompt
|
||||
# model_prompt_details = custom_prompt_dict[model]
|
||||
# prompt = custom_prompt(
|
||||
# role_dict=model_prompt_details.get("roles", {}),
|
||||
# initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
|
||||
# final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
|
||||
# bos_token=model_prompt_details.get("bos_token", ""),
|
||||
# eos_token=model_prompt_details.get("eos_token", ""),
|
||||
# messages=messages,
|
||||
# )
|
||||
# else:
|
||||
# prompt = prompt_factory(
|
||||
# model=model,
|
||||
# messages=messages,
|
||||
# api_key=api_key,
|
||||
# custom_llm_provider="together_ai",
|
||||
# ) # api key required to query together ai model list
|
||||
|
||||
data = {
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"request_type": "language-model-inference",
|
||||
**optional_params,
|
||||
}
|
||||
# data = {
|
||||
# "model": model,
|
||||
# "prompt": prompt,
|
||||
# "request_type": "language-model-inference",
|
||||
# **optional_params,
|
||||
# }
|
||||
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=prompt,
|
||||
api_key=api_key,
|
||||
additional_args={
|
||||
"complete_input_dict": data,
|
||||
"headers": headers,
|
||||
"api_base": api_base,
|
||||
},
|
||||
)
|
||||
## COMPLETION CALL
|
||||
if "stream_tokens" in optional_params and optional_params["stream_tokens"] == True:
|
||||
response = requests.post(
|
||||
api_base,
|
||||
headers=headers,
|
||||
data=json.dumps(data),
|
||||
stream=optional_params["stream_tokens"],
|
||||
)
|
||||
return response.iter_lines()
|
||||
else:
|
||||
response = requests.post(api_base, headers=headers, data=json.dumps(data))
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=prompt,
|
||||
api_key=api_key,
|
||||
original_response=response.text,
|
||||
additional_args={"complete_input_dict": data},
|
||||
)
|
||||
print_verbose(f"raw model_response: {response.text}")
|
||||
## RESPONSE OBJECT
|
||||
if response.status_code != 200:
|
||||
raise TogetherAIError(
|
||||
status_code=response.status_code, message=response.text
|
||||
)
|
||||
completion_response = response.json()
|
||||
# ## LOGGING
|
||||
# logging_obj.pre_call(
|
||||
# input=prompt,
|
||||
# api_key=api_key,
|
||||
# additional_args={
|
||||
# "complete_input_dict": data,
|
||||
# "headers": headers,
|
||||
# "api_base": api_base,
|
||||
# },
|
||||
# )
|
||||
# ## COMPLETION CALL
|
||||
# if "stream_tokens" in optional_params and optional_params["stream_tokens"] == True:
|
||||
# response = requests.post(
|
||||
# api_base,
|
||||
# headers=headers,
|
||||
# data=json.dumps(data),
|
||||
# stream=optional_params["stream_tokens"],
|
||||
# )
|
||||
# return response.iter_lines()
|
||||
# else:
|
||||
# response = requests.post(api_base, headers=headers, data=json.dumps(data))
|
||||
# ## LOGGING
|
||||
# logging_obj.post_call(
|
||||
# input=prompt,
|
||||
# api_key=api_key,
|
||||
# original_response=response.text,
|
||||
# additional_args={"complete_input_dict": data},
|
||||
# )
|
||||
# print_verbose(f"raw model_response: {response.text}")
|
||||
# ## RESPONSE OBJECT
|
||||
# if response.status_code != 200:
|
||||
# raise TogetherAIError(
|
||||
# status_code=response.status_code, message=response.text
|
||||
# )
|
||||
# completion_response = response.json()
|
||||
|
||||
if "error" in completion_response:
|
||||
raise TogetherAIError(
|
||||
message=json.dumps(completion_response),
|
||||
status_code=response.status_code,
|
||||
)
|
||||
elif "error" in completion_response["output"]:
|
||||
raise TogetherAIError(
|
||||
message=json.dumps(completion_response["output"]),
|
||||
status_code=response.status_code,
|
||||
)
|
||||
# if "error" in completion_response:
|
||||
# raise TogetherAIError(
|
||||
# message=json.dumps(completion_response),
|
||||
# status_code=response.status_code,
|
||||
# )
|
||||
# elif "error" in completion_response["output"]:
|
||||
# raise TogetherAIError(
|
||||
# message=json.dumps(completion_response["output"]),
|
||||
# status_code=response.status_code,
|
||||
# )
|
||||
|
||||
if len(completion_response["output"]["choices"][0]["text"]) >= 0:
|
||||
model_response["choices"][0]["message"]["content"] = completion_response[
|
||||
"output"
|
||||
]["choices"][0]["text"]
|
||||
# if len(completion_response["output"]["choices"][0]["text"]) >= 0:
|
||||
# model_response.choices[0].message.content = completion_response["output"][
|
||||
# "choices"
|
||||
# ][0]["text"]
|
||||
|
||||
## CALCULATING USAGE
|
||||
print_verbose(
|
||||
f"CALCULATING TOGETHERAI TOKEN USAGE. Model Response: {model_response}; model_response['choices'][0]['message'].get('content', ''): {model_response['choices'][0]['message'].get('content', None)}"
|
||||
)
|
||||
prompt_tokens = len(encoding.encode(prompt))
|
||||
completion_tokens = len(
|
||||
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
)
|
||||
if "finish_reason" in completion_response["output"]["choices"][0]:
|
||||
model_response.choices[0].finish_reason = completion_response["output"][
|
||||
"choices"
|
||||
][0]["finish_reason"]
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = "together_ai/" + model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
setattr(model_response, "usage", usage)
|
||||
return model_response
|
||||
# ## CALCULATING USAGE
|
||||
# print_verbose(
|
||||
# f"CALCULATING TOGETHERAI TOKEN USAGE. Model Response: {model_response}; model_response['choices'][0]['message'].get('content', ''): {model_response['choices'][0]['message'].get('content', None)}"
|
||||
# )
|
||||
# prompt_tokens = len(encoding.encode(prompt))
|
||||
# completion_tokens = len(
|
||||
# encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||
# )
|
||||
# if "finish_reason" in completion_response["output"]["choices"][0]:
|
||||
# model_response.choices[0].finish_reason = completion_response["output"][
|
||||
# "choices"
|
||||
# ][0]["finish_reason"]
|
||||
# model_response["created"] = int(time.time())
|
||||
# model_response["model"] = "together_ai/" + model
|
||||
# usage = Usage(
|
||||
# prompt_tokens=prompt_tokens,
|
||||
# completion_tokens=completion_tokens,
|
||||
# total_tokens=prompt_tokens + completion_tokens,
|
||||
# )
|
||||
# setattr(model_response, "usage", usage)
|
||||
# return model_response
|
||||
|
||||
|
||||
def embedding():
|
||||
# logic for parsing in - calling - parsing out model embedding calls
|
||||
pass
|
||||
# def embedding():
|
||||
# # logic for parsing in - calling - parsing out model embedding calls
|
||||
# pass
|
||||
|
|
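Everything above is commented out because, as the module docstring notes, Together AI calls now go through the OpenAI-compatible client (https://docs.together.ai/docs/openai-api-compatibility). A hedged sketch of that call style using the OpenAI SDK directly; the base URL and model id are illustrative assumptions, and the env var name mirrors the TOGETHERAI_API_KEY secret checked in main.py:

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.environ["TOGETHERAI_API_KEY"],        # same secret name main.py looks up
    base_url="https://api.together.xyz/v1",          # assumed OpenAI-compatible endpoint
)
resp = client.chat.completions.create(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",    # example Together AI model id
    messages=[{"role": "user", "content": "Hello"}],
)
print(resp.choices[0].message.content)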
|
@ -852,16 +852,14 @@ def completion(
|
|||
|
||||
## RESPONSE OBJECT
|
||||
if isinstance(completion_response, litellm.Message):
|
||||
model_response["choices"][0]["message"] = completion_response
|
||||
model_response.choices[0].message = completion_response # type: ignore
|
||||
elif len(str(completion_response)) > 0:
|
||||
model_response["choices"][0]["message"]["content"] = str(
|
||||
completion_response
|
||||
)
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.choices[0].message.content = str(completion_response) # type: ignore
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
## CALCULATING USAGE
|
||||
if model in litellm.vertex_language_models and response_obj is not None:
|
||||
model_response["choices"][0].finish_reason = map_finish_reason(
|
||||
model_response.choices[0].finish_reason = map_finish_reason(
|
||||
response_obj.candidates[0].finish_reason.name
|
||||
)
|
||||
usage = Usage(
|
||||
|
@ -912,7 +910,7 @@ async def async_completion(
|
|||
request_str: str,
|
||||
print_verbose: Callable,
|
||||
logging_obj,
|
||||
encoding=None,
|
||||
encoding,
|
||||
client_options=None,
|
||||
instances=None,
|
||||
vertex_project=None,
|
||||
|
@ -1088,16 +1086,16 @@ async def async_completion(
|
|||
|
||||
## RESPONSE OBJECT
|
||||
if isinstance(completion_response, litellm.Message):
|
||||
model_response["choices"][0]["message"] = completion_response
|
||||
model_response.choices[0].message = completion_response # type: ignore
|
||||
elif len(str(completion_response)) > 0:
|
||||
model_response["choices"][0]["message"]["content"] = str(
|
||||
model_response.choices[0].message.content = str( # type: ignore
|
||||
completion_response
|
||||
)
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
## CALCULATING USAGE
|
||||
if model in litellm.vertex_language_models and response_obj is not None:
|
||||
model_response["choices"][0].finish_reason = map_finish_reason(
|
||||
model_response.choices[0].finish_reason = map_finish_reason(
|
||||
response_obj.candidates[0].finish_reason.name
|
||||
)
|
||||
usage = Usage(
|
||||
|
@ -1377,16 +1375,16 @@ class VertexAITextEmbeddingConfig(BaseModel):
|
|||
def embedding(
|
||||
model: str,
|
||||
input: Union[list, str],
|
||||
print_verbose,
|
||||
model_response: litellm.EmbeddingResponse,
|
||||
optional_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
logging_obj=None,
|
||||
model_response=None,
|
||||
optional_params=None,
|
||||
encoding=None,
|
||||
vertex_project=None,
|
||||
vertex_location=None,
|
||||
vertex_credentials=None,
|
||||
aembedding=False,
|
||||
print_verbose=None,
|
||||
):
|
||||
# logic for parsing in - calling - parsing out model embedding calls
|
||||
try:
|
||||
|
@ -1484,15 +1482,15 @@ def embedding(
|
|||
"embedding": embedding.values,
|
||||
}
|
||||
)
|
||||
input_tokens += embedding.statistics.token_count
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = embedding_response
|
||||
model_response["model"] = model
|
||||
input_tokens += embedding.statistics.token_count # type: ignore
|
||||
model_response.object = "list"
|
||||
model_response.data = embedding_response
|
||||
model_response.model = model
|
||||
|
||||
usage = Usage(
|
||||
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
|
||||
)
|
||||
model_response.usage = usage
|
||||
setattr(model_response, "usage", usage)
|
||||
|
||||
return model_response
|
||||
|
||||
|
@ -1500,8 +1498,8 @@ def embedding(
|
|||
async def async_embedding(
|
||||
model: str,
|
||||
input: Union[list, str],
|
||||
model_response: litellm.EmbeddingResponse,
|
||||
logging_obj=None,
|
||||
model_response=None,
|
||||
optional_params=None,
|
||||
encoding=None,
|
||||
client=None,
|
||||
|
@ -1541,11 +1539,11 @@ async def async_embedding(
|
|||
)
|
||||
input_tokens += embedding.statistics.token_count
|
||||
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = embedding_response
|
||||
model_response["model"] = model
|
||||
model_response.object = "list"
|
||||
model_response.data = embedding_response
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
|
||||
)
|
||||
model_response.usage = usage
|
||||
setattr(model_response, "usage", usage)
|
||||
return model_response
|
||||
|
|
|
@ -367,8 +367,8 @@ async def async_completion(
|
|||
prompt_tokens = message.usage.input_tokens
|
||||
completion_tokens = message.usage.output_tokens
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -1,11 +1,15 @@
|
|||
import os
|
||||
import json
|
||||
import os
|
||||
import time # type: ignore
|
||||
from enum import Enum
|
||||
from typing import Any, Callable
|
||||
|
||||
import httpx
|
||||
import requests # type: ignore
|
||||
import time, httpx # type: ignore
|
||||
from typing import Callable, Any
|
||||
|
||||
from litellm.utils import ModelResponse, Usage
|
||||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
llm = None
|
||||
|
||||
|
@ -91,14 +95,14 @@ def completion(
|
|||
)
|
||||
print_verbose(f"raw model_response: {outputs}")
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = outputs[0].outputs[0].text
|
||||
model_response.choices[0].message.content = outputs[0].outputs[0].text # type: ignore
|
||||
|
||||
## CALCULATING USAGE
|
||||
prompt_tokens = len(outputs[0].prompt_token_ids)
|
||||
completion_tokens = len(outputs[0].outputs[0].token_ids)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
@ -173,14 +177,14 @@ def batch_completions(
|
|||
for output in outputs:
|
||||
model_response = ModelResponse()
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = output.outputs[0].text
|
||||
model_response.choices[0].message.content = output.outputs[0].text # type: ignore
|
||||
|
||||
## CALCULATING USAGE
|
||||
prompt_tokens = len(output.prompt_token_ids)
|
||||
completion_tokens = len(output.outputs[0].token_ids)
|
||||
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
|
|
@ -25,7 +25,13 @@ import requests # type: ignore
|
|||
|
||||
import litellm
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
from litellm.utils import ModelResponse, Usage, get_secret
|
||||
from litellm.utils import (
|
||||
EmbeddingResponse,
|
||||
ModelResponse,
|
||||
Usage,
|
||||
get_secret,
|
||||
map_finish_reason,
|
||||
)
|
||||
|
||||
from .base import BaseLLM
|
||||
from .prompt_templates import factory as ptf
|
||||
|
@ -414,14 +420,16 @@ class IBMWatsonXAI(BaseLLM):
|
|||
generated_text = json_resp["results"][0]["generated_text"]
|
||||
prompt_tokens = json_resp["results"][0]["input_token_count"]
|
||||
completion_tokens = json_resp["results"][0]["generated_token_count"]
|
||||
model_response["choices"][0]["message"]["content"] = generated_text
|
||||
model_response["finish_reason"] = json_resp["results"][0]["stop_reason"]
|
||||
model_response.choices[0].message.content = generated_text # type: ignore
|
||||
model_response.choices[0].finish_reason = map_finish_reason(
|
||||
json_resp["results"][0]["stop_reason"]
|
||||
)
|
||||
if json_resp.get("created_at"):
|
||||
model_response["created"] = datetime.fromisoformat(
|
||||
json_resp["created_at"]
|
||||
).timestamp()
|
||||
model_response.created = int(
|
||||
datetime.fromisoformat(json_resp["created_at"]).timestamp()
|
||||
)
|
||||
else:
|
||||
model_response["created"] = int(time.time())
|
||||
model_response.created = int(time.time())
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
|
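The hunk above normalizes watsonx's created_at into the integer epoch seconds that ModelResponse.created expects, falling back to the current time when the field is missing. A small sketch of that conversion; the timestamp string is made up:

import time
from datetime import datetime
from typing import Optional

def to_created(created_at: Optional[str]) -> int:
    # ISO-8601 string -> epoch seconds, as done above; fall back to "now"
    if created_at:
        return int(datetime.fromisoformat(created_at).timestamp())
    return int(time.time())

print(to_created("2024-07-10T12:34:56+00:00"))  # example input only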
@ -463,7 +471,7 @@ class IBMWatsonXAI(BaseLLM):
|
|||
prompt = convert_messages_to_prompt(
|
||||
model, messages, provider, custom_prompt_dict
|
||||
)
|
||||
model_response["model"] = model
|
||||
model_response.model = model
|
||||
|
||||
def process_stream_response(
|
||||
stream_resp: Union[Iterator[str], AsyncIterator],
|
||||
|
@ -551,10 +559,10 @@ class IBMWatsonXAI(BaseLLM):
|
|||
raise WatsonXAIError(status_code=500, message=str(e))
|
||||
|
||||
def _process_embedding_response(
|
||||
self, json_resp: dict, model_response: Union[ModelResponse, None] = None
|
||||
) -> ModelResponse:
|
||||
self, json_resp: dict, model_response: Optional[EmbeddingResponse] = None
|
||||
) -> EmbeddingResponse:
|
||||
if model_response is None:
|
||||
model_response = ModelResponse(model=json_resp.get("model_id", None))
|
||||
model_response = EmbeddingResponse(model=json_resp.get("model_id", None))
|
||||
results = json_resp.get("results", [])
|
||||
embedding_response = []
|
||||
for idx, result in enumerate(results):
|
||||
|
@ -565,8 +573,8 @@ class IBMWatsonXAI(BaseLLM):
|
|||
"embedding": result["embedding"],
|
||||
}
|
||||
)
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = embedding_response
|
||||
model_response.object = "list"
|
||||
model_response.data = embedding_response
|
||||
input_tokens = json_resp.get("input_token_count", 0)
|
||||
setattr(
|
||||
model_response,
|
||||
|
@ -583,9 +591,9 @@ class IBMWatsonXAI(BaseLLM):
|
|||
self,
|
||||
model: str,
|
||||
input: Union[list, str],
|
||||
model_response: litellm.EmbeddingResponse,
|
||||
api_key: Optional[str] = None,
|
||||
logging_obj=None,
|
||||
model_response=None,
|
||||
optional_params=None,
|
||||
encoding=None,
|
||||
print_verbose=None,
|
||||
|
@ -602,7 +610,7 @@ class IBMWatsonXAI(BaseLLM):
|
|||
if k not in optional_params:
|
||||
optional_params[k] = v
|
||||
|
||||
model_response["model"] = model
|
||||
model_response.model = model
|
||||
|
||||
# Load auth variables from environment variables
|
||||
if isinstance(input, str):
|
||||
|
@ -635,12 +643,12 @@ class IBMWatsonXAI(BaseLLM):
|
|||
}
|
||||
request_manager = RequestManager(logging_obj)
|
||||
|
||||
def handle_embedding(request_params: dict) -> ModelResponse:
|
||||
def handle_embedding(request_params: dict) -> EmbeddingResponse:
|
||||
with request_manager.request(request_params, input=input) as resp:
|
||||
json_resp = resp.json()
|
||||
return self._process_embedding_response(json_resp, model_response)
|
||||
|
||||
async def handle_aembedding(request_params: dict) -> ModelResponse:
|
||||
async def handle_aembedding(request_params: dict) -> EmbeddingResponse:
|
||||
async with request_manager.async_request(
|
||||
request_params, input=input
|
||||
) as resp:
|
||||
|
|
147 litellm/main.py
|
@ -38,6 +38,7 @@ import dotenv
|
|||
import httpx
|
||||
import openai
|
||||
import tiktoken
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import overload
|
||||
|
||||
import litellm
|
||||
|
@ -48,6 +49,7 @@ from litellm import ( # type: ignore
|
|||
get_litellm_params,
|
||||
get_optional_params,
|
||||
)
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.utils import (
|
||||
CustomStreamWrapper,
|
||||
|
@ -520,7 +522,7 @@ def mock_completion(
|
|||
)
|
||||
return response
|
||||
if n is None:
|
||||
model_response["choices"][0]["message"]["content"] = mock_response
|
||||
model_response.choices[0].message.content = mock_response # type: ignore
|
||||
else:
|
||||
_all_choices = []
|
||||
for i in range(n):
|
||||
|
@ -531,12 +533,12 @@ def mock_completion(
|
|||
),
|
||||
)
|
||||
_all_choices.append(_choice)
|
||||
model_response["choices"] = _all_choices
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.choices = _all_choices # type: ignore
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
|
||||
if mock_tool_calls:
|
||||
model_response["choices"][0]["message"]["tool_calls"] = [
|
||||
model_response.choices[0].message.tool_calls = [ # type: ignore
|
||||
ChatCompletionMessageToolCall(**tool_call)
|
||||
for tool_call in mock_tool_calls
|
||||
]
|
||||
|
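mock_completion now honors n by building the full choices list and can attach mock_tool_calls. A hedged usage sketch through litellm's public completion entrypoint, assuming it forwards mock_response and n to this helper the way the surrounding code suggests:

import litellm

resp = litellm.completion(
    model="gpt-3.5-turbo",                              # any model name works for a mocked call
    messages=[{"role": "user", "content": "ping"}],
    mock_response="pong",                               # served by mock_completion, no network call
    n=3,                                                # should yield three mocked choices
)
print(len(resp.choices), resp.choices[0].message.content)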
@ -1932,51 +1934,7 @@ def completion(
|
|||
"""
|
||||
Deprecated. We now do together ai calls via the openai client - https://docs.together.ai/docs/openai-api-compatibility
|
||||
"""
|
||||
custom_llm_provider = "together_ai"
|
||||
together_ai_key = (
|
||||
api_key
|
||||
or litellm.togetherai_api_key
|
||||
or get_secret("TOGETHER_AI_TOKEN")
|
||||
or get_secret("TOGETHERAI_API_KEY")
|
||||
or litellm.api_key
|
||||
)
|
||||
|
||||
api_base = (
|
||||
api_base
|
||||
or litellm.api_base
|
||||
or get_secret("TOGETHERAI_API_BASE")
|
||||
or "https://api.together.xyz/inference"
|
||||
)
|
||||
|
||||
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
|
||||
|
||||
model_response = together_ai.completion(
|
||||
model=model,
|
||||
messages=messages,
|
||||
api_base=api_base,
|
||||
model_response=model_response,
|
||||
print_verbose=print_verbose,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
logger_fn=logger_fn,
|
||||
encoding=encoding,
|
||||
api_key=together_ai_key,
|
||||
logging_obj=logging,
|
||||
custom_prompt_dict=custom_prompt_dict,
|
||||
)
|
||||
if (
|
||||
"stream_tokens" in optional_params
|
||||
and optional_params["stream_tokens"] == True
|
||||
):
|
||||
# don't try to access stream object,
|
||||
response = CustomStreamWrapper(
|
||||
model_response,
|
||||
model,
|
||||
custom_llm_provider="together_ai",
|
||||
logging_obj=logging,
|
||||
)
|
||||
return response
|
||||
response = model_response
|
||||
pass
|
||||
elif custom_llm_provider == "palm":
|
||||
palm_api_key = api_key or get_secret("PALM_API_KEY") or litellm.api_key
|
||||
|
||||
|
@ -2459,10 +2417,10 @@ def completion(
|
|||
|
||||
## LOGGING
|
||||
generator = ollama.get_ollama_response(
|
||||
api_base,
|
||||
model,
|
||||
prompt,
|
||||
optional_params,
|
||||
api_base=api_base,
|
||||
model=model,
|
||||
prompt=prompt,
|
||||
optional_params=optional_params,
|
||||
logging_obj=logging,
|
||||
acompletion=acompletion,
|
||||
model_response=model_response,
|
||||
|
@ -2488,11 +2446,11 @@ def completion(
|
|||
)
|
||||
## LOGGING
|
||||
generator = ollama_chat.get_ollama_response(
|
||||
api_base,
|
||||
api_key,
|
||||
model,
|
||||
messages,
|
||||
optional_params,
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
logging_obj=logging,
|
||||
acompletion=acompletion,
|
||||
model_response=model_response,
|
||||
|
@ -2670,9 +2628,9 @@ def completion(
|
|||
"""
|
||||
string_response = response_json["data"][0]["output"][0]
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = string_response
|
||||
model_response["created"] = int(time.time())
|
||||
model_response["model"] = model
|
||||
model_response.choices[0].message.content = string_response # type: ignore
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = model
|
||||
response = model_response
|
||||
else:
|
||||
raise ValueError(
|
||||
|
@ -3463,7 +3421,7 @@ def embedding(
|
|||
or api_base
|
||||
or get_secret("OLLAMA_API_BASE")
|
||||
or "http://localhost:11434"
|
||||
)
|
||||
) # type: ignore
|
||||
if isinstance(input, str):
|
||||
input = [input]
|
||||
if not all(isinstance(item, str) for item in input):
|
||||
|
@ -3473,9 +3431,11 @@ def embedding(
|
|||
llm_provider="ollama", # type: ignore
|
||||
)
|
||||
ollama_embeddings_fn = (
|
||||
ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
|
||||
ollama.ollama_aembeddings
|
||||
if aembedding is True
|
||||
else ollama.ollama_embeddings
|
||||
)
|
||||
response = ollama_embeddings_fn(
|
||||
response = ollama_embeddings_fn( # type: ignore
|
||||
api_base=api_base,
|
||||
model=model,
|
||||
prompts=input,
|
||||
|
@ -3943,6 +3903,63 @@ def text_completion(
|
|||
return text_completion_response
|
||||
|
||||
|
||||
###### Adapter Completion ################
|
||||
|
||||
|
||||
async def aadapter_completion(*, adapter_id: str, **kwargs) -> Optional[BaseModel]:
|
||||
"""
|
||||
Implemented to handle async calls for adapter_completion()
|
||||
"""
|
||||
try:
|
||||
translation_obj: Optional[CustomLogger] = None
|
||||
for item in litellm.adapters:
|
||||
if item["id"] == adapter_id:
|
||||
translation_obj = item["adapter"]
|
||||
|
||||
if translation_obj is None:
|
||||
raise ValueError(
|
||||
"No matching adapter given. Received 'adapter_id'={}, litellm.adapters={}".format(
|
||||
adapter_id, litellm.adapters
|
||||
)
|
||||
)
|
||||
|
||||
new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)
|
||||
|
||||
response: ModelResponse = await acompletion(**new_kwargs) # type: ignore
|
||||
|
||||
translated_response = translation_obj.translate_completion_output_params(
|
||||
response=response
|
||||
)
|
||||
|
||||
return translated_response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
|
||||
def adapter_completion(*, adapter_id: str, **kwargs) -> Optional[BaseModel]:
|
||||
translation_obj: Optional[CustomLogger] = None
|
||||
for item in litellm.adapters:
|
||||
if item["id"] == adapter_id:
|
||||
translation_obj = item["adapter"]
|
||||
|
||||
if translation_obj is None:
|
||||
raise ValueError(
|
||||
"No matching adapter given. Received 'adapter_id'={}, litellm.adapters={}".format(
|
||||
adapter_id, litellm.adapters
|
||||
)
|
||||
)
|
||||
|
||||
new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)
|
||||
|
||||
response: ModelResponse = completion(**new_kwargs) # type: ignore
|
||||
|
||||
translated_response = translation_obj.translate_completion_output_params(
|
||||
response=response
|
||||
)
|
||||
|
||||
return translated_response
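adapter_completion and aadapter_completion look up an adapter by id in litellm.adapters (a list of {"id": ..., "adapter": ...} entries), translate the incoming kwargs, call completion/acompletion, and translate the result back. A hedged sketch of wiring one up; the adapter class and its pass-through logic are illustrative stand-ins, only the hook names come from this diff, and if adapter_completion is not re-exported at the package level it can be imported from litellm.main instead:

import litellm
from litellm.integrations.custom_logger import CustomLogger

class PassthroughAdapter(CustomLogger):
    # hypothetical adapter: forwards kwargs unchanged and returns the raw response
    def translate_completion_input_params(self, kwargs):
        return kwargs

    def translate_completion_output_params(self, response):
        return response

litellm.adapters = [{"id": "passthrough", "adapter": PassthroughAdapter()}]

result = litellm.adapter_completion(
    adapter_id="passthrough",
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello",    # keep the sketch offline
)
print(result)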
|
||||
|
||||
|
||||
##### Moderation #######################
|
||||
|
||||
|
||||
|
|
1 litellm/proxy/_experimental/out/404.html Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-1c3809c50f029674.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-1c3809c50f029674.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/275ab6ee150b4fea.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-3264d0959a54279d.js\",\"931\",\"static/chunks/app/page-0cfbdaa2bf8fb022.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/275ab6ee150b4fea.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"LmgW0mreu0hjU2N9CAPDM\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-1c3809c50f029674.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-1c3809c50f029674.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/275ab6ee150b4fea.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-19b05e5ce40fa85d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-d7572f2a46f911d5.js\",\"777\",\"static/chunks/777-3264d0959a54279d.js\",\"931\",\"static/chunks/app/page-1cc1412fb406fc70.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/275ab6ee150b4fea.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"aCz2wdplG6aqWrQnod4_6\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-3264d0959a54279d.js","931","static/chunks/app/page-0cfbdaa2bf8fb022.js"],""]
|
||||
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-d7572f2a46f911d5.js","777","static/chunks/777-3264d0959a54279d.js","931","static/chunks/app/page-1cc1412fb406fc70.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["LmgW0mreu0hjU2N9CAPDM",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/275ab6ee150b4fea.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["aCz2wdplG6aqWrQnod4_6",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/275ab6ee150b4fea.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
1
litellm/proxy/_experimental/out/model_hub.html
Normal file
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-3264d0959a54279d.js","418","static/chunks/app/model_hub/page-6575356e2cde4d07.js"],""]
|
||||
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","777","static/chunks/777-3264d0959a54279d.js","418","static/chunks/app/model_hub/page-6575356e2cde4d07.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["LmgW0mreu0hjU2N9CAPDM",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/275ab6ee150b4fea.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["aCz2wdplG6aqWrQnod4_6",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/275ab6ee150b4fea.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
1
litellm/proxy/_experimental/out/onboarding.html
Normal file
File diff suppressed because one or more lines are too long
|
@ -2,6 +2,6 @@
|
|||
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-3264d0959a54279d.js","461","static/chunks/app/onboarding/page-c73480cdcfdbe5ac.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["LmgW0mreu0hjU2N9CAPDM",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/275ab6ee150b4fea.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["aCz2wdplG6aqWrQnod4_6",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/275ab6ee150b4fea.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -1,19 +1,14 @@
model_list:
- model_name: "*"
- model_name: azure-ai-mistral
litellm_params:
model: "openai/*"
- model_name: gemini-1.5-flash
api_base: os.environ/AZURE_AI_MISTRAL_API_BASE
api_key: os.environ/AZURE_AI_MISTRAL_API_KEY
model: azure_ai/Mistral-large-nmefg
- model_name: azure-ai-phi
litellm_params:
model: gemini/gemini-1.5-flash
- model_name: whisper
litellm_params:
model: azure/azure-whisper
api_version: 2024-02-15-preview
api_base: os.environ/AZURE_EUROPE_API_BASE
api_key: os.environ/AZURE_EUROPE_API_KEY
model_info:
mode: audio_transcription
api_base: os.environ/AZURE_AI_PHI_API_BASE
api_key: os.environ/AZURE_AI_PHI_API_KEY
model: azure_ai/Phi-3-medium-128k-instruct-fpmvj

general_settings:
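For reference, a deployment entry like `azure-ai-mistral` above can be exercised directly with the LiteLLM SDK before it is wired into the proxy. A minimal sketch, assuming the two `AZURE_AI_MISTRAL_*` environment variables from the config are set; the prompt is illustrative:

```
# Minimal sketch: call the azure_ai/Mistral deployment the new config registers.
# Assumes AZURE_AI_MISTRAL_API_BASE / AZURE_AI_MISTRAL_API_KEY are exported.
import os

import litellm

response = litellm.completion(
    model="azure_ai/Mistral-large-nmefg",
    api_base=os.environ["AZURE_AI_MISTRAL_API_BASE"],
    api_key=os.environ["AZURE_AI_MISTRAL_API_KEY"],
    messages=[{"role": "user", "content": "Say hello"}],
)
print(response.choices[0].message.content)
```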
@ -204,6 +204,10 @@ class LiteLLMRoutes(enum.Enum):
        # files
        "/v1/files",
        "/files",
        "/v1/files/{file_id}",
        "/files/{file_id}",
        "/v1/files/{file_id}/content",
        "/files/{file_id}/content",
        # assistants-related routes
        "/assistants",
        "/v1/assistants",
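With the `/v1/files` family now in the authenticated route list, the OpenAI SDK can be pointed at the proxy for file operations. A minimal sketch; the proxy URL, key, and file name are illustrative, the SDK calls are the standard `files` API:

```
# Minimal sketch: exercise the newly routed /v1/files endpoints through the proxy.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-1234")

uploaded = client.files.create(file=open("mydata.jsonl", "rb"), purpose="batch")
print(uploaded.id)

print(client.files.list())                 # GET  /v1/files
print(client.files.retrieve(uploaded.id))  # GET  /v1/files/{file_id}
print(client.files.content(uploaded.id))   # GET  /v1/files/{file_id}/content
client.files.delete(uploaded.id)           # DELETE /v1/files/{file_id}
```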
@ -71,6 +71,11 @@ azure_api_key_header = APIKeyHeader(
    auto_error=False,
    description="Some older versions of the openai Python package will send an API-Key header with just the API key ",
)
anthropic_api_key_header = APIKeyHeader(
    name="x-api-key",
    auto_error=False,
    description="If anthropic client used.",
)


def _get_bearer_token(
@ -87,6 +92,9 @@ async def user_api_key_auth(
    request: Request,
    api_key: str = fastapi.Security(api_key_header),
    azure_api_key_header: str = fastapi.Security(azure_api_key_header),
    anthropic_api_key_header: Optional[str] = fastapi.Security(
        anthropic_api_key_header
    ),
) -> UserAPIKeyAuth:

    from litellm.proxy.proxy_server import (
@ -114,6 +122,9 @@ async def user_api_key_auth(
        elif isinstance(azure_api_key_header, str):
            api_key = azure_api_key_header

        elif isinstance(anthropic_api_key_header, str):
            api_key = anthropic_api_key_header

        parent_otel_span: Optional[Span] = None
        if open_telemetry_logger is not None:
            parent_otel_span = open_telemetry_logger.tracer.start_span(
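The new `x-api-key` path means Anthropic-style clients can authenticate with a LiteLLM key instead of a `Bearer` token. A minimal sketch with httpx against the `/v1/messages` endpoint added later in this diff; the host, key, and model name are illustrative:

```
# Minimal sketch: user_api_key_auth now also reads the proxy key from the
# `x-api-key` header, which is the header Anthropic clients send by default.
import httpx

resp = httpx.post(
    "http://localhost:4000/v1/messages",
    headers={"x-api-key": "sk-1234"},  # LiteLLM proxy key, no "Bearer" prefix
    json={
        "model": "gpt-3.5-turbo",
        "max_tokens": 256,
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
print(resp.status_code, resp.json())
```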
@ -25,3 +25,38 @@ if os.environ.get("LITELLM_PROFILE", "false").lower() == "true":
            result.append(f"{stat.traceback.format()}: {stat.size / 1024} KiB")

        return {"top_50_memory_usage": result}


@router.get("/otel-spans", include_in_schema=False)
async def get_otel_spans():
    from litellm.integrations.opentelemetry import OpenTelemetry
    from litellm.proxy.proxy_server import open_telemetry_logger

    open_telemetry_logger: OpenTelemetry = open_telemetry_logger
    otel_exporter = open_telemetry_logger.OTEL_EXPORTER
    recorded_spans = otel_exporter.get_finished_spans()

    print("Spans: ", recorded_spans)  # noqa

    most_recent_parent = None
    most_recent_start_time = 1000000
    spans_grouped_by_parent = {}
    for span in recorded_spans:
        if span.parent is not None:
            parent_trace_id = span.parent.trace_id
            if parent_trace_id not in spans_grouped_by_parent:
                spans_grouped_by_parent[parent_trace_id] = []
            spans_grouped_by_parent[parent_trace_id].append(span.name)

            # check time of span
            if span.start_time > most_recent_start_time:
                most_recent_parent = parent_trace_id
                most_recent_start_time = span.start_time

    # these are otel spans - get the span name
    span_names = [span.name for span in recorded_spans]
    return {
        "otel_spans": span_names,
        "spans_grouped_by_parent": spans_grouped_by_parent,
        "most_recent_parent": most_recent_parent,
    }
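This debug endpoint only returns data when the configured exporter exposes `get_finished_spans()`, which matches OpenTelemetry's in-memory exporter. A standalone sketch of that exporter API, using only names from the `opentelemetry-sdk` package, to show what the endpoint is iterating over:

```
# Standalone sketch of the exporter API the /otel-spans endpoint relies on.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

exporter = InMemorySpanExporter()
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(exporter))
trace.set_tracer_provider(provider)

tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("parent"):
    with tracer.start_as_current_span("child"):
        pass

# The same call the endpoint makes on open_telemetry_logger.OTEL_EXPORTER:
for span in exporter.get_finished_spans():
    print(span.name, span.parent.trace_id if span.parent else None)
```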
@ -35,6 +35,10 @@ def initialize_callbacks_on_proxy(
            open_telemetry_logger = OpenTelemetry()

            # Add Otel as a service callback
            if "otel" not in litellm.service_callback:
                litellm.service_callback.append("otel")

            imported_list.append(open_telemetry_logger)
            setattr(proxy_server, "open_telemetry_logger", open_telemetry_logger)
        elif isinstance(callback, str) and callback == "presidio":
11
litellm/proxy/example_config_yaml/otel_test_config.yaml
Normal file
@ -0,0 +1,11 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  cache: true
  callbacks: ["otel"]
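This config is what an OTEL smoke test can run the proxy against. A hedged sketch of such a test, assuming a proxy already running on localhost:4000 with this config and an in-memory OTEL exporter; the key and port are illustrative, the `/otel-spans` path is the endpoint added above, and the assertion only checks that something was recorded:

```
# Hedged sketch of an OTEL smoke test against a running proxy started with
# this config and an in-memory span exporter. Host/port/key are illustrative.
import asyncio

import aiohttp


async def main():
    async with aiohttp.ClientSession() as session:
        # make one traced request through the proxy
        async with session.post(
            "http://localhost:4000/chat/completions",
            headers={"Authorization": "Bearer sk-1234"},
            json={
                "model": "fake-openai-endpoint",
                "messages": [{"role": "user", "content": "ping"}],
            },
        ) as resp:
            await resp.json()

        # then inspect the spans recorded by the in-memory exporter
        async with session.get("http://localhost:4000/otel-spans") as resp:
            spans = await resp.json()
            assert len(spans["otel_spans"]) > 0  # at least one span was recorded
            print(spans["spans_grouped_by_parent"])


asyncio.run(main())
```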
@ -406,6 +406,19 @@ async def active_callbacks():
    }


def callback_name(callback):
    if isinstance(callback, str):
        return callback

    try:
        return callback.__name__
    except AttributeError:
        try:
            return callback.__class__.__name__
        except AttributeError:
            return str(callback)


@router.get(
    "/health/readiness",
    tags=["health"],
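`callback_name` normalizes the mixed list of strings, plain functions, and class-based handlers that `litellm.success_callback` can hold. An illustrative usage, with the helper above in scope:

```
# Illustrative inputs: a string, a plain function, and a class-based callback.
def my_custom_logger(kwargs, response, start_time, end_time):
    pass


class MyHandler:
    pass


print(callback_name("langfuse"))        # -> "langfuse"
print(callback_name(my_custom_logger))  # -> "my_custom_logger"
print(callback_name(MyHandler()))       # -> "MyHandler"
```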
@ -424,8 +437,8 @@ async def health_readiness():
try:
# this was returning a JSON of the values in some of the callbacks
# all we need is the callback name, hence we do str(callback)
success_callback_names = [str(x) for x in litellm.success_callback]
except:
success_callback_names = [callback_name(x) for x in litellm.success_callback]
except AttributeError:
# don't let this block the /health/readiness response, if we can't convert to str -> return litellm.success_callback
success_callback_names = litellm.success_callback
599
litellm/proxy/openai_files_endpoints/files_endpoints.py
Normal file
|
@ -0,0 +1,599 @@
|
|||
######################################################################
|
||||
|
||||
# /v1/files Endpoints
|
||||
|
||||
# Equivalent of https://platform.openai.com/docs/api-reference/files
|
||||
######################################################################
|
||||
|
||||
import asyncio
|
||||
import traceback
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import List, Optional
|
||||
|
||||
import fastapi
|
||||
import httpx
|
||||
from fastapi import (
|
||||
APIRouter,
|
||||
Depends,
|
||||
File,
|
||||
Form,
|
||||
Header,
|
||||
HTTPException,
|
||||
Request,
|
||||
Response,
|
||||
UploadFile,
|
||||
status,
|
||||
)
|
||||
|
||||
import litellm
|
||||
from litellm import CreateFileRequest, FileContentRequest
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.batches.main import FileObject
|
||||
from litellm.proxy._types import *
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post(
|
||||
"/v1/files",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
@router.post(
|
||||
"/files",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
async def create_file(
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
purpose: str = Form(...),
|
||||
file: UploadFile = File(...),
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
Upload a file that can be used across - Assistants API, Batch API
|
||||
This is the equivalent of POST https://api.openai.com/v1/files
|
||||
|
||||
Supports Identical Params as: https://platform.openai.com/docs/api-reference/files/create
|
||||
|
||||
Example Curl
|
||||
```
|
||||
curl http://localhost:4000/v1/files \
|
||||
-H "Authorization: Bearer sk-1234" \
|
||||
-F purpose="batch" \
|
||||
-F file="@mydata.jsonl"
|
||||
|
||||
```
|
||||
"""
|
||||
from litellm.proxy.proxy_server import (
|
||||
add_litellm_data_to_request,
|
||||
general_settings,
|
||||
get_custom_headers,
|
||||
proxy_config,
|
||||
proxy_logging_obj,
|
||||
version,
|
||||
)
|
||||
|
||||
data: Dict = {}
|
||||
try:
|
||||
# Use orjson to parse JSON data, orjson speeds up requests significantly
|
||||
# Read the file content
|
||||
file_content = await file.read()
|
||||
# Prepare the data for forwarding
|
||||
|
||||
data = {"purpose": purpose}
|
||||
|
||||
# Include original request and headers in the data
|
||||
data = await add_litellm_data_to_request(
|
||||
data=data,
|
||||
request=request,
|
||||
general_settings=general_settings,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
version=version,
|
||||
proxy_config=proxy_config,
|
||||
)
|
||||
|
||||
# Prepare the file data according to FileTypes
|
||||
file_data = (file.filename, file_content, file.content_type)
|
||||
|
||||
_create_file_request = CreateFileRequest(file=file_data, **data)
|
||||
|
||||
# for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch
|
||||
response = await litellm.acreate_file(
|
||||
custom_llm_provider="openai", **_create_file_request
|
||||
)
|
||||
|
||||
### ALERTING ###
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.update_request_status(
|
||||
litellm_call_id=data.get("litellm_call_id", ""), status="success"
|
||||
)
|
||||
)
|
||||
|
||||
### RESPONSE HEADERS ###
|
||||
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||
model_id = hidden_params.get("model_id", None) or ""
|
||||
cache_key = hidden_params.get("cache_key", None) or ""
|
||||
api_base = hidden_params.get("api_base", None) or ""
|
||||
|
||||
fastapi_response.headers.update(
|
||||
get_custom_headers(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
model_id=model_id,
|
||||
cache_key=cache_key,
|
||||
api_base=api_base,
|
||||
version=version,
|
||||
model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
|
||||
)
|
||||
)
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||
)
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.create_file(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", str(e.detail)),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
|
||||
)
|
||||
else:
|
||||
error_msg = f"{str(e)}"
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/v1/files/{file_id:path}",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
@router.get(
|
||||
"/files/{file_id:path}",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
async def get_file(
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
file_id: str,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
Returns information about a specific file. that can be used across - Assistants API, Batch API
|
||||
This is the equivalent of GET https://api.openai.com/v1/files/{file_id}
|
||||
|
||||
Supports Identical Params as: https://platform.openai.com/docs/api-reference/files/retrieve
|
||||
|
||||
Example Curl
|
||||
```
|
||||
curl http://localhost:4000/v1/files/file-abc123 \
|
||||
-H "Authorization: Bearer sk-1234"
|
||||
|
||||
```
|
||||
"""
|
||||
from litellm.proxy.proxy_server import (
|
||||
add_litellm_data_to_request,
|
||||
general_settings,
|
||||
get_custom_headers,
|
||||
proxy_config,
|
||||
proxy_logging_obj,
|
||||
version,
|
||||
)
|
||||
|
||||
data: Dict = {}
|
||||
try:
|
||||
|
||||
# Include original request and headers in the data
|
||||
data = await add_litellm_data_to_request(
|
||||
data=data,
|
||||
request=request,
|
||||
general_settings=general_settings,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
version=version,
|
||||
proxy_config=proxy_config,
|
||||
)
|
||||
|
||||
# for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch
|
||||
response = await litellm.afile_retrieve(
|
||||
custom_llm_provider="openai", file_id=file_id, **data
|
||||
)
|
||||
|
||||
### ALERTING ###
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.update_request_status(
|
||||
litellm_call_id=data.get("litellm_call_id", ""), status="success"
|
||||
)
|
||||
)
|
||||
|
||||
### RESPONSE HEADERS ###
|
||||
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||
model_id = hidden_params.get("model_id", None) or ""
|
||||
cache_key = hidden_params.get("cache_key", None) or ""
|
||||
api_base = hidden_params.get("api_base", None) or ""
|
||||
|
||||
fastapi_response.headers.update(
|
||||
get_custom_headers(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
model_id=model_id,
|
||||
cache_key=cache_key,
|
||||
api_base=api_base,
|
||||
version=version,
|
||||
model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
|
||||
)
|
||||
)
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||
)
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.retrieve_file(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", str(e.detail)),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
|
||||
)
|
||||
else:
|
||||
error_msg = f"{str(e)}"
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
||||
@router.delete(
|
||||
"/v1/files/{file_id:path}",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
@router.delete(
|
||||
"/files/{file_id:path}",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
async def delete_file(
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
file_id: str,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
Deletes a specified file. that can be used across - Assistants API, Batch API
|
||||
This is the equivalent of DELETE https://api.openai.com/v1/files/{file_id}
|
||||
|
||||
Supports Identical Params as: https://platform.openai.com/docs/api-reference/files/delete
|
||||
|
||||
Example Curl
|
||||
```
|
||||
curl http://localhost:4000/v1/files/file-abc123 \
|
||||
-X DELETE \
|
||||
-H "Authorization: Bearer $OPENAI_API_KEY"
|
||||
|
||||
```
|
||||
"""
|
||||
from litellm.proxy.proxy_server import (
|
||||
add_litellm_data_to_request,
|
||||
general_settings,
|
||||
get_custom_headers,
|
||||
proxy_config,
|
||||
proxy_logging_obj,
|
||||
version,
|
||||
)
|
||||
|
||||
data: Dict = {}
|
||||
try:
|
||||
|
||||
# Include original request and headers in the data
|
||||
data = await add_litellm_data_to_request(
|
||||
data=data,
|
||||
request=request,
|
||||
general_settings=general_settings,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
version=version,
|
||||
proxy_config=proxy_config,
|
||||
)
|
||||
|
||||
# for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch
|
||||
response = await litellm.afile_delete(
|
||||
custom_llm_provider="openai", file_id=file_id, **data
|
||||
)
|
||||
|
||||
### ALERTING ###
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.update_request_status(
|
||||
litellm_call_id=data.get("litellm_call_id", ""), status="success"
|
||||
)
|
||||
)
|
||||
|
||||
### RESPONSE HEADERS ###
|
||||
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||
model_id = hidden_params.get("model_id", None) or ""
|
||||
cache_key = hidden_params.get("cache_key", None) or ""
|
||||
api_base = hidden_params.get("api_base", None) or ""
|
||||
|
||||
fastapi_response.headers.update(
|
||||
get_custom_headers(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
model_id=model_id,
|
||||
cache_key=cache_key,
|
||||
api_base=api_base,
|
||||
version=version,
|
||||
model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
|
||||
)
|
||||
)
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||
)
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.retrieve_file(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", str(e.detail)),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
|
||||
)
|
||||
else:
|
||||
error_msg = f"{str(e)}"
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/v1/files",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
@router.get(
|
||||
"/files",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
async def list_files(
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
purpose: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
Returns information about a specific file. that can be used across - Assistants API, Batch API
|
||||
This is the equivalent of GET https://api.openai.com/v1/files/
|
||||
|
||||
Supports Identical Params as: https://platform.openai.com/docs/api-reference/files/list
|
||||
|
||||
Example Curl
|
||||
```
|
||||
curl http://localhost:4000/v1/files\
|
||||
-H "Authorization: Bearer sk-1234"
|
||||
|
||||
```
|
||||
"""
|
||||
from litellm.proxy.proxy_server import (
|
||||
add_litellm_data_to_request,
|
||||
general_settings,
|
||||
get_custom_headers,
|
||||
proxy_config,
|
||||
proxy_logging_obj,
|
||||
version,
|
||||
)
|
||||
|
||||
data: Dict = {}
|
||||
try:
|
||||
|
||||
# Include original request and headers in the data
|
||||
data = await add_litellm_data_to_request(
|
||||
data=data,
|
||||
request=request,
|
||||
general_settings=general_settings,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
version=version,
|
||||
proxy_config=proxy_config,
|
||||
)
|
||||
|
||||
# for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch
|
||||
response = await litellm.afile_list(
|
||||
custom_llm_provider="openai", purpose=purpose, **data
|
||||
)
|
||||
|
||||
### ALERTING ###
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.update_request_status(
|
||||
litellm_call_id=data.get("litellm_call_id", ""), status="success"
|
||||
)
|
||||
)
|
||||
|
||||
### RESPONSE HEADERS ###
|
||||
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||
model_id = hidden_params.get("model_id", None) or ""
|
||||
cache_key = hidden_params.get("cache_key", None) or ""
|
||||
api_base = hidden_params.get("api_base", None) or ""
|
||||
|
||||
fastapi_response.headers.update(
|
||||
get_custom_headers(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
model_id=model_id,
|
||||
cache_key=cache_key,
|
||||
api_base=api_base,
|
||||
version=version,
|
||||
model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
|
||||
)
|
||||
)
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||
)
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.list_files(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", str(e.detail)),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
|
||||
)
|
||||
else:
|
||||
error_msg = f"{str(e)}"
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/v1/files/{file_id:path}/content",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
@router.get(
|
||||
"/files/{file_id:path}/content",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
async def get_file_content(
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
file_id: str,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
Returns information about a specific file. that can be used across - Assistants API, Batch API
|
||||
This is the equivalent of GET https://api.openai.com/v1/files/{file_id}/content
|
||||
|
||||
Supports Identical Params as: https://platform.openai.com/docs/api-reference/files/retrieve-contents
|
||||
|
||||
Example Curl
|
||||
```
|
||||
curl http://localhost:4000/v1/files/file-abc123/content \
|
||||
-H "Authorization: Bearer sk-1234"
|
||||
|
||||
```
|
||||
"""
|
||||
from litellm.proxy.proxy_server import (
|
||||
add_litellm_data_to_request,
|
||||
general_settings,
|
||||
get_custom_headers,
|
||||
proxy_config,
|
||||
proxy_logging_obj,
|
||||
version,
|
||||
)
|
||||
|
||||
data: Dict = {}
|
||||
try:
|
||||
|
||||
# Include original request and headers in the data
|
||||
data = await add_litellm_data_to_request(
|
||||
data=data,
|
||||
request=request,
|
||||
general_settings=general_settings,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
version=version,
|
||||
proxy_config=proxy_config,
|
||||
)
|
||||
|
||||
# for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch
|
||||
response = await litellm.afile_content(
|
||||
custom_llm_provider="openai", file_id=file_id, **data
|
||||
)
|
||||
|
||||
### ALERTING ###
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.update_request_status(
|
||||
litellm_call_id=data.get("litellm_call_id", ""), status="success"
|
||||
)
|
||||
)
|
||||
|
||||
### RESPONSE HEADERS ###
|
||||
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||
model_id = hidden_params.get("model_id", None) or ""
|
||||
cache_key = hidden_params.get("cache_key", None) or ""
|
||||
api_base = hidden_params.get("api_base", None) or ""
|
||||
|
||||
fastapi_response.headers.update(
|
||||
get_custom_headers(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
model_id=model_id,
|
||||
cache_key=cache_key,
|
||||
api_base=api_base,
|
||||
version=version,
|
||||
model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
|
||||
)
|
||||
)
|
||||
httpx_response: Optional[httpx.Response] = getattr(response, "response", None)
|
||||
if httpx_response is None:
|
||||
raise ValueError(
|
||||
f"Invalid response - response.response is None - got {response}"
|
||||
)
|
||||
return Response(
|
||||
content=httpx_response.content,
|
||||
status_code=httpx_response.status_code,
|
||||
headers=httpx_response.headers,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||
)
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.retrieve_file_content(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", str(e.detail)),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
|
||||
)
|
||||
else:
|
||||
error_msg = f"{str(e)}"
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
|
@ -4,47 +4,14 @@ model_list:
|
|||
model: openai/fake
|
||||
api_key: fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
- model_name: llama3
|
||||
- model_name: gemini-flash
|
||||
litellm_params:
|
||||
model: groq/llama3-8b-8192
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo
|
||||
- model_name: "*"
|
||||
litellm_params:
|
||||
model: openai/*
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
- model_name: mistral-embed
|
||||
litellm_params:
|
||||
model: mistral/mistral-embed
|
||||
model: gemini/gemini-1.5-flash
|
||||
|
||||
general_settings:
|
||||
pass_through_endpoints:
|
||||
- path: "/v1/rerank"
|
||||
target: "https://api.cohere.com/v1/rerank"
|
||||
auth: true # 👈 Key change to use LiteLLM Auth / Keys
|
||||
headers:
|
||||
Authorization: "bearer os.environ/COHERE_API_KEY"
|
||||
content-type: application/json
|
||||
accept: application/json
|
||||
- path: "/api/public/ingestion"
|
||||
target: "https://us.cloud.langfuse.com/api/public/ingestion"
|
||||
auth: true
|
||||
headers:
|
||||
LANGFUSE_PUBLIC_KEY: "os.environ/LANGFUSE_DEV_PUBLIC_KEY"
|
||||
LANGFUSE_SECRET_KEY: "os.environ/LANGFUSE_DEV_SK_KEY"
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
|
||||
litellm_settings:
|
||||
guardrails:
|
||||
- prompt_injection:
|
||||
callbacks: [lakera_prompt_injection, hide_secrets]
|
||||
default_on: true
|
||||
- hide_secrets:
|
||||
callbacks: [hide_secrets]
|
||||
default_on: true
|
||||
|
||||
assistant_settings:
|
||||
custom_llm_provider: openai
|
||||
litellm_params:
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
cache: true
|
||||
callbacks: ["otel"]
|
||||
|
||||
|
|
|
@ -1,24 +1,18 @@
|
|||
import ast
|
||||
import asyncio
|
||||
import copy
|
||||
import hashlib
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
import platform
|
||||
import random
|
||||
import re
|
||||
import secrets
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
import warnings
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import TYPE_CHECKING, Any, Callable, List, Optional, Set, get_args
|
||||
from datetime import datetime, timedelta
|
||||
from typing import TYPE_CHECKING, Any, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
|
@ -106,7 +100,6 @@ import litellm
|
|||
from litellm import (
|
||||
CancelBatchRequest,
|
||||
CreateBatchRequest,
|
||||
CreateFileRequest,
|
||||
ListBatchRequest,
|
||||
RetrieveBatchRequest,
|
||||
)
|
||||
|
@ -174,6 +167,9 @@ from litellm.proxy.management_endpoints.key_management_endpoints import (
|
|||
router as key_management_router,
|
||||
)
|
||||
from litellm.proxy.management_endpoints.team_endpoints import router as team_router
|
||||
from litellm.proxy.openai_files_endpoints.files_endpoints import (
|
||||
router as openai_files_router,
|
||||
)
|
||||
from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
|
||||
initialize_pass_through_endpoints,
|
||||
)
|
||||
|
@ -213,6 +209,12 @@ from litellm.router import (
|
|||
from litellm.router import ModelInfo as RouterModelInfo
|
||||
from litellm.router import updateDeployment
|
||||
from litellm.scheduler import DefaultPriorities, FlowItem, Scheduler
|
||||
from litellm.types.llms.anthropic import (
|
||||
AnthropicMessagesRequest,
|
||||
AnthropicResponse,
|
||||
AnthropicResponseContentBlockText,
|
||||
AnthropicResponseUsageBlock,
|
||||
)
|
||||
from litellm.types.llms.openai import HttpxBinaryResponseContent
|
||||
from litellm.types.router import RouterGeneralSettings
|
||||
|
||||
|
@ -2667,6 +2669,11 @@ async def startup_event():
|
|||
def model_list(
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
Use `/model/info` - to get detailed model information, example - pricing, mode, etc.
|
||||
|
||||
This is just for compatibility with openai projects like aider.
|
||||
"""
|
||||
global llm_model_list, general_settings
|
||||
all_models = []
|
||||
## CHECK IF MODEL RESTRICTIONS ARE SET AT KEY/TEAM LEVEL ##
|
||||
|
@ -2791,7 +2798,7 @@ async def chat_completion(
        ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
        data["litellm_call_id"] = str(uuid.uuid4())
        data["litellm_call_id"] = request.headers.get('x-litellm-call-id', str(uuid.uuid4()))
        logging_obj, data = litellm.utils.function_setup(
            original_function="acompletion",
            rules_obj=litellm.utils.Rules(),
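With this change the proxy honors a caller-supplied `x-litellm-call-id` request header and only falls back to a fresh UUID when the header is absent. A minimal sketch with httpx; the host, key, and model are illustrative, the header name is the one read in the changed line above:

```
# Minimal sketch: pin the litellm_call_id of a request from the client side.
import uuid

import httpx

call_id = str(uuid.uuid4())
resp = httpx.post(
    "http://localhost:4000/v1/chat/completions",
    headers={
        "Authorization": "Bearer sk-1234",
        "x-litellm-call-id": call_id,  # becomes data["litellm_call_id"] on the proxy
    },
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "ping"}],
    },
)
# The same id can then be used to correlate logs and traces for this request.
print(resp.status_code, call_id)
```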
|
||||
|
@ -3243,6 +3250,12 @@ async def completion(
|
|||
response_class=ORJSONResponse,
|
||||
tags=["embeddings"],
|
||||
)
|
||||
@router.post(
|
||||
"/engines/{model:path}/embeddings",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_class=ORJSONResponse,
|
||||
tags=["embeddings"],
|
||||
) # azure compatible endpoint
|
||||
@router.post(
|
||||
"/openai/deployments/{model:path}/embeddings",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
|
@ -4891,117 +4904,6 @@ async def retrieve_batch(
|
|||
|
||||
######################################################################
|
||||
|
||||
######################################################################
|
||||
|
||||
# /v1/files Endpoints
|
||||
|
||||
|
||||
######################################################################
|
||||
@router.post(
|
||||
"/v1/files",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
@router.post(
|
||||
"/files",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["files"],
|
||||
)
|
||||
async def create_file(
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
Upload a file that can be used across - Assistants API, Batch API
|
||||
This is the equivalent of POST https://api.openai.com/v1/files
|
||||
|
||||
Supports Identical Params as: https://platform.openai.com/docs/api-reference/files/create
|
||||
|
||||
Example Curl
|
||||
```
|
||||
curl https://api.openai.com/v1/files \
|
||||
-H "Authorization: Bearer sk-1234" \
|
||||
-F purpose="batch" \
|
||||
-F file="@mydata.jsonl"
|
||||
|
||||
```
|
||||
"""
|
||||
global proxy_logging_obj
|
||||
data: Dict = {}
|
||||
try:
|
||||
# Use orjson to parse JSON data, orjson speeds up requests significantly
|
||||
form_data = await request.form()
|
||||
data = {key: value for key, value in form_data.items() if key != "file"}
|
||||
|
||||
# Include original request and headers in the data
|
||||
data = await add_litellm_data_to_request(
|
||||
data=data,
|
||||
request=request,
|
||||
general_settings=general_settings,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
version=version,
|
||||
proxy_config=proxy_config,
|
||||
)
|
||||
|
||||
_create_file_request = CreateFileRequest()
|
||||
|
||||
# for now use custom_llm_provider=="openai" -> this will change as LiteLLM adds more providers for acreate_batch
|
||||
response = await litellm.acreate_file(
|
||||
custom_llm_provider="openai", **_create_file_request
|
||||
)
|
||||
|
||||
### ALERTING ###
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.update_request_status(
|
||||
litellm_call_id=data.get("litellm_call_id", ""), status="success"
|
||||
)
|
||||
)
|
||||
|
||||
### RESPONSE HEADERS ###
|
||||
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||
model_id = hidden_params.get("model_id", None) or ""
|
||||
cache_key = hidden_params.get("cache_key", None) or ""
|
||||
api_base = hidden_params.get("api_base", None) or ""
|
||||
|
||||
fastapi_response.headers.update(
|
||||
get_custom_headers(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
model_id=model_id,
|
||||
cache_key=cache_key,
|
||||
api_base=api_base,
|
||||
version=version,
|
||||
model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
|
||||
)
|
||||
)
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||
)
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.create_file(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", str(e.detail)),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
|
||||
)
|
||||
else:
|
||||
error_msg = f"{str(e)}"
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/v1/moderations",
|
||||
|
@ -5150,6 +5052,198 @@ async def moderations(
|
|||
)
|
||||
|
||||
|
||||
#### ANTHROPIC ENDPOINTS ####
|
||||
|
||||
|
||||
@router.post(
|
||||
"/v1/messages",
|
||||
tags=["[beta] Anthropic `/v1/messages`"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=AnthropicResponse,
|
||||
)
|
||||
async def anthropic_response(
|
||||
anthropic_data: AnthropicMessagesRequest,
|
||||
fastapi_response: Response,
|
||||
request: Request,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
from litellm import adapter_completion
|
||||
from litellm.adapters.anthropic_adapter import anthropic_adapter
|
||||
|
||||
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]
|
||||
|
||||
global user_temperature, user_request_timeout, user_max_tokens, user_api_base
|
||||
data: dict = {**anthropic_data, "adapter_id": "anthropic"}
|
||||
try:
|
||||
data["model"] = (
|
||||
general_settings.get("completion_model", None) # server default
|
||||
or user_model # model name passed via cli args
|
||||
or data["model"] # default passed in http request
|
||||
)
|
||||
if user_model:
|
||||
data["model"] = user_model
|
||||
|
||||
data = await add_litellm_data_to_request(
|
||||
data=data, # type: ignore
|
||||
request=request,
|
||||
general_settings=general_settings,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
version=version,
|
||||
proxy_config=proxy_config,
|
||||
)
|
||||
|
||||
# override with user settings, these are params passed via cli
|
||||
if user_temperature:
|
||||
data["temperature"] = user_temperature
|
||||
if user_request_timeout:
|
||||
data["request_timeout"] = user_request_timeout
|
||||
if user_max_tokens:
|
||||
data["max_tokens"] = user_max_tokens
|
||||
if user_api_base:
|
||||
data["api_base"] = user_api_base
|
||||
|
||||
### MODEL ALIAS MAPPING ###
|
||||
# check if model name in model alias map
|
||||
# get the actual model name
|
||||
if data["model"] in litellm.model_alias_map:
|
||||
data["model"] = litellm.model_alias_map[data["model"]]
|
||||
|
||||
### CALL HOOKS ### - modify incoming data before calling the model
|
||||
data = await proxy_logging_obj.pre_call_hook( # type: ignore
|
||||
user_api_key_dict=user_api_key_dict, data=data, call_type="text_completion"
|
||||
)
|
||||
|
||||
### ROUTE THE REQUESTs ###
|
||||
router_model_names = llm_router.model_names if llm_router is not None else []
|
||||
# skip router if user passed their key
|
||||
if "api_key" in data:
|
||||
llm_response = asyncio.create_task(litellm.aadapter_completion(**data))
|
||||
elif (
|
||||
llm_router is not None and data["model"] in router_model_names
|
||||
): # model in router model list
|
||||
llm_response = asyncio.create_task(llm_router.aadapter_completion(**data))
|
||||
elif (
|
||||
llm_router is not None
|
||||
and llm_router.model_group_alias is not None
|
||||
and data["model"] in llm_router.model_group_alias
|
||||
): # model set in model_group_alias
|
||||
llm_response = asyncio.create_task(llm_router.aadapter_completion(**data))
|
||||
elif (
|
||||
llm_router is not None and data["model"] in llm_router.deployment_names
|
||||
): # model in router deployments, calling a specific deployment on the router
|
||||
llm_response = asyncio.create_task(
|
||||
llm_router.aadapter_completion(**data, specific_deployment=True)
|
||||
)
|
||||
elif (
|
||||
llm_router is not None and data["model"] in llm_router.get_model_ids()
|
||||
): # model in router model list
|
||||
llm_response = asyncio.create_task(llm_router.aadapter_completion(**data))
|
||||
elif (
|
||||
llm_router is not None
|
||||
and data["model"] not in router_model_names
|
||||
and llm_router.default_deployment is not None
|
||||
): # model in router deployments, calling a specific deployment on the router
|
||||
llm_response = asyncio.create_task(llm_router.aadapter_completion(**data))
|
||||
elif user_model is not None: # `litellm --model <your-model-name>`
|
||||
llm_response = asyncio.create_task(litellm.aadapter_completion(**data))
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail={
|
||||
"error": "completion: Invalid model name passed in model="
|
||||
+ data.get("model", "")
|
||||
},
|
||||
)
|
||||
|
||||
# Await the llm_response task
|
||||
response = await llm_response
|
||||
|
||||
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||
model_id = hidden_params.get("model_id", None) or ""
|
||||
cache_key = hidden_params.get("cache_key", None) or ""
|
||||
api_base = hidden_params.get("api_base", None) or ""
|
||||
response_cost = hidden_params.get("response_cost", None) or ""
|
||||
|
||||
### ALERTING ###
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.update_request_status(
|
||||
litellm_call_id=data.get("litellm_call_id", ""), status="success"
|
||||
)
|
||||
)
|
||||
|
||||
verbose_proxy_logger.debug("final response: %s", response)
|
||||
|
||||
fastapi_response.headers.update(
|
||||
get_custom_headers(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
model_id=model_id,
|
||||
cache_key=cache_key,
|
||||
api_base=api_base,
|
||||
version=version,
|
||||
response_cost=response_cost,
|
||||
)
|
||||
)
|
||||
|
||||
verbose_proxy_logger.info("\nResponse from Litellm:\n{}".format(response))
|
||||
return response
|
||||
except RejectedRequestError as e:
|
||||
_data = e.request_data
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
original_exception=e,
|
||||
request_data=_data,
|
||||
)
|
||||
if _data.get("stream", None) is not None and _data["stream"] == True:
|
||||
_chat_response = litellm.ModelResponse()
|
||||
_usage = litellm.Usage(
|
||||
prompt_tokens=0,
|
||||
completion_tokens=0,
|
||||
total_tokens=0,
|
||||
)
|
||||
_chat_response.usage = _usage # type: ignore
|
||||
_chat_response.choices[0].message.content = e.message # type: ignore
|
||||
_iterator = litellm.utils.ModelResponseIterator(
|
||||
model_response=_chat_response, convert_to_delta=True
|
||||
)
|
||||
_streaming_response = litellm.TextCompletionStreamWrapper(
|
||||
completion_stream=_iterator,
|
||||
model=_data.get("model", ""),
|
||||
)
|
||||
|
||||
selected_data_generator = select_data_generator(
|
||||
response=_streaming_response,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
request_data=data,
|
||||
)
|
||||
|
||||
return StreamingResponse(
|
||||
selected_data_generator,
|
||||
media_type="text/event-stream",
|
||||
headers={},
|
||||
)
|
||||
else:
|
||||
_response = litellm.TextCompletionResponse()
|
||||
_response.choices[0].text = e.message
|
||||
return _response
|
||||
except Exception as e:
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||
)
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.completion(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
error_msg = f"{str(e)}"
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
|
||||
|
||||
#### DEV UTILS ####
|
||||
|
||||
# @router.get(
|
||||
|
@ -9302,3 +9396,4 @@ app.include_router(caching_router)
|
|||
app.include_router(analytics_router)
|
||||
app.include_router(debugging_endpoints_router)
|
||||
app.include_router(ui_crud_endpoints_router)
|
||||
app.include_router(openai_files_router)
|
||||
|
|
|
@ -1765,6 +1765,125 @@ class Router:
|
|||
self.fail_calls[model] += 1
|
||||
raise e
|
||||
|
||||
async def aadapter_completion(
|
||||
self,
|
||||
adapter_id: str,
|
||||
model: str,
|
||||
is_retry: Optional[bool] = False,
|
||||
is_fallback: Optional[bool] = False,
|
||||
is_async: Optional[bool] = False,
|
||||
**kwargs,
|
||||
):
|
||||
try:
|
||||
kwargs["model"] = model
|
||||
kwargs["adapter_id"] = adapter_id
|
||||
kwargs["original_function"] = self._aadapter_completion
|
||||
kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
|
||||
timeout = kwargs.get("request_timeout", self.timeout)
|
||||
kwargs.setdefault("metadata", {}).update({"model_group": model})
|
||||
response = await self.async_function_with_fallbacks(**kwargs)
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
asyncio.create_task(
|
||||
send_llm_exception_alert(
|
||||
litellm_router_instance=self,
|
||||
request_kwargs=kwargs,
|
||||
error_traceback_str=traceback.format_exc(),
|
||||
original_exception=e,
|
||||
)
|
||||
)
|
||||
raise e
|
||||
|
||||
async def _aadapter_completion(self, adapter_id: str, model: str, **kwargs):
|
||||
try:
|
||||
verbose_router_logger.debug(
|
||||
f"Inside _aadapter_completion()- model: {model}; kwargs: {kwargs}"
|
||||
)
|
||||
deployment = await self.async_get_available_deployment(
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "default text"}],
|
||||
specific_deployment=kwargs.pop("specific_deployment", None),
|
||||
)
|
||||
kwargs.setdefault("metadata", {}).update(
|
||||
{
|
||||
"deployment": deployment["litellm_params"]["model"],
|
||||
"model_info": deployment.get("model_info", {}),
|
||||
"api_base": deployment.get("litellm_params", {}).get("api_base"),
|
||||
}
|
||||
)
|
||||
kwargs["model_info"] = deployment.get("model_info", {})
|
||||
data = deployment["litellm_params"].copy()
|
||||
model_name = data["model"]
|
||||
for k, v in self.default_litellm_params.items():
|
||||
if (
|
||||
k not in kwargs
|
||||
): # prioritize model-specific params > default router params
|
||||
kwargs[k] = v
|
||||
elif k == "metadata":
|
||||
kwargs[k].update(v)
|
||||
|
||||
potential_model_client = self._get_client(
|
||||
deployment=deployment, kwargs=kwargs, client_type="async"
|
||||
)
|
||||
# check if provided keys == client keys #
|
||||
dynamic_api_key = kwargs.get("api_key", None)
|
||||
if (
|
||||
dynamic_api_key is not None
|
||||
and potential_model_client is not None
|
||||
and dynamic_api_key != potential_model_client.api_key
|
||||
):
|
||||
model_client = None
|
||||
else:
|
||||
model_client = potential_model_client
|
||||
self.total_calls[model_name] += 1
|
||||
|
||||
response = litellm.aadapter_completion(
|
||||
**{
|
||||
**data,
|
||||
"adapter_id": adapter_id,
|
||||
"caching": self.cache_responses,
|
||||
"client": model_client,
|
||||
"timeout": self.timeout,
|
||||
**kwargs,
|
||||
}
|
||||
)
|
||||
|
||||
rpm_semaphore = self._get_client(
|
||||
deployment=deployment,
|
||||
kwargs=kwargs,
|
||||
client_type="max_parallel_requests",
|
||||
)
|
||||
|
||||
if rpm_semaphore is not None and isinstance(
|
||||
rpm_semaphore, asyncio.Semaphore
|
||||
):
|
||||
async with rpm_semaphore:
|
||||
"""
|
||||
- Check rpm limits before making the call
|
||||
- If allowed, increment the rpm limit (allows global value to be updated, concurrency-safe)
|
||||
"""
|
||||
await self.async_routing_strategy_pre_call_checks(
|
||||
deployment=deployment
|
||||
)
|
||||
response = await response # type: ignore
|
||||
else:
|
||||
await self.async_routing_strategy_pre_call_checks(deployment=deployment)
|
||||
response = await response # type: ignore
|
||||
|
||||
self.success_calls[model_name] += 1
|
||||
verbose_router_logger.info(
|
||||
f"litellm.aadapter_completion(model={model_name})\033[32m 200 OK\033[0m"
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
verbose_router_logger.info(
|
||||
f"litellm.aadapter_completion(model={model})\033[31m Exception {str(e)}\033[0m"
|
||||
)
|
||||
if model is not None:
|
||||
self.fail_calls[model] += 1
|
||||
raise e
|
||||
|
||||
def embedding(
|
||||
self,
|
||||
model: str,
|
||||
|
|
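As a quick orientation for the new `Router.aadapter_completion()` / `_aadapter_completion()` methods above, here is a minimal usage sketch. It mirrors `test_anthropic_router_completion_e2e` added later in this diff; the model alias and mock response are placeholder values, and the adapter registration comes from the anthropic adapter introduced in this change set.

```python
import asyncio

import litellm
from litellm import Router
from litellm.adapters.anthropic_adapter import anthropic_adapter

# register the adapter under an id that aadapter_completion can look up
litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

router = Router(
    model_list=[
        {
            "model_name": "claude-3-5-sonnet-20240620",  # placeholder alias
            "litellm_params": {"model": "gpt-3.5-turbo", "mock_response": "hi"},
        }
    ]
)


async def main():
    # kwargs are forwarded through async_function_with_fallbacks -> litellm.aadapter_completion
    response = await router.aadapter_completion(
        model="claude-3-5-sonnet-20240620",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        adapter_id="anthropic",
        mock_response="This is a fake call",
    )
    print(response)


asyncio.run(main())
```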
File diff suppressed because one or more lines are too long
|
@ -237,6 +237,8 @@ async def test_langfuse_logging_without_request_response(stream, langfuse_client
|
|||
assert _trace_data[0].output == {
|
||||
"role": "assistant",
|
||||
"content": "redacted-by-litellm",
|
||||
"function_call": None,
|
||||
"tool_calls": None,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
@ -273,7 +275,12 @@ async def test_langfuse_masked_input_output(langfuse_client):
|
|||
expected_output = (
|
||||
"redacted-by-litellm"
|
||||
if mask_value
|
||||
else {"content": "This is a test response", "role": "assistant"}
|
||||
else {
|
||||
"content": "This is a test response",
|
||||
"role": "assistant",
|
||||
"function_call": None,
|
||||
"tool_calls": None,
|
||||
}
|
||||
)
|
||||
langfuse_client.flush()
|
||||
await asyncio.sleep(2)
|
||||
|
|
103
litellm/tests/test_anthropic_completion.py
Normal file
|
@@ -0,0 +1,103 @@
# What is this?
## Unit tests for Anthropic Adapter

import asyncio
import os
import sys
import traceback

from dotenv import load_dotenv

load_dotenv()
import io
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from unittest.mock import MagicMock, patch

import pytest

import litellm
from litellm import AnthropicConfig, Router, adapter_completion
from litellm.adapters.anthropic_adapter import anthropic_adapter
from litellm.types.llms.anthropic import AnthropicResponse


def test_anthropic_completion_messages_translation():
    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    translated_messages = AnthropicConfig().translate_anthropic_messages_to_openai(messages=messages)  # type: ignore

    assert translated_messages == [{"role": "user", "content": "Hey, how's it going?"}]


def test_anthropic_completion_input_translation():
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    }
    translated_input = anthropic_adapter.translate_completion_input_params(kwargs=data)

    assert translated_input is not None

    assert translated_input["model"] == "gpt-3.5-turbo"
    assert translated_input["messages"] == [
        {"role": "user", "content": "Hey, how's it going?"}
    ]


def test_anthropic_completion_e2e():
    litellm.set_verbose = True

    litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

    messages = [{"role": "user", "content": "Hey, how's it going?"}]
    response = adapter_completion(
        model="gpt-3.5-turbo",
        messages=messages,
        adapter_id="anthropic",
        mock_response="This is a fake call",
    )

    print("Response: {}".format(response))

    assert response is not None

    assert isinstance(response, AnthropicResponse)


@pytest.mark.asyncio
async def test_anthropic_router_completion_e2e():
    litellm.set_verbose = True

    litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

    router = Router(
        model_list=[
            {
                "model_name": "claude-3-5-sonnet-20240620",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "mock_response": "hi this is macintosh.",
                },
            }
        ]
    )
    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    response = await router.aadapter_completion(
        model="claude-3-5-sonnet-20240620",
        messages=messages,
        adapter_id="anthropic",
        mock_response="This is a fake call",
    )

    print("Response: {}".format(response))

    assert response is not None

    assert isinstance(response, AnthropicResponse)

    assert response.model == "gpt-3.5-turbo"
|
|
@ -1,21 +1,20 @@
|
|||
import asyncio
|
||||
import litellm
|
||||
|
||||
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
|
||||
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
||||
from litellm._logging import verbose_logger
|
||||
import logging
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
|
||||
|
||||
verbose_logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="new test. WIP. works locally but not on CI. Still figuring this out"
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
async def test_otel_callback():
|
||||
@pytest.mark.skip(reason="Local only test. WIP.")
|
||||
async def test_async_otel_callback():
|
||||
exporter = InMemorySpanExporter()
|
||||
litellm.set_verbose = True
|
||||
litellm.callbacks = [OpenTelemetry(OpenTelemetryConfig(exporter=exporter))]
|
||||
|
|
|
@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
|
|||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
|
||||
|
||||
# litellm.num_retries = 3
|
||||
# litellm.num_retries=3
|
||||
litellm.cache = None
|
||||
litellm.success_callback = []
|
||||
user_message = "Write a short poem about the sky"
|
||||
|
@ -3065,32 +3065,38 @@ def response_format_tests(response: litellm.ModelResponse):
|
|||
@pytest.mark.asyncio
|
||||
async def test_completion_bedrock_httpx_models(sync_mode, model):
|
||||
litellm.set_verbose = True
|
||||
try:
|
||||
|
||||
if sync_mode:
|
||||
response = completion(
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
||||
temperature=0.2,
|
||||
max_tokens=200,
|
||||
)
|
||||
if sync_mode:
|
||||
response = completion(
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
||||
temperature=0.2,
|
||||
max_tokens=200,
|
||||
)
|
||||
|
||||
assert isinstance(response, litellm.ModelResponse)
|
||||
assert isinstance(response, litellm.ModelResponse)
|
||||
|
||||
response_format_tests(response=response)
|
||||
else:
|
||||
response = await litellm.acompletion(
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
||||
temperature=0.2,
|
||||
max_tokens=100,
|
||||
)
|
||||
response_format_tests(response=response)
|
||||
else:
|
||||
response = await litellm.acompletion(
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
||||
temperature=0.2,
|
||||
max_tokens=100,
|
||||
)
|
||||
|
||||
assert isinstance(response, litellm.ModelResponse)
|
||||
assert isinstance(response, litellm.ModelResponse)
|
||||
|
||||
print(f"response: {response}")
|
||||
response_format_tests(response=response)
|
||||
|
||||
print(f"response: {response}")
|
||||
response_format_tests(response=response)
|
||||
|
||||
print(f"response: {response}")
|
||||
except litellm.RateLimitError as e:
|
||||
print("got rate limit error=", e)
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"An error occurred - {str(e)}")
|
||||
|
||||
|
||||
def test_completion_bedrock_titan_null_response():
|
||||
|
|
|
@ -712,6 +712,79 @@ def test_vertex_ai_claude_completion_cost():
|
|||
assert cost == predicted_cost
|
||||
|
||||
|
||||
def test_vertex_ai_embedding_completion_cost(caplog):
|
||||
"""
|
||||
Relevant issue - https://github.com/BerriAI/litellm/issues/4630
|
||||
"""
|
||||
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
||||
litellm.model_cost = litellm.get_model_cost_map(url="")
|
||||
|
||||
text = "The quick brown fox jumps over the lazy dog."
|
||||
input_tokens = litellm.token_counter(
|
||||
model="vertex_ai/textembedding-gecko", text=text
|
||||
)
|
||||
|
||||
model_info = litellm.get_model_info(model="vertex_ai/textembedding-gecko")
|
||||
|
||||
print("\nExpected model info:\n{}\n\n".format(model_info))
|
||||
|
||||
expected_input_cost = input_tokens * model_info["input_cost_per_token"]
|
||||
|
||||
## CALCULATED COST
|
||||
calculated_input_cost, calculated_output_cost = cost_per_token(
|
||||
model="textembedding-gecko",
|
||||
custom_llm_provider="vertex_ai",
|
||||
prompt_tokens=input_tokens,
|
||||
call_type="aembedding",
|
||||
)
|
||||
|
||||
assert round(expected_input_cost, 6) == round(calculated_input_cost, 6)
|
||||
print("expected_input_cost: {}".format(expected_input_cost))
|
||||
print("calculated_input_cost: {}".format(calculated_input_cost))
|
||||
|
||||
captured_logs = [rec.message for rec in caplog.records]
|
||||
for item in captured_logs:
|
||||
print("\nitem:{}\n".format(item))
|
||||
if (
|
||||
"litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occured "
|
||||
in item
|
||||
):
|
||||
raise Exception("Error log raised for calculating embedding cost")
|
||||
|
||||
|
||||
# def test_vertex_ai_embedding_completion_cost_e2e():
|
||||
# """
|
||||
# Relevant issue - https://github.com/BerriAI/litellm/issues/4630
|
||||
# """
|
||||
# from litellm.tests.test_amazing_vertex_completion import load_vertex_ai_credentials
|
||||
|
||||
# load_vertex_ai_credentials()
|
||||
# os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
||||
# litellm.model_cost = litellm.get_model_cost_map(url="")
|
||||
|
||||
# text = "The quick brown fox jumps over the lazy dog."
|
||||
# input_tokens = litellm.token_counter(
|
||||
# model="vertex_ai/textembedding-gecko", text=text
|
||||
# )
|
||||
|
||||
# model_info = litellm.get_model_info(model="vertex_ai/textembedding-gecko")
|
||||
|
||||
# print("\nExpected model info:\n{}\n\n".format(model_info))
|
||||
|
||||
# expected_input_cost = input_tokens * model_info["input_cost_per_token"]
|
||||
|
||||
# ## CALCULATED COST
|
||||
# resp = litellm.embedding(model="textembedding-gecko", input=[text])
|
||||
|
||||
# calculated_input_cost = resp._hidden_params["response_cost"]
|
||||
|
||||
# assert round(expected_input_cost, 6) == round(calculated_input_cost, 6)
|
||||
# print("expected_input_cost: {}".format(expected_input_cost))
|
||||
# print("calculated_input_cost: {}".format(calculated_input_cost))
|
||||
|
||||
# assert False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_completion_cost_hidden_params(sync_mode):
|
||||
|
|
|
@ -1,13 +1,16 @@
|
|||
# What is this?
|
||||
## Unit testing for the 'get_model_info()' function
|
||||
import os, sys, traceback
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import pytest
|
||||
|
||||
import litellm
|
||||
from litellm import get_model_info
|
||||
import pytest
|
||||
|
||||
|
||||
def test_get_model_info_simple_model_name():
|
||||
|
@ -37,3 +40,9 @@ def test_get_model_info_custom_llm_with_same_name_vllm():
|
|||
pytest.fail("Expected get model info to fail for an unmapped model/provider")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def test_get_model_info_shows_correct_supports_vision():
|
||||
info = litellm.get_model_info("gemini/gemini-1.5-flash")
|
||||
print("info", info)
|
||||
assert info["supports_vision"] is True
|
||||
|
|
|
@ -1,22 +1,26 @@
|
|||
# What is this?
|
||||
## Unit Tests for OpenAI Batches API
|
||||
import sys, os, json
|
||||
import traceback
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
import pytest, logging, asyncio
|
||||
import litellm
|
||||
from litellm import (
|
||||
create_batch,
|
||||
create_file,
|
||||
)
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
import litellm
|
||||
from litellm import create_batch, create_file
|
||||
|
||||
|
||||
def test_create_batch():
|
||||
"""
|
||||
|
@ -144,6 +148,28 @@ async def test_async_create_batch():
|
|||
|
||||
print("file content = ", file_content)
|
||||
|
||||
# file obj
|
||||
file_obj = await litellm.afile_retrieve(
|
||||
file_id=batch_input_file_id, custom_llm_provider="openai"
|
||||
)
|
||||
print("file obj = ", file_obj)
|
||||
assert file_obj.id == batch_input_file_id
|
||||
|
||||
# delete file
|
||||
delete_file_response = await litellm.afile_delete(
|
||||
file_id=batch_input_file_id, custom_llm_provider="openai"
|
||||
)
|
||||
|
||||
print("delete file response = ", delete_file_response)
|
||||
|
||||
assert delete_file_response.id == batch_input_file_id
|
||||
|
||||
all_files_list = await litellm.afile_list(
|
||||
custom_llm_provider="openai",
|
||||
)
|
||||
|
||||
print("all_files_list = ", all_files_list)
|
||||
|
||||
# # write this file content to a file
|
||||
# with open("file_content.json", "w") as f:
|
||||
# json.dump(file_content, f)
|
|
@ -20,7 +20,7 @@ import pytest
|
|||
import litellm
|
||||
from litellm.proxy._types import LiteLLMRoutes
|
||||
from litellm.proxy.auth.auth_utils import is_openai_route
|
||||
from litellm.proxy.proxy_server import router
|
||||
from litellm.proxy.proxy_server import app
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
|
@ -37,7 +37,7 @@ def test_routes_on_litellm_proxy():
|
|||
this prevents accidentelly deleting /threads, or /batches etc
|
||||
"""
|
||||
_all_routes = []
|
||||
for route in router.routes:
|
||||
for route in app.routes:
|
||||
|
||||
_path_as_str = str(route.path)
|
||||
if ":path" in _path_as_str:
|
||||
|
|
|
@ -21,6 +21,8 @@ sys.path.insert(
|
|||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
import random
|
||||
|
||||
import litellm
|
||||
from litellm import (
|
||||
AuthenticationError,
|
||||
|
@ -1373,7 +1375,8 @@ async def test_bedrock_httpx_streaming(sync_mode, model):
|
|||
if complete_response.strip() == "":
|
||||
raise Exception("Empty response received")
|
||||
print(f"completion_response: {complete_response}\n\nFinalChunk: {final_chunk}")
|
||||
except RateLimitError:
|
||||
except RateLimitError as e:
|
||||
print("got rate limit error=", e)
|
||||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
@ -3037,8 +3040,11 @@ def test_completion_claude_3_function_call_with_streaming():
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model", ["gemini/gemini-1.5-flash"]
|
||||
) # "claude-3-opus-20240229",
|
||||
"model",
|
||||
[
|
||||
"gemini/gemini-1.5-flash",
|
||||
], # "claude-3-opus-20240229"
|
||||
) #
|
||||
@pytest.mark.asyncio
|
||||
async def test_acompletion_claude_3_function_call_with_streaming(model):
|
||||
litellm.set_verbose = True
|
||||
|
@ -3046,41 +3052,45 @@ async def test_acompletion_claude_3_function_call_with_streaming(model):
|
|||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"name": "generate_series_of_questions",
|
||||
"description": "Generate a series of questions, given a topic.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
"questions": {
|
||||
"type": "array",
|
||||
"description": "The questions to be generated.",
|
||||
"items": {"type": "string"},
|
||||
},
|
||||
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
|
||||
},
|
||||
"required": ["location"],
|
||||
"required": ["questions"],
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
]
|
||||
SYSTEM_PROMPT = "You are an AI assistant"
|
||||
messages = [
|
||||
{"role": "system", "content": SYSTEM_PROMPT},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather like in Boston today in fahrenheit?",
|
||||
}
|
||||
"content": "Generate 3 questions about civil engineering.",
|
||||
},
|
||||
]
|
||||
try:
|
||||
# test without max tokens
|
||||
response = await acompletion(
|
||||
model=model,
|
||||
# model="claude-3-5-sonnet-20240620",
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
tool_choice="required",
|
||||
stream=True,
|
||||
temperature=0.75,
|
||||
tools=tools,
|
||||
stream_options={"include_usage": True},
|
||||
)
|
||||
idx = 0
|
||||
print(f"response: {response}")
|
||||
async for chunk in response:
|
||||
# print(f"chunk: {chunk}")
|
||||
print(f"chunk in test: {chunk}")
|
||||
if idx == 0:
|
||||
assert (
|
||||
chunk.choices[0].delta.tool_calls[0].function.arguments is not None
|
||||
|
@ -3510,3 +3520,56 @@ def test_unit_test_custom_stream_wrapper_function_call():
|
|||
if chunk.choices[0].finish_reason is not None:
|
||||
finish_reason = chunk.choices[0].finish_reason
|
||||
assert finish_reason == "tool_calls"
|
||||
|
||||
## UNIT TEST RECREATING MODEL RESPONSE
|
||||
from litellm.types.utils import (
|
||||
ChatCompletionDeltaToolCall,
|
||||
Delta,
|
||||
Function,
|
||||
StreamingChoices,
|
||||
Usage,
|
||||
)
|
||||
|
||||
initial_model_response = litellm.ModelResponse(
|
||||
id="chatcmpl-842826b6-75a1-4ed4-8a68-7655e60654b3",
|
||||
choices=[
|
||||
StreamingChoices(
|
||||
finish_reason=None,
|
||||
index=0,
|
||||
delta=Delta(
|
||||
content="",
|
||||
role="assistant",
|
||||
function_call=None,
|
||||
tool_calls=[
|
||||
ChatCompletionDeltaToolCall(
|
||||
id="7ee88721-bfee-4584-8662-944a23d4c7a5",
|
||||
function=Function(
|
||||
arguments='{"questions": ["What are the main challenges facing civil engineers today?", "How has technology impacted the field of civil engineering?", "What are some of the most innovative projects in civil engineering in recent years?"]}',
|
||||
name="generate_series_of_questions",
|
||||
),
|
||||
type="function",
|
||||
index=0,
|
||||
)
|
||||
],
|
||||
),
|
||||
logprobs=None,
|
||||
)
|
||||
],
|
||||
created=1720755257,
|
||||
model="gemini-1.5-flash",
|
||||
object="chat.completion.chunk",
|
||||
system_fingerprint=None,
|
||||
usage=Usage(prompt_tokens=67, completion_tokens=55, total_tokens=122),
|
||||
stream=True,
|
||||
)
|
||||
|
||||
obj_dict = initial_model_response.dict()
|
||||
|
||||
if "usage" in obj_dict:
|
||||
del obj_dict["usage"]
|
||||
|
||||
new_model = response.model_response_creator(chunk=obj_dict)
|
||||
|
||||
print("\n\n{}\n\n".format(new_model))
|
||||
|
||||
assert len(new_model.choices[0].delta.tool_calls) > 0
|
||||
|
|
|
@ -258,6 +258,13 @@ def test_validate_environment_empty_model():
|
|||
raise Exception()
|
||||
|
||||
|
||||
def test_validate_environment_api_key():
|
||||
response_obj = validate_environment(model="gpt-3.5-turbo", api_key="sk-my-test-key")
|
||||
assert (
|
||||
response_obj["keys_in_environment"] is True
|
||||
), f"Missing keys={response_obj['missing_keys']}"
|
||||
|
||||
|
||||
@mock.patch.dict(os.environ, {"OLLAMA_API_BASE": "foo"}, clear=True)
|
||||
def test_validate_environment_ollama():
|
||||
for provider in ["ollama", "ollama_chat"]:
|
||||
|
|
10
litellm/types/adapter.py
Normal file
|
@@ -0,0 +1,10 @@
from typing import List

from typing_extensions import Dict, Required, TypedDict, override

from litellm.integrations.custom_logger import CustomLogger


class AdapterItem(TypedDict):
    id: str
    adapter: CustomLogger
|
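A short sketch of how the new `AdapterItem` entries are meant to be populated, mirroring the `litellm.adapters` usage in the tests elsewhere in this diff; the anthropic adapter import path comes from the adapter module referenced by those tests.

```python
from typing import List

import litellm
from litellm.adapters.anthropic_adapter import anthropic_adapter
from litellm.types.adapter import AdapterItem

# each AdapterItem pairs a lookup id with a CustomLogger-based adapter instance
adapters: List[AdapterItem] = [{"id": "anthropic", "adapter": anthropic_adapter}]
litellm.adapters = adapters
```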
|
@ -9,25 +9,27 @@ class AnthropicMessagesToolChoice(TypedDict, total=False):
|
|||
name: str
|
||||
|
||||
|
||||
class AnthopicMessagesAssistantMessageTextContentParam(TypedDict, total=False):
|
||||
type: Required[Literal["text"]]
|
||||
class AnthropicMessagesTool(TypedDict, total=False):
|
||||
name: Required[str]
|
||||
description: str
|
||||
input_schema: Required[dict]
|
||||
|
||||
|
||||
class AnthropicMessagesTextParam(TypedDict):
|
||||
type: Literal["text"]
|
||||
text: str
|
||||
|
||||
|
||||
class AnthopicMessagesAssistantMessageToolCallParam(TypedDict, total=False):
|
||||
type: Required[Literal["tool_use"]]
|
||||
|
||||
class AnthropicMessagesToolUseParam(TypedDict):
|
||||
type: Literal["tool_use"]
|
||||
id: str
|
||||
|
||||
name: str
|
||||
|
||||
input: dict
|
||||
|
||||
|
||||
AnthropicMessagesAssistantMessageValues = Union[
|
||||
AnthopicMessagesAssistantMessageTextContentParam,
|
||||
AnthopicMessagesAssistantMessageToolCallParam,
|
||||
AnthropicMessagesTextParam,
|
||||
AnthropicMessagesToolUseParam,
|
||||
]
|
||||
|
||||
|
||||
|
@ -46,6 +48,72 @@ class AnthopicMessagesAssistantMessageParam(TypedDict, total=False):
|
|||
"""
|
||||
|
||||
|
||||
class AnthropicImageParamSource(TypedDict):
|
||||
type: Literal["base64"]
|
||||
media_type: str
|
||||
data: str
|
||||
|
||||
|
||||
class AnthropicMessagesImageParam(TypedDict):
|
||||
type: Literal["image"]
|
||||
source: AnthropicImageParamSource
|
||||
|
||||
|
||||
class AnthropicMessagesToolResultContent(TypedDict):
|
||||
type: Literal["text"]
|
||||
text: str
|
||||
|
||||
|
||||
class AnthropicMessagesToolResultParam(TypedDict, total=False):
|
||||
type: Required[Literal["tool_result"]]
|
||||
tool_use_id: Required[str]
|
||||
is_error: bool
|
||||
content: Union[
|
||||
str,
|
||||
Iterable[
|
||||
Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]
|
||||
],
|
||||
]
|
||||
|
||||
|
||||
AnthropicMessagesUserMessageValues = Union[
|
||||
AnthropicMessagesTextParam,
|
||||
AnthropicMessagesImageParam,
|
||||
AnthropicMessagesToolResultParam,
|
||||
]
|
||||
|
||||
|
||||
class AnthropicMessagesUserMessageParam(TypedDict, total=False):
|
||||
role: Required[Literal["user"]]
|
||||
content: Required[Union[str, Iterable[AnthropicMessagesUserMessageValues]]]
|
||||
|
||||
|
||||
class AnthropicMetadata(TypedDict, total=False):
|
||||
user_id: str
|
||||
|
||||
|
||||
class AnthropicMessagesRequest(TypedDict, total=False):
|
||||
model: Required[str]
|
||||
messages: Required[
|
||||
List[
|
||||
Union[
|
||||
AnthropicMessagesUserMessageParam,
|
||||
AnthopicMessagesAssistantMessageParam,
|
||||
]
|
||||
]
|
||||
]
|
||||
max_tokens: Required[int]
|
||||
metadata: AnthropicMetadata
|
||||
stop_sequences: List[str]
|
||||
stream: bool
|
||||
system: str
|
||||
temperature: float
|
||||
tool_choice: AnthropicMessagesToolChoice
|
||||
tools: List[AnthropicMessagesTool]
|
||||
top_k: int
|
||||
top_p: float
|
||||
|
||||
|
||||
class ContentTextBlockDelta(TypedDict):
|
||||
"""
|
||||
'delta': {'type': 'text_delta', 'text': 'Hello'}
|
||||
|
@ -155,3 +223,51 @@ class MessageStartBlock(TypedDict):
|
|||
|
||||
type: Literal["message_start"]
|
||||
message: MessageChunk
|
||||
|
||||
|
||||
class AnthropicResponseContentBlockText(BaseModel):
|
||||
type: Literal["text"]
|
||||
text: str
|
||||
|
||||
|
||||
class AnthropicResponseContentBlockToolUse(BaseModel):
|
||||
type: Literal["tool_use"]
|
||||
id: str
|
||||
name: str
|
||||
input: dict
|
||||
|
||||
|
||||
class AnthropicResponseUsageBlock(BaseModel):
|
||||
input_tokens: int
|
||||
output_tokens: int
|
||||
|
||||
|
||||
AnthropicFinishReason = Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
|
||||
|
||||
|
||||
class AnthropicResponse(BaseModel):
|
||||
id: str
|
||||
"""Unique object identifier."""
|
||||
|
||||
type: Literal["message"]
|
||||
"""For Messages, this is always "message"."""
|
||||
|
||||
role: Literal["assistant"]
|
||||
"""Conversational role of the generated message. This will always be "assistant"."""
|
||||
|
||||
content: List[
|
||||
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
|
||||
]
|
||||
"""Content generated by the model."""
|
||||
|
||||
model: str
|
||||
"""The model that handled the request."""
|
||||
|
||||
stop_reason: Optional[AnthropicFinishReason]
|
||||
"""The reason that we stopped."""
|
||||
|
||||
stop_sequence: Optional[str]
|
||||
"""Which custom stop sequence was generated, if any."""
|
||||
|
||||
usage: AnthropicResponseUsageBlock
|
||||
"""Billing and rate-limit usage."""
|
||||
|
|
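For reference, a minimal payload that matches the `AnthropicMessagesRequest` TypedDict added above. The module path is assumed to be `litellm.types.llms.anthropic` (the tests in this diff import `AnthropicResponse` from there); all values are illustrative.

```python
from litellm.types.llms.anthropic import (
    AnthropicMessagesRequest,
    AnthropicMessagesUserMessageParam,
)

user_msg: AnthropicMessagesUserMessageParam = {
    "role": "user",
    "content": "Hey, how's it going?",
}

# model, messages and max_tokens are the Required fields; the rest are optional
request: AnthropicMessagesRequest = {
    "model": "claude-3-5-sonnet-20240620",  # placeholder
    "max_tokens": 256,
    "messages": [user_msg],
    "stream": False,
}
```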
|
@ -305,7 +305,13 @@ class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
|
|||
arguments: str
|
||||
|
||||
|
||||
class ChatCompletionToolCallChunk(TypedDict):
|
||||
class ChatCompletionAssistantToolCall(TypedDict):
|
||||
id: Optional[str]
|
||||
type: Literal["function"]
|
||||
function: ChatCompletionToolCallFunctionChunk
|
||||
|
||||
|
||||
class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call
|
||||
id: Optional[str]
|
||||
type: Literal["function"]
|
||||
function: ChatCompletionToolCallFunctionChunk
|
||||
|
@ -319,6 +325,107 @@ class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):
|
|||
index: int
|
||||
|
||||
|
||||
class ChatCompletionTextObject(TypedDict):
|
||||
type: Literal["text"]
|
||||
text: str
|
||||
|
||||
|
||||
class ChatCompletionImageUrlObject(TypedDict, total=False):
|
||||
url: Required[str]
|
||||
detail: str
|
||||
|
||||
|
||||
class ChatCompletionImageObject(TypedDict):
|
||||
type: Literal["image_url"]
|
||||
image_url: ChatCompletionImageUrlObject
|
||||
|
||||
|
||||
class ChatCompletionUserMessage(TypedDict):
|
||||
role: Literal["user"]
|
||||
content: Union[
|
||||
str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]]
|
||||
]
|
||||
|
||||
|
||||
class ChatCompletionAssistantMessage(TypedDict, total=False):
|
||||
role: Required[Literal["assistant"]]
|
||||
content: Optional[str]
|
||||
name: str
|
||||
tool_calls: List[ChatCompletionAssistantToolCall]
|
||||
|
||||
|
||||
class ChatCompletionToolMessage(TypedDict):
|
||||
role: Literal["tool"]
|
||||
content: str
|
||||
tool_call_id: str
|
||||
|
||||
|
||||
class ChatCompletionSystemMessage(TypedDict, total=False):
|
||||
role: Required[Literal["system"]]
|
||||
content: Required[str]
|
||||
name: str
|
||||
|
||||
|
||||
AllMessageValues = Union[
|
||||
ChatCompletionUserMessage,
|
||||
ChatCompletionAssistantMessage,
|
||||
ChatCompletionToolMessage,
|
||||
ChatCompletionSystemMessage,
|
||||
]
|
||||
|
||||
|
||||
class ChatCompletionToolChoiceFunctionParam(TypedDict):
|
||||
name: str
|
||||
|
||||
|
||||
class ChatCompletionToolChoiceObjectParam(TypedDict):
|
||||
type: Literal["function"]
|
||||
function: ChatCompletionToolChoiceFunctionParam
|
||||
|
||||
|
||||
ChatCompletionToolChoiceStringValues = Literal["none", "auto", "required"]
|
||||
|
||||
ChatCompletionToolChoiceValues = Union[
|
||||
ChatCompletionToolChoiceStringValues, ChatCompletionToolChoiceObjectParam
|
||||
]
|
||||
|
||||
|
||||
class ChatCompletionToolParamFunctionChunk(TypedDict, total=False):
|
||||
name: Required[str]
|
||||
description: str
|
||||
parameters: dict
|
||||
|
||||
|
||||
class ChatCompletionToolParam(TypedDict):
|
||||
type: Literal["function"]
|
||||
function: ChatCompletionToolParamFunctionChunk
|
||||
|
||||
|
||||
class ChatCompletionRequest(TypedDict, total=False):
|
||||
model: Required[str]
|
||||
messages: Required[List[AllMessageValues]]
|
||||
frequency_penalty: float
|
||||
logit_bias: dict
|
||||
logprobs: bool
|
||||
top_logprobs: int
|
||||
max_tokens: int
|
||||
n: int
|
||||
presence_penalty: float
|
||||
response_format: dict
|
||||
seed: int
|
||||
service_tier: str
|
||||
stop: Union[str, List[str]]
|
||||
stream_options: dict
|
||||
temperature: float
|
||||
top_p: float
|
||||
tools: List[ChatCompletionToolParam]
|
||||
tool_choice: ChatCompletionToolChoiceValues
|
||||
parallel_tool_calls: bool
|
||||
function_call: Union[str, dict]
|
||||
functions: List
|
||||
user: str
|
||||
|
||||
|
||||
class ChatCompletionDeltaChunk(TypedDict, total=False):
|
||||
content: Optional[str]
|
||||
tool_calls: List[ChatCompletionDeltaToolCallChunk]
|
||||
|
|
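A sketch of a request built from the new chat-completion TypedDicts above. The module path (`litellm.types.llms.openai`) is an assumption based on the file being edited, and the tool definition reuses the example tool from the streaming tests earlier in this diff.

```python
from litellm.types.llms.openai import (
    ChatCompletionRequest,
    ChatCompletionSystemMessage,
    ChatCompletionToolParam,
    ChatCompletionToolParamFunctionChunk,
    ChatCompletionUserMessage,
)

request: ChatCompletionRequest = {
    "model": "gpt-3.5-turbo",
    "messages": [
        ChatCompletionSystemMessage(role="system", content="You are an AI assistant"),
        ChatCompletionUserMessage(
            role="user", content="Generate 3 questions about civil engineering."
        ),
    ],
    "tools": [
        ChatCompletionToolParam(
            type="function",
            function=ChatCompletionToolParamFunctionChunk(
                name="generate_series_of_questions",
                description="Generate a series of questions, given a topic.",
                parameters={"type": "object", "properties": {}},
            ),
        )
    ],
    "tool_choice": "auto",  # ChatCompletionToolChoiceStringValues
}
```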
|
@ -73,6 +73,7 @@ class ModelInfo(TypedDict, total=False):
|
|||
supported_openai_params: Required[Optional[List[str]]]
|
||||
supports_system_messages: Optional[bool]
|
||||
supports_response_schema: Optional[bool]
|
||||
supports_vision: Optional[bool]
|
||||
|
||||
|
||||
class GenericStreamingChunk(TypedDict):
|
||||
|
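The new `supports_vision` field on `ModelInfo` is surfaced through `get_model_info()`, as checked by `test_get_model_info_shows_correct_supports_vision` earlier in this diff; a minimal sketch:

```python
import litellm

info = litellm.get_model_info("gemini/gemini-1.5-flash")
print(info["supports_vision"])  # expected True for this model per the test
```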
@ -166,7 +167,9 @@ class FunctionCall(OpenAIObject):
|
|||
|
||||
class Function(OpenAIObject):
|
||||
arguments: str
|
||||
name: Optional[str] = None
|
||||
name: Optional[
|
||||
str
|
||||
] # can be None - openai e.g.: ChoiceDeltaToolCallFunction(arguments='{"', name=None), type=None)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -280,29 +283,43 @@ class ChatCompletionMessageToolCall(OpenAIObject):
|
|||
setattr(self, key, value)
|
||||
|
||||
|
||||
"""
|
||||
Reference:
|
||||
ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None))
|
||||
"""
|
||||
|
||||
|
||||
class Message(OpenAIObject):
|
||||
|
||||
content: Optional[str]
|
||||
role: Literal["assistant"]
|
||||
tool_calls: Optional[List[ChatCompletionMessageToolCall]]
|
||||
function_call: Optional[FunctionCall]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
content: Optional[str] = "default",
|
||||
role="assistant",
|
||||
logprobs=None,
|
||||
content: Optional[str] = None,
|
||||
role: Literal["assistant"] = "assistant",
|
||||
function_call=None,
|
||||
tool_calls=None,
|
||||
**params,
|
||||
):
|
||||
super(Message, self).__init__(**params)
|
||||
self.content = content
|
||||
self.role = role
|
||||
if function_call is not None:
|
||||
self.function_call = FunctionCall(**function_call)
|
||||
|
||||
if tool_calls is not None:
|
||||
self.tool_calls = []
|
||||
for tool_call in tool_calls:
|
||||
self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call))
|
||||
|
||||
if logprobs is not None:
|
||||
self._logprobs = ChoiceLogprobs(**logprobs)
|
||||
init_values = {
|
||||
"content": content,
|
||||
"role": "assistant",
|
||||
"function_call": (
|
||||
FunctionCall(**function_call) if function_call is not None else None
|
||||
),
|
||||
"tool_calls": (
|
||||
[ChatCompletionMessageToolCall(**tool_call) for tool_call in tool_calls]
|
||||
if tool_calls is not None
|
||||
else None
|
||||
),
|
||||
}
|
||||
super(Message, self).__init__(
|
||||
**init_values,
|
||||
**params,
|
||||
)
|
||||
|
||||
def get(self, key, default=None):
|
||||
# Custom .get() method to access attributes with a default value if the attribute doesn't exist
|
||||
|
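With the rewritten `Message.__init__` above, `content`, `function_call` and `tool_calls` are passed through the pydantic constructor, so unset fields now serialize explicitly as `None`; this is what the updated Langfuse test expectations earlier in this diff rely on. A rough sketch, assuming `Message` remains importable from `litellm.types.utils`:

```python
from litellm.types.utils import Message

m = Message(content="This is a test response")
# the dump is expected to carry explicit function_call=None / tool_calls=None entries
print(m.model_dump())
```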
@ -556,6 +573,8 @@ class ModelResponse(OpenAIObject):
|
|||
_new_choice = choice # type: ignore
|
||||
elif isinstance(choice, dict):
|
||||
_new_choice = Choices(**choice) # type: ignore
|
||||
else:
|
||||
_new_choice = choice
|
||||
new_choices.append(_new_choice)
|
||||
choices = new_choices
|
||||
else:
|
||||
|
@ -608,10 +627,6 @@ class ModelResponse(OpenAIObject):
|
|||
# Allow dictionary-style access to attributes
|
||||
return getattr(self, key)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
# Allow dictionary-style assignment of attributes
|
||||
setattr(self, key, value)
|
||||
|
||||
def json(self, **kwargs):
|
||||
try:
|
||||
return self.model_dump() # noqa
|
||||
|
|
|
@ -4829,6 +4829,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
|
|||
supports_response_schema=_model_info.get(
|
||||
"supports_response_schema", None
|
||||
),
|
||||
supports_vision=_model_info.get("supports_vision", False),
|
||||
)
|
||||
except Exception:
|
||||
raise Exception(
|
||||
|
@ -5048,12 +5049,15 @@ def create_proxy_transport_and_mounts():
|
|||
return sync_proxy_mounts, async_proxy_mounts
|
||||
|
||||
|
||||
def validate_environment(model: Optional[str] = None) -> dict:
|
||||
def validate_environment(
|
||||
model: Optional[str] = None, api_key: Optional[str] = None
|
||||
) -> dict:
|
||||
"""
|
||||
Checks if the environment variables are valid for the given model.
|
||||
|
||||
Args:
|
||||
model (Optional[str]): The name of the model. Defaults to None.
|
||||
api_key (Optional[str]): If the user passed in an api key, of their own.
|
||||
|
||||
Returns:
|
||||
dict: A dictionary containing the following keys:
|
||||
|
@ -5329,6 +5333,13 @@ def validate_environment(model: Optional[str] = None) -> dict:
|
|||
keys_in_environment = True
|
||||
else:
|
||||
missing_keys.append("NLP_CLOUD_API_KEY")
|
||||
|
||||
if api_key is not None:
|
||||
new_missing_keys = []
|
||||
for key in missing_keys:
|
||||
if "api_key" not in key.lower():
|
||||
new_missing_keys.append(key)
|
||||
missing_keys = new_missing_keys
|
||||
return {"keys_in_environment": keys_in_environment, "missing_keys": missing_keys}
|
||||
|
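The extended `validate_environment(model, api_key)` signature above is exercised by `test_validate_environment_api_key` earlier in this diff. A minimal sketch of the new behaviour; the key value is a placeholder.

```python
import litellm

# when api_key is supplied, *_API_KEY entries are filtered out of missing_keys,
# so the provider key does not have to be present in os.environ for the check to pass
result = litellm.validate_environment(model="gpt-3.5-turbo", api_key="sk-my-test-key")
print(result)  # {"keys_in_environment": ..., "missing_keys": [...]}
```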
||||
|
||||
|
@ -8126,7 +8137,7 @@ class CustomStreamWrapper:
|
|||
|
||||
if chunk.startswith(self.complete_response):
|
||||
# Remove last_sent_chunk only if it appears at the start of the new chunk
|
||||
chunk = chunk[len(self.complete_response):]
|
||||
chunk = chunk[len(self.complete_response) :]
|
||||
|
||||
self.complete_response += chunk
|
||||
return chunk
|
||||
|
@ -8940,7 +8951,16 @@ class CustomStreamWrapper:
|
|||
model_response.system_fingerprint = self.system_fingerprint
|
||||
model_response._hidden_params["custom_llm_provider"] = _logging_obj_llm_provider
|
||||
model_response._hidden_params["created_at"] = time.time()
|
||||
model_response.choices = [StreamingChoices(finish_reason=None)]
|
||||
|
||||
if (
|
||||
len(model_response.choices) > 0
|
||||
and hasattr(model_response.choices[0], "delta")
|
||||
and model_response.choices[0].delta is not None
|
||||
):
|
||||
# do nothing, if object instantiated
|
||||
pass
|
||||
else:
|
||||
model_response.choices = [StreamingChoices(finish_reason=None)]
|
||||
return model_response
|
||||
|
||||
def is_delta_empty(self, delta: Delta) -> bool:
|
||||
|
@ -9483,8 +9503,8 @@ class CustomStreamWrapper:
|
|||
model_response.choices[0].delta = Delta(**_json_delta)
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
"litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format(
|
||||
str(e)
|
||||
"litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}\n{}".format(
|
||||
str(e), traceback.format_exc()
|
||||
)
|
||||
)
|
||||
verbose_logger.debug(traceback.format_exc())
|
||||
|
@ -9881,7 +9901,6 @@ class CustomStreamWrapper:
|
|||
self.rules.post_call_rules(
|
||||
input=self.response_uptil_now, model=self.model
|
||||
)
|
||||
print_verbose(f"final returned processed chunk: {processed_chunk}")
|
||||
self.chunks.append(processed_chunk)
|
||||
if hasattr(
|
||||
processed_chunk, "usage"
|
||||
|
@ -9895,6 +9914,7 @@ class CustomStreamWrapper:
|
|||
|
||||
# Create a new object without the removed attribute
|
||||
processed_chunk = self.model_response_creator(chunk=obj_dict)
|
||||
print_verbose(f"final returned processed chunk: {processed_chunk}")
|
||||
return processed_chunk
|
||||
raise StopAsyncIteration
|
||||
else: # temporary patch for non-aiohttp async calls
|
||||
|
@ -10124,7 +10144,7 @@ def mock_completion_streaming_obj(
|
|||
model_response, mock_response, model, n: Optional[int] = None
|
||||
):
|
||||
for i in range(0, len(mock_response), 3):
|
||||
completion_obj = Delta(role="assistant", content=mock_response[i: i + 3])
|
||||
completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
|
||||
if n is None:
|
||||
model_response.choices[0].delta = completion_obj
|
||||
else:
|
||||
|
@ -10133,7 +10153,7 @@ def mock_completion_streaming_obj(
|
|||
_streaming_choice = litellm.utils.StreamingChoices(
|
||||
index=j,
|
||||
delta=litellm.utils.Delta(
|
||||
role="assistant", content=mock_response[i: i + 3]
|
||||
role="assistant", content=mock_response[i : i + 3]
|
||||
),
|
||||
)
|
||||
_all_choices.append(_streaming_choice)
|
||||
|
@ -10145,7 +10165,7 @@ async def async_mock_completion_streaming_obj(
|
|||
model_response, mock_response, model, n: Optional[int] = None
|
||||
):
|
||||
for i in range(0, len(mock_response), 3):
|
||||
completion_obj = Delta(role="assistant", content=mock_response[i: i + 3])
|
||||
completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
|
||||
if n is None:
|
||||
model_response.choices[0].delta = completion_obj
|
||||
else:
|
||||
|
@ -10154,7 +10174,7 @@ async def async_mock_completion_streaming_obj(
|
|||
_streaming_choice = litellm.utils.StreamingChoices(
|
||||
index=j,
|
||||
delta=litellm.utils.Delta(
|
||||
role="assistant", content=mock_response[i: i + 3]
|
||||
role="assistant", content=mock_response[i : i + 3]
|
||||
),
|
||||
)
|
||||
_all_choices.append(_streaming_choice)
|
||||
|
|
6
poetry.lock
generated
|
@ -225,13 +225,13 @@ aio = ["aiohttp (>=3.0)"]
|
|||
|
||||
[[package]]
|
||||
name = "azure-identity"
|
||||
version = "1.16.0"
|
||||
version = "1.16.1"
|
||||
description = "Microsoft Azure Identity Library for Python"
|
||||
optional = true
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "azure-identity-1.16.0.tar.gz", hash = "sha256:6ff1d667cdcd81da1ceab42f80a0be63ca846629f518a922f7317a7e3c844e1b"},
|
||||
{file = "azure_identity-1.16.0-py3-none-any.whl", hash = "sha256:722fdb60b8fdd55fa44dc378b8072f4b419b56a5e54c0de391f644949f3a826f"},
|
||||
{file = "azure-identity-1.16.1.tar.gz", hash = "sha256:6d93f04468f240d59246d8afde3091494a5040d4f141cad0f49fc0c399d0d91e"},
|
||||
{file = "azure_identity-1.16.1-py3-none-any.whl", hash = "sha256:8fb07c25642cd4ac422559a8b50d3e77f73dcc2bbfaba419d06d6c9d7cff6726"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.41.15"
|
||||
version = "1.41.18"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -91,10 +91,16 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.41.15"
|
||||
version = "1.41.18"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
||||
[tool.mypy]
|
||||
plugins = "pydantic.mypy"
|
||||
|
||||
[tool.prisma]
|
||||
# cache engine binaries in a directory relative to your project
|
||||
# binary_cache_dir = '.binaries'
|
||||
home_dir = '.prisma'
|
||||
nodeenv_cache_dir = '.nodeenv'
|
||||
|
|
Some files were not shown because too many files have changed in this diff