Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

Merge branch 'main' into litellm_streaming_format_fix

Commit d34cd7ec9a: 10 changed files with 227 additions and 3 deletions
@@ -130,6 +130,7 @@ jobs:
           pip install "langfuse>=2.0.0"
           pip install numpydoc
           pip install prisma
+          pip install fastapi
           pip install "httpx==0.24.1"
           pip install "gunicorn==21.2.0"
           pip install "anyio==3.7.1"
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # VertexAI - Google [Gemini, Model Garden]
 
 <a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_VertextAI_Example.ipynb">

@@ -22,8 +25,36 @@ response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "co
 
 ## OpenAI Proxy Usage
 
+Here's how to use Vertex AI with the LiteLLM Proxy Server
+
 1. Modify the config.yaml
 
+<Tabs>
+
+<TabItem value="completion_param" label="Different location per model">
+
+Use this when you need to set a different location for each vertex model
+
+```yaml
+model_list:
+- model_name: gemini-vision
+  litellm_params:
+    model: vertex_ai/gemini-1.0-pro-vision-001
+    vertex_project: "project-id"
+    vertex_location: "us-central1"
+- model_name: gemini-vision
+  litellm_params:
+    model: vertex_ai/gemini-1.0-pro-vision-001
+    vertex_project: "project-id2"
+    vertex_location: "us-east"
+```
+
+</TabItem>
+
+<TabItem value="litellm_param" label="One location all vertex models">
+
+Use this when you have one vertex location for all models
+
 ```yaml
 litellm_settings:
   vertex_project: "hardy-device-38811" # Your Project ID

@@ -35,6 +66,10 @@ model_list:
     model: gemini-pro
 ```
+
+</TabItem>
+
+</Tabs>
 
 2. Start the proxy
 
 ```bash
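Not part of the diff, but for orientation: once the proxy is running, clients reach it over the OpenAI API. A minimal sketch with the OpenAI Python SDK; the base_url assumes the proxy's default port, and the model name matches the model_name entries in the config.yaml hunk above.

```python
# a minimal sketch, not part of this commit: call the running proxy with
# the OpenAI SDK. base_url assumes the proxy's default port; "gemini-vision"
# matches the model_name entries in the config.yaml hunk above.
from openai import OpenAI

client = OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gemini-vision",
    messages=[{"role": "user", "content": "what is in this image?"}],
)
print(response.choices[0].message.content)
```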
@@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
         "log_count": num_rows,
     }
     return response_data
+
+
+def _create_clickhouse_material_views(client=None, table_names=[]):
+    # Create Materialized Views if they don't exist
+    # Materialized Views send new inserted rows to the aggregate tables
+
+    verbose_logger.debug("Clickhouse: Creating Materialized Views")
+    if "daily_aggregated_spend_per_model_mv" not in table_names:
+        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
+        client.command(
+            """
+            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
+            TO daily_aggregated_spend_per_model
+            AS
+            SELECT
+                toDate(startTime) as day,
+                sumState(spend) AS DailySpend,
+                model as model
+            FROM spend_logs
+            GROUP BY
+                day, model
+            """
+        )
+    if "daily_aggregated_spend_per_api_key_mv" not in table_names:
+        verbose_logger.debug(
+            "Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
+        )
+        client.command(
+            """
+            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
+            TO daily_aggregated_spend_per_api_key
+            AS
+            SELECT
+                toDate(startTime) as day,
+                sumState(spend) AS DailySpend,
+                api_key as api_key
+            FROM spend_logs
+            GROUP BY
+                day, api_key
+            """
+        )
+    if "daily_aggregated_spend_per_user_mv" not in table_names:
+        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
+        client.command(
+            """
+            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
+            TO daily_aggregated_spend_per_user
+            AS
+            SELECT
+                toDate(startTime) as day,
+                sumState(spend) AS DailySpend,
+                user as user
+            FROM spend_logs
+            GROUP BY
+                day, user
+            """
+        )
+    if "daily_aggregated_spend_mv" not in table_names:
+        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
+        client.command(
+            """
+            CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
+            TO daily_aggregated_spend
+            AS
+            SELECT
+                toDate(startTime) as day,
+                sumState(spend) AS DailySpend
+            FROM spend_logs
+            GROUP BY
+                day
+            """
+        )
+
+
+def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
+    # Basic Logging works without this - this is only used for low latency reporting apis
+    verbose_logger.debug("Clickhouse: Creating Aggregate Tables")
+
+    # Create Aggregate Tables if they don't exist
+    if "daily_aggregated_spend_per_model" not in table_names:
+        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
+        client.command(
+            """
+            CREATE TABLE daily_aggregated_spend_per_model
+            (
+                `day` Date,
+                `DailySpend` AggregateFunction(sum, Float64),
+                `model` String
+            )
+            ENGINE = SummingMergeTree()
+            ORDER BY (day, model);
+            """
+        )
+    if "daily_aggregated_spend_per_api_key" not in table_names:
+        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
+        client.command(
+            """
+            CREATE TABLE daily_aggregated_spend_per_api_key
+            (
+                `day` Date,
+                `DailySpend` AggregateFunction(sum, Float64),
+                `api_key` String
+            )
+            ENGINE = SummingMergeTree()
+            ORDER BY (day, api_key);
+            """
+        )
+    if "daily_aggregated_spend_per_user" not in table_names:
+        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
+        client.command(
+            """
+            CREATE TABLE daily_aggregated_spend_per_user
+            (
+                `day` Date,
+                `DailySpend` AggregateFunction(sum, Float64),
+                `user` String
+            )
+            ENGINE = SummingMergeTree()
+            ORDER BY (day, user);
+            """
+        )
+    if "daily_aggregated_spend" not in table_names:
+        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
+        client.command(
+            """
+            CREATE TABLE daily_aggregated_spend
+            (
+                `day` Date,
+                `DailySpend` AggregateFunction(sum, Float64)
+            )
+            ENGINE = SummingMergeTree()
+            ORDER BY (day);
+            """
+        )
+    return
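A note on reading these tables back: the materialized views write partial aggregate states via sumState(), so any reporting query has to finalize them with the matching -Merge combinator. A minimal sketch, not part of the commit, assuming a clickhouse_connect client and the daily_aggregated_spend table created above; connection details are placeholders.

```python
# a minimal sketch, not part of the commit: sumState() stores partial
# aggregate states, so reads must finalize them with sumMerge()
import clickhouse_connect

# placeholder connection details
client = clickhouse_connect.get_client(host="localhost", port=8123)

result = client.query(
    """
    SELECT day, sumMerge(DailySpend) AS total_spend
    FROM daily_aggregated_spend
    GROUP BY day
    ORDER BY day
    """
)
for day, total_spend in result.result_rows:
    print(day, total_spend)
```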
@@ -86,6 +86,14 @@ def _start_clickhouse():
     response = client.query("DESCRIBE default.spend_logs")
     verbose_logger.debug(f"spend logs schema ={response.result_rows}")
 
+    # RUN Enterprise Clickhouse Setup
+    # TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
+    from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
+    from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
+
+    _create_clickhouse_aggregate_tables(client=client, table_names=table_names)
+    _create_clickhouse_material_views(client=client, table_names=table_names)
+
 
 class ClickhouseLogger:
     # Class variables or attributes
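For context, table_names here is presumably the list of tables already present in the database, which is what makes the CREATE statements in the enterprise helpers idempotent. A hypothetical sketch of how that list could be gathered; the exact query used in _start_clickhouse may differ.

```python
# hypothetical sketch: collect existing table names so the enterprise
# helpers skip CREATE statements for tables and views that already exist
existing = client.query("SHOW TABLES")
table_names = [row[0] for row in existing.result_rows]

_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
_create_clickhouse_material_views(client=client, table_names=table_names)
```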
@@ -278,7 +278,11 @@ def completion(
         import google.auth
 
         ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
+        print_verbose(
+            f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
+        )
         creds, _ = google.auth.default(quota_project_id=vertex_project)
+        print_verbose(f"VERTEX AI: creds={creds}")
         vertexai.init(
             project=vertex_project, location=vertex_location, credentials=creds
         )
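The issue linked in the comment concerns google.auth.default() returning credentials whose quota project can differ from the target Vertex project; passing quota_project_id pins quota and billing to the intended project. A standalone sketch of the same pattern, with placeholder project and location values:

```python
# a standalone sketch of the pattern added above; the project and location
# strings are placeholders, not values from the commit
import google.auth
import vertexai

vertex_project = "my-gcp-project"  # placeholder
vertex_location = "us-central1"    # placeholder

# load ADC credentials pinned to the intended quota project
creds, _ = google.auth.default(quota_project_id=vertex_project)
vertexai.init(project=vertex_project, location=vertex_location, credentials=creds)
```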
@@ -687,6 +687,15 @@
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat"
     },
+    "gemini-1.5-pro-preview-0215": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat"
+    },
     "gemini-pro-vision": {
         "max_tokens": 16384,
         "max_output_tokens": 2048,

@@ -706,6 +715,17 @@
         "litellm_provider": "vertex_ai-vision-models",
         "mode": "chat"
     },
+    "gemini-1.0-pro-vision-001": {
+        "max_tokens": 16384,
+        "max_output_tokens": 2048,
+        "max_images_per_prompt": 16,
+        "max_videos_per_prompt": 1,
+        "max_video_length": 2,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.0000005,
+        "litellm_provider": "vertex_ai-vision-models",
+        "mode": "chat"
+    },
     "gemini-1.5-pro-vision": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
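These entries feed LiteLLM's cost tracking: input_cost_per_token and output_cost_per_token are multiplied by the token counts of each request. A sketch of looking that up, assuming litellm.cost_per_token keeps its usual (prompt_cost, completion_cost) return shape:

```python
# a sketch, assuming litellm.cost_per_token returns a
# (prompt_cost_usd, completion_cost_usd) tuple for a registered model
import litellm

prompt_cost, completion_cost = litellm.cost_per_token(
    model="gemini-1.0-pro-vision-001",
    prompt_tokens=1000,
    completion_tokens=200,
)
# with the entry above: 1000 * 0.00000025 + 200 * 0.0000005 = 0.00035 USD
print(prompt_cost + completion_cost)
```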
@@ -43,7 +43,7 @@ model_list:
       api_key: os.environ/OPENAI_API_KEY
 litellm_settings:
   fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
-  success_callback: ['langfuse']
+  success_callback: ['clickhouse', 'langfuse']
   # setting callback class
   # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
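Adding 'clickhouse' to success_callback is what routes proxy spend logs into the ClickHouse tables set up in this commit. A sketch of the same wiring in library code; the CLICKHOUSE_* variable names are assumptions and should be checked against the clickhouse logger module:

```python
# a sketch of enabling the clickhouse success callback outside the proxy;
# the CLICKHOUSE_* env var names are assumptions, check the logger module
import os
import litellm

os.environ["CLICKHOUSE_HOST"] = "localhost"  # placeholder
os.environ["CLICKHOUSE_PORT"] = "8123"       # placeholder

litellm.success_callback = ["clickhouse", "langfuse"]  # mirrors the config change

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
```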
@@ -4357,6 +4357,7 @@ def get_optional_params(
         or model in litellm.vertex_code_text_models
         or model in litellm.vertex_language_models
         or model in litellm.vertex_embedding_models
+        or model in litellm.vertex_vision_models
     ):
         print_verbose(f"(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK")
         ## check if unsupported param passed in
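The one-line addition above routes vision models through the same Vertex AI branch of get_optional_params as the text models, so standard OpenAI-style parameters are translated for them too. A sketch of a call this change affects:

```python
# a sketch of a call affected by the change above: temperature and
# max_tokens now pass through the Vertex AI optional-param mapping
# for vision models as well
import litellm

response = litellm.completion(
    model="vertex_ai/gemini-1.0-pro-vision-001",
    messages=[{"role": "user", "content": "describe the weather in a haiku"}],
    temperature=0.2,
    max_tokens=256,
)
print(response.choices[0].message.content)
```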
@@ -687,6 +687,15 @@
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat"
     },
+    "gemini-1.5-pro-preview-0215": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat"
+    },
     "gemini-pro-vision": {
         "max_tokens": 16384,
         "max_output_tokens": 2048,

@@ -706,6 +715,17 @@
         "litellm_provider": "vertex_ai-vision-models",
         "mode": "chat"
     },
+    "gemini-1.0-pro-vision-001": {
+        "max_tokens": 16384,
+        "max_output_tokens": 2048,
+        "max_images_per_prompt": 16,
+        "max_videos_per_prompt": 1,
+        "max_video_length": 2,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.0000005,
+        "litellm_provider": "vertex_ai-vision-models",
+        "mode": "chat"
+    },
     "gemini-1.5-pro-vision": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.27.12.dev1"
+version = "1.27.12"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.27.12.dev1"
+version = "1.27.12"
 version_files = [
     "pyproject.toml:^version"
 ]