mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Merge branch 'main' into litellm_streaming_format_fix
This commit is contained in:
commit
d34cd7ec9a
10 changed files with 227 additions and 3 deletions
|
@ -130,6 +130,7 @@ jobs:
|
|||
pip install "langfuse>=2.0.0"
|
||||
pip install numpydoc
|
||||
pip install prisma
|
||||
pip install fastapi
|
||||
pip install "httpx==0.24.1"
|
||||
pip install "gunicorn==21.2.0"
|
||||
pip install "anyio==3.7.1"
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# VertexAI - Google [Gemini, Model Garden]
|
||||
|
||||
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_VertextAI_Example.ipynb">
|
||||
|
@ -22,8 +25,36 @@ response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "co
|
|||
|
||||
## OpenAI Proxy Usage
|
||||
|
||||
Here's how to use Vertex AI with the LiteLLM Proxy Server
|
||||
|
||||
1. Modify the config.yaml
|
||||
|
||||
<Tabs>
|
||||
|
||||
<TabItem value="completion_param" label="Different location per model">
|
||||
|
||||
Use this when you need to set a different location for each vertex model
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gemini-vision
|
||||
litellm_params:
|
||||
model: vertex_ai/gemini-1.0-pro-vision-001
|
||||
vertex_project: "project-id"
|
||||
vertex_location: "us-central1"
|
||||
- model_name: gemini-vision
|
||||
litellm_params:
|
||||
model: vertex_ai/gemini-1.0-pro-vision-001
|
||||
vertex_project: "project-id2"
|
||||
vertex_location: "us-east"
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="litellm_param" label="One location all vertex models">
|
||||
|
||||
Use this when you have one vertex location for all models
|
||||
|
||||
```yaml
|
||||
litellm_settings:
|
||||
vertex_project: "hardy-device-38811" # Your Project ID
|
||||
|
@ -35,6 +66,10 @@ model_list:
|
|||
model: gemini-pro
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
</Tabs>
|
||||
|
||||
2. Start the proxy
|
||||
|
||||
```bash
|
||||
|
|
|
@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
|
|||
"log_count": num_rows,
|
||||
}
|
||||
return response_data
|
||||
|
||||
|
||||
def _create_clickhouse_material_views(client=None, table_names=None):
    """Create the ClickHouse materialized views used for low-latency reporting.

    Each materialized view forwards newly inserted ``spend_logs`` rows into a
    pre-aggregated daily spend table (per model, per api_key, per user, and a
    global total). Views that already exist (their name appears in
    ``table_names``) are skipped.

    Args:
        client: A connected ClickHouse client exposing ``command()``.
            NOTE(review): no guard for ``client is None`` — assumes the caller
            always passes a live client; confirm against call sites.
        table_names: Names of tables/views already present in the database.
            Defaults to an empty collection when not provided.

    Returns:
        None. Side effect: issues ``CREATE MATERIALIZED VIEW`` DDL statements.
    """
    # Avoid the shared-mutable-default pitfall (`table_names=[]`): use None
    # as the sentinel and materialize a fresh empty list per call.
    if table_names is None:
        table_names = []

    verbose_logger.debug("Clickhouse: Creating Materialized Views")

    if "daily_aggregated_spend_per_model_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
            TO daily_aggregated_spend_per_model
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                model as model
            FROM spend_logs
            GROUP BY
                day, model
            """
        )
    if "daily_aggregated_spend_per_api_key_mv" not in table_names:
        verbose_logger.debug(
            "Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
        )
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
            TO daily_aggregated_spend_per_api_key
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                api_key as api_key
            FROM spend_logs
            GROUP BY
                day, api_key
            """
        )
    if "daily_aggregated_spend_per_user_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
            TO daily_aggregated_spend_per_user
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                user as user
            FROM spend_logs
            GROUP BY
                day, user
            """
        )
    if "daily_aggregated_spend_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
            TO daily_aggregated_spend
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend
            FROM spend_logs
            GROUP BY
                day
            """
        )
|
||||
|
||||
|
||||
def _create_clickhouse_aggregate_tables(client=None, table_names=None):
    """Create the pre-aggregated daily spend tables used by the reporting APIs.

    Basic logging works without these tables — they exist only to back the
    low-latency enterprise reporting endpoints. Tables whose names already
    appear in ``table_names`` are skipped.

    Args:
        client: A connected ClickHouse client exposing ``command()``.
            NOTE(review): no guard for ``client is None`` — assumes the caller
            always passes a live client; confirm against call sites.
        table_names: Names of tables already present in the database.
            Defaults to an empty collection when not provided.

    Returns:
        None. Side effect: issues ``CREATE TABLE`` DDL statements.
    """
    # Avoid the shared-mutable-default pitfall (`table_names=[]`): use None
    # as the sentinel and materialize a fresh empty list per call.
    if table_names is None:
        table_names = []

    verbose_logger.debug("Clickhouse: Creating Aggregate Tables")

    if "daily_aggregated_spend_per_model" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_model
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `model` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, model);
            """
        )
    if "daily_aggregated_spend_per_api_key" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_api_key
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `api_key` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, api_key);
            """
        )
    if "daily_aggregated_spend_per_user" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_user
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `user` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, user);
            """
        )
    if "daily_aggregated_spend" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
        client.command(
            # Fix: the original DDL had a trailing comma after the last column
            # definition, which is a syntax error in a ClickHouse column list.
            """
            CREATE TABLE daily_aggregated_spend
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64)
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day);
            """
        )
    return
|
||||
|
|
|
@ -86,6 +86,14 @@ def _start_clickhouse():
|
|||
response = client.query("DESCRIBE default.spend_logs")
|
||||
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
|
||||
|
||||
# RUN Enterprise Clickhouse Setup
|
||||
# TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
|
||||
from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
|
||||
from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
|
||||
|
||||
_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
|
||||
_create_clickhouse_material_views(client=client, table_names=table_names)
|
||||
|
||||
|
||||
class ClickhouseLogger:
|
||||
# Class variables or attributes
|
||||
|
|
|
@ -278,7 +278,11 @@ def completion(
|
|||
import google.auth
|
||||
|
||||
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
|
||||
print_verbose(
|
||||
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
|
||||
)
|
||||
creds, _ = google.auth.default(quota_project_id=vertex_project)
|
||||
print_verbose(f"VERTEX AI: creds={creds}")
|
||||
vertexai.init(
|
||||
project=vertex_project, location=vertex_location, credentials=creds
|
||||
)
|
||||
|
|
|
@ -687,6 +687,15 @@
|
|||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.5-pro-preview-0215": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-pro-vision": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
|
@ -706,6 +715,17 @@
|
|||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.0-pro-vision-001": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
"max_images_per_prompt": 16,
|
||||
"max_videos_per_prompt": 1,
|
||||
"max_video_length": 2,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.5-pro-vision": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
|
|
|
@ -43,7 +43,7 @@ model_list:
|
|||
api_key: os.environ/OPENAI_API_KEY
|
||||
litellm_settings:
|
||||
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
|
||||
success_callback: ['langfuse']
|
||||
success_callback: ['clickhouse', 'langfuse']
|
||||
# setting callback class
|
||||
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
|
||||
|
||||
|
|
|
@ -4357,6 +4357,7 @@ def get_optional_params(
|
|||
or model in litellm.vertex_code_text_models
|
||||
or model in litellm.vertex_language_models
|
||||
or model in litellm.vertex_embedding_models
|
||||
or model in litellm.vertex_vision_models
|
||||
):
|
||||
print_verbose(f"(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK")
|
||||
## check if unsupported param passed in
|
||||
|
|
|
@ -687,6 +687,15 @@
|
|||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.5-pro-preview-0215": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-pro-vision": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
|
@ -706,6 +715,17 @@
|
|||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.0-pro-vision-001": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
"max_images_per_prompt": 16,
|
||||
"max_videos_per_prompt": 1,
|
||||
"max_video_length": 2,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.5-pro-vision": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.27.12.dev1"
|
||||
version = "1.27.12"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.27.12.dev1"
|
||||
version = "1.27.12"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue