Merge branch 'main' into litellm_streaming_format_fix

Krish Dholakia · 2024-02-27 20:16:09 -08:00 · committed by GitHub
commit d34cd7ec9a
10 changed files with 227 additions and 3 deletions

@@ -130,6 +130,7 @@ jobs:
          pip install "langfuse>=2.0.0"
          pip install numpydoc
          pip install prisma
          pip install fastapi
          pip install "httpx==0.24.1"
          pip install "gunicorn==21.2.0"
          pip install "anyio==3.7.1"

@@ -1,3 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# VertexAI - Google [Gemini, Model Garden]
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_VertextAI_Example.ipynb">
@@ -22,8 +25,36 @@ response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "co
## OpenAI Proxy Usage
Here's how to use Vertex AI with the LiteLLM Proxy Server.

1. Modify the config.yaml

<Tabs>
<TabItem value="completion_param" label="Different location per model">

Use this when you need to set a different location for each Vertex model.

```yaml
model_list:
  - model_name: gemini-vision
    litellm_params:
      model: vertex_ai/gemini-1.0-pro-vision-001
      vertex_project: "project-id"
      vertex_location: "us-central1"
  - model_name: gemini-vision
    litellm_params:
      model: vertex_ai/gemini-1.0-pro-vision-001
      vertex_project: "project-id2"
      vertex_location: "us-east"
```
</TabItem>
<TabItem value="litellm_param" label="One location all vertex models">

Use this when you have one Vertex location for all models.

```yaml
litellm_settings:
  vertex_project: "hardy-device-38811" # Your Project ID
@@ -35,6 +66,10 @@ model_list:
      model: gemini-pro
```
</TabItem>
</Tabs>

2. Start the proxy

```bash

@@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
        "log_count": num_rows,
    }
    return response_data


def _create_clickhouse_material_views(client=None, table_names=[]):
    # Create Materialized Views if they don't exist
    # Materialized Views send new inserted rows to the aggregate tables
    verbose_logger.debug("Clickhouse: Creating Materialized Views")
    if "daily_aggregated_spend_per_model_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
            TO daily_aggregated_spend_per_model
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                model as model
            FROM spend_logs
            GROUP BY
                day, model
            """
        )
    if "daily_aggregated_spend_per_api_key_mv" not in table_names:
        verbose_logger.debug(
            "Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
        )
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
            TO daily_aggregated_spend_per_api_key
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                api_key as api_key
            FROM spend_logs
            GROUP BY
                day, api_key
            """
        )
    if "daily_aggregated_spend_per_user_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
            TO daily_aggregated_spend_per_user
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                user as user
            FROM spend_logs
            GROUP BY
                day, user
            """
        )
    if "daily_aggregated_spend_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
            TO daily_aggregated_spend
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend
            FROM spend_logs
            GROUP BY
                day
            """
        )


def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
    # Basic logging works without this - this is only used for low-latency reporting APIs
    verbose_logger.debug("Clickhouse: Creating Aggregate Tables")

    # Create aggregate tables if they don't exist
    if "daily_aggregated_spend_per_model" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_model
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `model` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, model);
            """
        )
    if "daily_aggregated_spend_per_api_key" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_api_key
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `api_key` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, api_key);
            """
        )
    if "daily_aggregated_spend_per_user" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_user
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `user` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, user);
            """
        )
    if "daily_aggregated_spend" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64)
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day);
            """
        )
    return
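Since the aggregate tables above store partial sums as `AggregateFunction(sum, Float64)` states, reads need `sumMerge` to finalize them. A minimal read sketch, assuming a `clickhouse_connect` client (the connection details below are placeholders, not values from this commit):

```python
import clickhouse_connect

# Placeholder connection details - adjust to the actual ClickHouse deployment.
client = clickhouse_connect.get_client(
    host="localhost", port=8123, username="default", password=""
)

# sumMerge() collapses the AggregateFunction(sum, Float64) states written by
# the materialized views above into final per-day totals.
result = client.query(
    """
    SELECT day, model, sumMerge(DailySpend) AS total_spend
    FROM daily_aggregated_spend_per_model
    GROUP BY day, model
    ORDER BY day
    """
)
for day, model, total_spend in result.result_rows:
    print(day, model, total_spend)
```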

@@ -86,6 +86,14 @@ def _start_clickhouse():
    response = client.query("DESCRIBE default.spend_logs")
    verbose_logger.debug(f"spend logs schema ={response.result_rows}")

    # RUN Enterprise Clickhouse Setup
    # TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
    from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
    from litellm.proxy.enterprise.utils import _create_clickhouse_material_views

    _create_clickhouse_aggregate_tables(client=client, table_names=table_names)
    _create_clickhouse_material_views(client=client, table_names=table_names)


class ClickhouseLogger:
    # Class variables or attributes
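Both helpers take a `table_names` list so tables and views that already exist are skipped. A hedged sketch of how that list could be assembled before the two calls above (the `SHOW TABLES` approach is an assumption for illustration, not necessarily what `_start_clickhouse()` actually does):

```python
# Hypothetical helper: collect existing table/view names so the
# _create_clickhouse_* functions can skip anything already present.
def _existing_table_names(client) -> list:
    response = client.query("SHOW TABLES")
    return [row[0] for row in response.result_rows]

# table_names = _existing_table_names(client)
# _create_clickhouse_aggregate_tables(client=client, table_names=table_names)
# _create_clickhouse_material_views(client=client, table_names=table_names)
```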

@@ -278,7 +278,11 @@ def completion(
        import google.auth
        ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
        print_verbose(
            f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
        )
        creds, _ = google.auth.default(quota_project_id=vertex_project)
        print_verbose(f"VERTEX AI: creds={creds}")
        vertexai.init(
            project=vertex_project, location=vertex_location, credentials=creds
        )
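For reference, a minimal SDK call that exercises this code path, passing the same per-request `vertex_project`/`vertex_location` parameters shown in the proxy config above (the project and location values are placeholders):

```python
import litellm

# Placeholder project/location - these are the values logged and then
# passed into vertexai.init() in the snippet above.
response = litellm.completion(
    model="vertex_ai/gemini-1.0-pro-vision-001",
    messages=[{"role": "user", "content": "Hi, what can you do?"}],
    vertex_project="project-id",
    vertex_location="us-central1",
)
print(response.choices[0].message.content)
```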

@@ -687,6 +687,15 @@
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-preview-0215": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-pro-vision": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
@@ -706,6 +715,17 @@
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "gemini-1.0-pro-vision-001": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
        "max_images_per_prompt": 16,
        "max_videos_per_prompt": 1,
        "max_video_length": 2,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-vision": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,

@@ -43,7 +43,7 @@ model_list:
      api_key: os.environ/OPENAI_API_KEY
litellm_settings:
  fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
-  success_callback: ['langfuse']
+  success_callback: ['clickhouse', 'langfuse']
  # setting callback class
  # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
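For the SDK (outside the proxy), a roughly equivalent setup is assigning the callback list in code; a minimal sketch (the model and message are placeholders, and an `OPENAI_API_KEY` would be needed for this particular model):

```python
import litellm

# Mirror the proxy_config change: report successful calls to ClickHouse and Langfuse.
litellm.success_callback = ["clickhouse", "langfuse"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
```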

@@ -4357,6 +4357,7 @@ def get_optional_params(
        or model in litellm.vertex_code_text_models
        or model in litellm.vertex_language_models
        or model in litellm.vertex_embedding_models
        or model in litellm.vertex_vision_models
    ):
        print_verbose(f"(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK")
        ## check if unsupported param passed in

@@ -687,6 +687,15 @@
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-preview-0215": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-pro-vision": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
@@ -706,6 +715,17 @@
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "gemini-1.0-pro-vision-001": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
        "max_images_per_prompt": 16,
        "max_videos_per_prompt": 1,
        "max_video_length": 2,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-vision": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "1.27.12.dev1"
+version = "1.27.12"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
-version = "1.27.12.dev1"
+version = "1.27.12"
version_files = [
    "pyproject.toml:^version"
]