mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Merge branch 'main' into litellm_streaming_format_fix
This commit is contained in:
commit
d34cd7ec9a
10 changed files with 227 additions and 3 deletions
|
@ -130,6 +130,7 @@ jobs:
|
|||
pip install "langfuse>=2.0.0"
|
||||
pip install numpydoc
|
||||
pip install prisma
|
||||
pip install fastapi
|
||||
pip install "httpx==0.24.1"
|
||||
pip install "gunicorn==21.2.0"
|
||||
pip install "anyio==3.7.1"
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# VertexAI - Google [Gemini, Model Garden]
|
||||
|
||||
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_VertextAI_Example.ipynb">
|
||||
|
@ -22,8 +25,36 @@ response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "co
|
|||
|
||||
## OpenAI Proxy Usage
|
||||
|
||||
Here's how to use Vertex AI with the LiteLLM Proxy Server
|
||||
|
||||
1. Modify the config.yaml
|
||||
|
||||
<Tabs>
|
||||
|
||||
<TabItem value="completion_param" label="Different location per model">
|
||||
|
||||
Use this when you need to set a different location for each vertex model
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gemini-vision
|
||||
litellm_params:
|
||||
model: vertex_ai/gemini-1.0-pro-vision-001
|
||||
vertex_project: "project-id"
|
||||
vertex_location: "us-central1"
|
||||
- model_name: gemini-vision
|
||||
litellm_params:
|
||||
model: vertex_ai/gemini-1.0-pro-vision-001
|
||||
vertex_project: "project-id2"
|
||||
vertex_location: "us-east"
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem value="litellm_param" label="One location all vertex models">
|
||||
|
||||
Use this when you have one vertex location for all models
|
||||
|
||||
```yaml
|
||||
litellm_settings:
|
||||
vertex_project: "hardy-device-38811" # Your Project ID
|
||||
|
@ -35,6 +66,10 @@ model_list:
|
|||
model: gemini-pro
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
</Tabs>
|
||||
|
||||
2. Start the proxy
|
||||
|
||||
```bash
|
||||
|
|
|
@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
|
|||
"log_count": num_rows,
|
||||
}
|
||||
return response_data
|
||||
|
||||
|
||||
def _create_clickhouse_material_views(client=None, table_names=None):
    """Create the ClickHouse materialized views used for low-latency reporting.

    Each materialized view forwards newly inserted ``spend_logs`` rows into a
    pre-aggregated daily spend table (per model, per api_key, per user, and a
    global total). Views that already exist (their name appears in
    ``table_names``) are skipped.

    Args:
        client: A connected ClickHouse client exposing ``command()``.
            NOTE(review): no guard for ``client is None`` — assumes the caller
            always passes a live client; confirm against call sites.
        table_names: Names of tables/views already present in the database.
            Defaults to an empty collection when not provided.

    Returns:
        None. Side effect: issues ``CREATE MATERIALIZED VIEW`` DDL statements.
    """
    # Avoid the shared-mutable-default pitfall (`table_names=[]`): use None
    # as the sentinel and materialize a fresh empty list per call.
    if table_names is None:
        table_names = []

    verbose_logger.debug("Clickhouse: Creating Materialized Views")

    if "daily_aggregated_spend_per_model_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
            TO daily_aggregated_spend_per_model
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                model as model
            FROM spend_logs
            GROUP BY
                day, model
            """
        )
    if "daily_aggregated_spend_per_api_key_mv" not in table_names:
        verbose_logger.debug(
            "Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
        )
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
            TO daily_aggregated_spend_per_api_key
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                api_key as api_key
            FROM spend_logs
            GROUP BY
                day, api_key
            """
        )
    if "daily_aggregated_spend_per_user_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
            TO daily_aggregated_spend_per_user
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                user as user
            FROM spend_logs
            GROUP BY
                day, user
            """
        )
    if "daily_aggregated_spend_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
            TO daily_aggregated_spend
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend
            FROM spend_logs
            GROUP BY
                day
            """
        )
|
||||
|
||||
|
||||
def _create_clickhouse_aggregate_tables(client=None, table_names=None):
    """Create the pre-aggregated daily spend tables used by the reporting APIs.

    Basic logging works without these tables — they exist only to back the
    low-latency enterprise reporting endpoints. Tables whose names already
    appear in ``table_names`` are skipped.

    Args:
        client: A connected ClickHouse client exposing ``command()``.
            NOTE(review): no guard for ``client is None`` — assumes the caller
            always passes a live client; confirm against call sites.
        table_names: Names of tables already present in the database.
            Defaults to an empty collection when not provided.

    Returns:
        None. Side effect: issues ``CREATE TABLE`` DDL statements.
    """
    # Avoid the shared-mutable-default pitfall (`table_names=[]`): use None
    # as the sentinel and materialize a fresh empty list per call.
    if table_names is None:
        table_names = []

    verbose_logger.debug("Clickhouse: Creating Aggregate Tables")

    if "daily_aggregated_spend_per_model" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_model
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `model` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, model);
            """
        )
    if "daily_aggregated_spend_per_api_key" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_api_key
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `api_key` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, api_key);
            """
        )
    if "daily_aggregated_spend_per_user" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_user
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `user` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, user);
            """
        )
    if "daily_aggregated_spend" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
        client.command(
            # Fix: the original DDL had a trailing comma after the last column
            # definition, which is a syntax error in a ClickHouse column list.
            """
            CREATE TABLE daily_aggregated_spend
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64)
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day);
            """
        )
    return
|
||||
|
|
|
@ -86,6 +86,14 @@ def _start_clickhouse():
|
|||
response = client.query("DESCRIBE default.spend_logs")
|
||||
verbose_logger.debug(f"spend logs schema ={response.result_rows}")
|
||||
|
||||
# RUN Enterprise Clickhouse Setup
|
||||
# TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
|
||||
from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
|
||||
from litellm.proxy.enterprise.utils import _create_clickhouse_material_views
|
||||
|
||||
_create_clickhouse_aggregate_tables(client=client, table_names=table_names)
|
||||
_create_clickhouse_material_views(client=client, table_names=table_names)
|
||||
|
||||
|
||||
class ClickhouseLogger:
|
||||
# Class variables or attributes
|
||||
|
|
|
@ -278,7 +278,11 @@ def completion(
|
|||
import google.auth
|
||||
|
||||
## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
|
||||
print_verbose(
|
||||
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
|
||||
)
|
||||
creds, _ = google.auth.default(quota_project_id=vertex_project)
|
||||
print_verbose(f"VERTEX AI: creds={creds}")
|
||||
vertexai.init(
|
||||
project=vertex_project, location=vertex_location, credentials=creds
|
||||
)
|
||||
|
|
|
@ -687,6 +687,15 @@
|
|||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.5-pro-preview-0215": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-pro-vision": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
|
@ -706,6 +715,17 @@
|
|||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.0-pro-vision-001": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
"max_images_per_prompt": 16,
|
||||
"max_videos_per_prompt": 1,
|
||||
"max_video_length": 2,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.5-pro-vision": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
|
|
|
@ -43,7 +43,7 @@ model_list:
|
|||
api_key: os.environ/OPENAI_API_KEY
|
||||
litellm_settings:
|
||||
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
|
||||
success_callback: ['langfuse']
|
||||
success_callback: ['clickhouse', 'langfuse']
|
||||
# setting callback class
|
||||
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
|
||||
|
||||
|
|
|
@ -4357,6 +4357,7 @@ def get_optional_params(
|
|||
or model in litellm.vertex_code_text_models
|
||||
or model in litellm.vertex_language_models
|
||||
or model in litellm.vertex_embedding_models
|
||||
or model in litellm.vertex_vision_models
|
||||
):
|
||||
print_verbose(f"(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK")
|
||||
## check if unsupported param passed in
|
||||
|
|
|
@ -687,6 +687,15 @@
|
|||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.5-pro-preview-0215": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-pro-vision": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
|
@ -706,6 +715,17 @@
|
|||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.0-pro-vision-001": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
"max_images_per_prompt": 16,
|
||||
"max_videos_per_prompt": 1,
|
||||
"max_video_length": 2,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.5-pro-vision": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 1000000,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.27.12.dev1"
|
||||
version = "1.27.12"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.27.12.dev1"
|
||||
version = "1.27.12"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue