Merge branch 'main' into litellm_streaming_format_fix

Krish Dholakia · 2024-02-27 20:16:09 -08:00 · committed by GitHub
commit d34cd7ec9a
10 changed files with 227 additions and 3 deletions

@@ -130,6 +130,7 @@ jobs:
          pip install "langfuse>=2.0.0"
          pip install numpydoc
          pip install prisma
          pip install fastapi
          pip install "httpx==0.24.1"
          pip install "gunicorn==21.2.0"
          pip install "anyio==3.7.1"

@@ -1,3 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# VertexAI - Google [Gemini, Model Garden]
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_VertextAI_Example.ipynb">
@@ -22,8 +25,36 @@ response = litellm.completion(model="gemini-pro", messages=[{"role": "user", "co
## OpenAI Proxy Usage
Here's how to use Vertex AI with the LiteLLM Proxy Server.

1. Modify the config.yaml

<Tabs>
<TabItem value="completion_param" label="Different location per model">

Use this when you need to set a different location for each Vertex model.

```yaml
model_list:
  - model_name: gemini-vision
    litellm_params:
      model: vertex_ai/gemini-1.0-pro-vision-001
      vertex_project: "project-id"
      vertex_location: "us-central1"
  - model_name: gemini-vision
    litellm_params:
      model: vertex_ai/gemini-1.0-pro-vision-001
      vertex_project: "project-id2"
      vertex_location: "us-east"
```
</TabItem>
<TabItem value="litellm_param" label="One location all vertex models">

Use this when you have one Vertex location for all models.

```yaml
litellm_settings:
  vertex_project: "hardy-device-38811" # Your Project ID
@@ -35,6 +66,10 @@ model_list:
      model: gemini-pro
```
</TabItem>
</Tabs>

2. Start the proxy

```bash

@@ -110,3 +110,138 @@ async def view_spend_logs_from_clickhouse(
        "log_count": num_rows,
    }
    return response_data


def _create_clickhouse_material_views(client=None, table_names=[]):
    # Create Materialized Views if they don't exist
    # Materialized Views send new inserted rows to the aggregate tables
    verbose_logger.debug("Clickhouse: Creating Materialized Views")
    if "daily_aggregated_spend_per_model_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_model_mv
            TO daily_aggregated_spend_per_model
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                model as model
            FROM spend_logs
            GROUP BY
                day, model
            """
        )
    if "daily_aggregated_spend_per_api_key_mv" not in table_names:
        verbose_logger.debug(
            "Clickhouse: Creating daily_aggregated_spend_per_api_key_mv"
        )
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_api_key_mv
            TO daily_aggregated_spend_per_api_key
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                api_key as api_key
            FROM spend_logs
            GROUP BY
                day, api_key
            """
        )
    if "daily_aggregated_spend_per_user_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_per_user_mv
            TO daily_aggregated_spend_per_user
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend,
                user as user
            FROM spend_logs
            GROUP BY
                day, user
            """
        )
    if "daily_aggregated_spend_mv" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_mv")
        client.command(
            """
            CREATE MATERIALIZED VIEW daily_aggregated_spend_mv
            TO daily_aggregated_spend
            AS
            SELECT
                toDate(startTime) as day,
                sumState(spend) AS DailySpend
            FROM spend_logs
            GROUP BY
                day
            """
        )


def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
    # Basic logging works without this - this is only used for low-latency reporting APIs
    verbose_logger.debug("Clickhouse: Creating Aggregate Tables")

    # Create aggregate tables if they don't exist
    if "daily_aggregated_spend_per_model" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_model")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_model
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `model` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, model);
            """
        )
    if "daily_aggregated_spend_per_api_key" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_api_key")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_api_key
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `api_key` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, api_key);
            """
        )
    if "daily_aggregated_spend_per_user" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend_per_user")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend_per_user
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64),
                `user` String
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day, user);
            """
        )
    if "daily_aggregated_spend" not in table_names:
        verbose_logger.debug("Clickhouse: Creating daily_aggregated_spend")
        client.command(
            """
            CREATE TABLE daily_aggregated_spend
            (
                `day` Date,
                `DailySpend` AggregateFunction(sum, Float64)
            )
            ENGINE = SummingMergeTree()
            ORDER BY (day);
            """
        )
    return
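Since the aggregate tables above store partial sums as `AggregateFunction(sum, Float64)` states, reads need `sumMerge` to finalize them. A minimal read sketch, assuming a `clickhouse_connect` client (the connection details below are placeholders, not values from this commit):

```python
import clickhouse_connect

# Placeholder connection details - adjust to the actual ClickHouse deployment.
client = clickhouse_connect.get_client(
    host="localhost", port=8123, username="default", password=""
)

# sumMerge() collapses the AggregateFunction(sum, Float64) states written by
# the materialized views above into final per-day totals.
result = client.query(
    """
    SELECT day, model, sumMerge(DailySpend) AS total_spend
    FROM daily_aggregated_spend_per_model
    GROUP BY day, model
    ORDER BY day
    """
)
for day, model, total_spend in result.result_rows:
    print(day, model, total_spend)
```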

@@ -86,6 +86,14 @@ def _start_clickhouse():
    response = client.query("DESCRIBE default.spend_logs")
    verbose_logger.debug(f"spend logs schema ={response.result_rows}")

    # RUN Enterprise Clickhouse Setup
    # TLDR: For Enterprise - we create views / aggregate tables for low latency reporting APIs
    from litellm.proxy.enterprise.utils import _create_clickhouse_aggregate_tables
    from litellm.proxy.enterprise.utils import _create_clickhouse_material_views

    _create_clickhouse_aggregate_tables(client=client, table_names=table_names)
    _create_clickhouse_material_views(client=client, table_names=table_names)


class ClickhouseLogger:
    # Class variables or attributes
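Both helpers take a `table_names` list so tables and views that already exist are skipped. A hedged sketch of how that list could be assembled before the two calls above (the `SHOW TABLES` approach is an assumption for illustration, not necessarily what `_start_clickhouse()` actually does):

```python
# Hypothetical helper: collect existing table/view names so the
# _create_clickhouse_* functions can skip anything already present.
def _existing_table_names(client) -> list:
    response = client.query("SHOW TABLES")
    return [row[0] for row in response.result_rows]

# table_names = _existing_table_names(client)
# _create_clickhouse_aggregate_tables(client=client, table_names=table_names)
# _create_clickhouse_material_views(client=client, table_names=table_names)
```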

@@ -278,7 +278,11 @@ def completion(
        import google.auth
        ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
        print_verbose(
            f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
        )
        creds, _ = google.auth.default(quota_project_id=vertex_project)
        print_verbose(f"VERTEX AI: creds={creds}")
        vertexai.init(
            project=vertex_project, location=vertex_location, credentials=creds
        )
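For reference, a minimal SDK call that exercises this code path, passing the same per-request `vertex_project`/`vertex_location` parameters shown in the proxy config above (the project and location values are placeholders):

```python
import litellm

# Placeholder project/location - these are the values logged and then
# passed into vertexai.init() in the snippet above.
response = litellm.completion(
    model="vertex_ai/gemini-1.0-pro-vision-001",
    messages=[{"role": "user", "content": "Hi, what can you do?"}],
    vertex_project="project-id",
    vertex_location="us-central1",
)
print(response.choices[0].message.content)
```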

@@ -687,6 +687,15 @@
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-preview-0215": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-pro-vision": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
@@ -706,6 +715,17 @@
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "gemini-1.0-pro-vision-001": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
        "max_images_per_prompt": 16,
        "max_videos_per_prompt": 1,
        "max_video_length": 2,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-vision": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,

@@ -43,7 +43,7 @@ model_list:
      api_key: os.environ/OPENAI_API_KEY
litellm_settings:
  fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
-  success_callback: ['langfuse']
+  success_callback: ['clickhouse', 'langfuse']
  # setting callback class
  # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
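For the SDK (outside the proxy), a roughly equivalent setup is assigning the callback list in code; a minimal sketch (the model and message are placeholders, and an `OPENAI_API_KEY` would be needed for this particular model):

```python
import litellm

# Mirror the proxy_config change: report successful calls to ClickHouse and Langfuse.
litellm.success_callback = ["clickhouse", "langfuse"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)
```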

@@ -4357,6 +4357,7 @@ def get_optional_params(
        or model in litellm.vertex_code_text_models
        or model in litellm.vertex_language_models
        or model in litellm.vertex_embedding_models
        or model in litellm.vertex_vision_models
    ):
        print_verbose(f"(start) INSIDE THE VERTEX AI OPTIONAL PARAM BLOCK")
        ## check if unsupported param passed in

@@ -687,6 +687,15 @@
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-preview-0215": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0,
        "output_cost_per_token": 0,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-pro-vision": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
@@ -706,6 +715,17 @@
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "gemini-1.0-pro-vision-001": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
        "max_images_per_prompt": 16,
        "max_videos_per_prompt": 1,
        "max_video_length": 2,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "gemini-1.5-pro-vision": {
        "max_tokens": 8192,
        "max_input_tokens": 1000000,

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "1.27.12.dev1"
+version = "1.27.12"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
-version = "1.27.12.dev1"
+version = "1.27.12"
version_files = [
    "pyproject.toml:^version"
]