Litellm dev 11 20 2024 (#6838)

* feat(customer_endpoints.py): support passing budget duration via `/customer/new` endpoint Closes https://github.com/BerriAI/litellm/issues/5651 * docs: add missing params to swagger + api documentation test * docs: add documentation for all key endpoints documents all params on swagger * docs(internal_user_endpoints.py): document all /user/new params Ensures all params are documented * docs(team_endpoints.py): add missing documentation for team endpoints Ensures 100% param documentation on swagger * docs(organization_endpoints.py): document all org params Adds documentation for all params in org endpoint * docs(customer_endpoints.py): add coverage for all params on /customer endpoints ensures all /customer/* params are documented * ci(config.yml): add endpoint doc testing to ci/cd * fix: fix internal_user_endpoints.py * fix(internal_user_endpoints.py): support 'duration' param * fix(partner_models/main.py): fix anthropic re-raise exception on vertex * fix: fix pydantic obj * build(model_prices_and_context_window.json): add new vertex claude model names vertex claude changed model names - causes cost tracking errors
2024-11-21 05:20:37 +05:30 · 2024-11-21 05:20:37 +05:30 · b11bc0374e
commit b11bc0374e
parent 0b0253f7ad
6 changed files with 148 additions and 8 deletions
--- a/docs/my-website/docs/observability/opentelemetry_integration.md
+++ b/docs/my-website/docs/observability/opentelemetry_integration.md
@ -86,3 +86,20 @@ Be aware that if you are continuing an existing trace, and you set `update_trace
 ## Support

 For any question or issue with the integration you can reach out to the OpenLLMetry maintainers on [Slack](https://traceloop.com/slack) or via [email](mailto:dev@traceloop.com).
+
+## Troubleshooting
+
+### Trace LiteLLM Proxy user/key/org/team information on failed requests
+
+LiteLLM emits the following `user_api_key_metadata` fields:
+- key hash
+- key_alias
+- org_id
+- user_id
+- team_id
+
+for both successful and failed requests.
+
+To view this metadata, click under `litellm_request` in the trace:
+
+<Image img={require('../../img/otel_debug_trace.png')} />
--- a/docs/my-website/img/otel_debug_trace.png
+++ b/docs/my-website/img/otel_debug_trace.png
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -2827,6 +2827,18 @@
        "mode": "chat",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
    },
+    "vertex_ai/claude-3-sonnet": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-sonnet@20240229": {
        "max_tokens": 4096,
        "max_input_tokens": 200000,
@ -2839,6 +2851,18 @@
        "supports_vision": true,
        "supports_assistant_prefill": true
    },
+    "vertex_ai/claude-3-5-sonnet": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-5-sonnet@20240620": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
@ -2851,6 +2875,18 @@
        "supports_vision": true,
        "supports_assistant_prefill": true
    },
+    "vertex_ai/claude-3-5-sonnet-v2": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-5-sonnet-v2@20241022": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
@ -2863,10 +2899,10 @@
        "supports_vision": true,
        "supports_assistant_prefill": true
    },
-    "vertex_ai/claude-3-5-haiku@20241022": {
-        "max_tokens": 8192, 
+    "vertex_ai/claude-3-haiku": {
+        "max_tokens": 4096, 
        "max_input_tokens": 200000,
-        "max_output_tokens": 8192,
+        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.00000125,
        "litellm_provider": "vertex_ai-anthropic_models",
@ -2887,6 +2923,17 @@
        "supports_vision": true,
        "supports_assistant_prefill": true
    },
+    "vertex_ai/claude-3-5-haiku": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-5-haiku@20241022": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
@ -2898,6 +2945,18 @@
        "supports_function_calling": true,
        "supports_assistant_prefill": true
    },
+    "vertex_ai/claude-3-opus": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000075,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-opus@20240229": {
        "max_tokens": 4096,
        "max_input_tokens": 200000,
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@ -6,6 +6,11 @@ model_list:
  - model_name: rerank-model
    litellm_params:
      model: jina_ai/jina-reranker-v2-base-multilingual
+  - model_name: anthropic-vertex
+    litellm_params:
+      model: vertex_ai/claude-3-5-sonnet-v2
+      vertex_ai_project: "adroit-crow-413218"
+      vertex_ai_location: "us-east5"  


 router_settings:
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -2827,6 +2827,18 @@
        "mode": "chat",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
    },
+    "vertex_ai/claude-3-sonnet": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-sonnet@20240229": {
        "max_tokens": 4096,
        "max_input_tokens": 200000,
@ -2839,6 +2851,18 @@
        "supports_vision": true,
        "supports_assistant_prefill": true
    },
+    "vertex_ai/claude-3-5-sonnet": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-5-sonnet@20240620": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
@ -2851,6 +2875,18 @@
        "supports_vision": true,
        "supports_assistant_prefill": true
    },
+    "vertex_ai/claude-3-5-sonnet-v2": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-5-sonnet-v2@20241022": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
@ -2863,10 +2899,10 @@
        "supports_vision": true,
        "supports_assistant_prefill": true
    },
-    "vertex_ai/claude-3-5-haiku@20241022": {
-        "max_tokens": 8192, 
+    "vertex_ai/claude-3-haiku": {
+        "max_tokens": 4096, 
        "max_input_tokens": 200000,
-        "max_output_tokens": 8192,
+        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.00000125,
        "litellm_provider": "vertex_ai-anthropic_models",
@ -2887,6 +2923,17 @@
        "supports_vision": true,
        "supports_assistant_prefill": true
    },
+    "vertex_ai/claude-3-5-haiku": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-5-haiku@20241022": {
        "max_tokens": 8192,
        "max_input_tokens": 200000,
@ -2898,6 +2945,18 @@
        "supports_function_calling": true,
        "supports_assistant_prefill": true
    },
+    "vertex_ai/claude-3-opus": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000075,
+        "litellm_provider": "vertex_ai-anthropic_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true
+    },
    "vertex_ai/claude-3-opus@20240229": {
        "max_tokens": 4096,
        "max_input_tokens": 200000,
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@ -770,7 +770,7 @@ def test_vertex_ai_claude_completion_cost():
            )
        ],
        created=1700775391,
-        model="vertex_ai/claude-3-sonnet@20240229",
+        model="claude-3-sonnet",
        object="chat.completion",
        system_fingerprint=None,
        usage=Usage(
@ -780,7 +780,7 @@ def test_vertex_ai_claude_completion_cost():
        ),
    )
    cost = litellm.completion_cost(
-        model="vertex_ai/claude-3-sonnet@20240229",
+        model="vertex_ai/claude-3-sonnet",
        completion_response=response,
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )