diff --git a/docs/my-website/docs/observability/opentelemetry_integration.md b/docs/my-website/docs/observability/opentelemetry_integration.md index 218064b3d..5df82c93c 100644 --- a/docs/my-website/docs/observability/opentelemetry_integration.md +++ b/docs/my-website/docs/observability/opentelemetry_integration.md @@ -86,3 +86,20 @@ Be aware that if you are continuing an existing trace, and you set `update_trace ## Support For any question or issue with the integration you can reach out to the OpenLLMetry maintainers on [Slack](https://traceloop.com/slack) or via [email](mailto:dev@traceloop.com). + +## Troubleshooting + +### Trace LiteLLM Proxy user/key/org/team information on failed requests + +LiteLLM emits the following `user_api_key_metadata` fields: +- key hash +- key_alias +- org_id +- user_id +- team_id + +These fields are emitted for both successful and failed requests. + +To inspect them, click `litellm_request` in the trace. + + \ No newline at end of file diff --git a/docs/my-website/img/otel_debug_trace.png b/docs/my-website/img/otel_debug_trace.png new file mode 100644 index 000000000..94fe5742f Binary files /dev/null and b/docs/my-website/img/otel_debug_trace.png differ diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 5e4f851e9..606a2756b 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -2827,6 +2827,18 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "vertex_ai/claude-3-sonnet": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-sonnet@20240229": { "max_tokens": 4096, "max_input_tokens": 200000, 
@@ -2839,6 +2851,18 @@ "supports_vision": true, "supports_assistant_prefill": true }, + "vertex_ai/claude-3-5-sonnet": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-5-sonnet@20240620": { "max_tokens": 8192, "max_input_tokens": 200000, @@ -2851,6 +2875,18 @@ "supports_vision": true, "supports_assistant_prefill": true }, + "vertex_ai/claude-3-5-sonnet-v2": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-5-sonnet-v2@20241022": { "max_tokens": 8192, "max_input_tokens": 200000, @@ -2863,10 +2899,10 @@ "supports_vision": true, "supports_assistant_prefill": true }, - "vertex_ai/claude-3-5-haiku@20241022": { - "max_tokens": 8192, + "vertex_ai/claude-3-haiku": { + "max_tokens": 4096, "max_input_tokens": 200000, - "max_output_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.00000125, "litellm_provider": "vertex_ai-anthropic_models", @@ -2887,6 +2923,17 @@ "supports_vision": true, "supports_assistant_prefill": true }, + "vertex_ai/claude-3-5-haiku": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-5-haiku@20241022": { "max_tokens": 8192, 
"max_input_tokens": 200000, @@ -2898,6 +2945,18 @@ "supports_function_calling": true, "supports_assistant_prefill": true }, + "vertex_ai/claude-3-opus": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-opus@20240229": { "max_tokens": 4096, "max_input_tokens": 200000, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index b06a9e667..1155e0466 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -6,6 +6,11 @@ model_list: - model_name: rerank-model litellm_params: model: jina_ai/jina-reranker-v2-base-multilingual + - model_name: anthropic-vertex + litellm_params: + model: vertex_ai/claude-3-5-sonnet-v2 + vertex_ai_project: "adroit-crow-413218" + vertex_ai_location: "us-east5" router_settings: diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 5e4f851e9..606a2756b 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -2827,6 +2827,18 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "vertex_ai/claude-3-sonnet": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-sonnet@20240229": { "max_tokens": 4096, "max_input_tokens": 200000, @@ -2839,6 +2851,18 @@ "supports_vision": true, "supports_assistant_prefill": true }, + 
"vertex_ai/claude-3-5-sonnet": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-5-sonnet@20240620": { "max_tokens": 8192, "max_input_tokens": 200000, @@ -2851,6 +2875,18 @@ "supports_vision": true, "supports_assistant_prefill": true }, + "vertex_ai/claude-3-5-sonnet-v2": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-5-sonnet-v2@20241022": { "max_tokens": 8192, "max_input_tokens": 200000, @@ -2863,10 +2899,10 @@ "supports_vision": true, "supports_assistant_prefill": true }, - "vertex_ai/claude-3-5-haiku@20241022": { - "max_tokens": 8192, + "vertex_ai/claude-3-haiku": { + "max_tokens": 4096, "max_input_tokens": 200000, - "max_output_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.00000125, "litellm_provider": "vertex_ai-anthropic_models", @@ -2887,6 +2923,17 @@ "supports_vision": true, "supports_assistant_prefill": true }, + "vertex_ai/claude-3-5-haiku": { + "max_tokens": 8192, + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-5-haiku@20241022": { "max_tokens": 8192, "max_input_tokens": 200000, @@ -2898,6 +2945,18 @@ "supports_function_calling": true, 
"supports_assistant_prefill": true }, + "vertex_ai/claude-3-opus": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true + }, "vertex_ai/claude-3-opus@20240229": { "max_tokens": 4096, "max_input_tokens": 200000, diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py index 21659a4c1..cce8d6d67 100644 --- a/tests/local_testing/test_completion_cost.py +++ b/tests/local_testing/test_completion_cost.py @@ -770,7 +770,7 @@ def test_vertex_ai_claude_completion_cost(): ) ], created=1700775391, - model="vertex_ai/claude-3-sonnet@20240229", + model="claude-3-sonnet", object="chat.completion", system_fingerprint=None, usage=Usage( @@ -780,7 +780,7 @@ def test_vertex_ai_claude_completion_cost(): ), ) cost = litellm.completion_cost( - model="vertex_ai/claude-3-sonnet@20240229", + model="vertex_ai/claude-3-sonnet", completion_response=response, messages=[{"role": "user", "content": "Hey, how's it going?"}], )