Litellm dev 11 20 2024 (#6838)

* feat(customer_endpoints.py): support passing budget duration via `/customer/new` endpoint

Closes https://github.com/BerriAI/litellm/issues/5651

* docs: add missing params to swagger + api documentation test

* docs: add documentation for all key endpoints

documents all params on swagger

* docs(internal_user_endpoints.py): document all /user/new params

Ensures all params are documented

* docs(team_endpoints.py): add missing documentation for team endpoints

Ensures 100% param documentation on swagger

* docs(organization_endpoints.py): document all org params

Adds documentation for all params in org endpoint

* docs(customer_endpoints.py): add coverage for all params on /customer endpoints

ensures all /customer/* params are documented

* ci(config.yml): add endpoint doc testing to ci/cd

* fix: fix internal_user_endpoints.py

* fix(internal_user_endpoints.py): support 'duration' param

* fix(partner_models/main.py): fix anthropic re-raise exception on vertex

* fix: fix pydantic obj

* build(model_prices_and_context_window.json): add new vertex claude model names

vertex claude changed model names - causes cost tracking errors
This commit is contained in:
Krish Dholakia 2024-11-21 05:20:37 +05:30 committed by GitHub
parent 0b0253f7ad
commit b11bc0374e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 148 additions and 8 deletions

View file

@ -86,3 +86,20 @@ Be aware that if you are continuing an existing trace, and you set `update_trace
## Support
For any question or issue with the integration you can reach out to the OpenLLMetry maintainers on [Slack](https://traceloop.com/slack) or via [email](mailto:dev@traceloop.com).
## Troubleshooting
### Trace LiteLLM Proxy user/key/org/team information on failed requests
LiteLLM emits the user_api_key_metadata
- key hash
- key_alias
- org_id
- user_id
- team_id
for successful and failed requests. To view them, click under `litellm_request` in the trace.
<Image img={require('../../img/otel_debug_trace.png')} />

Binary file not shown.

After

Width:  |  Height:  |  Size: 437 KiB

View file

@ -2827,6 +2827,18 @@
"mode": "chat", "mode": "chat",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
}, },
"vertex_ai/claude-3-sonnet": {
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-sonnet@20240229": { "vertex_ai/claude-3-sonnet@20240229": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 200000, "max_input_tokens": 200000,
@ -2839,6 +2851,18 @@
"supports_vision": true, "supports_vision": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-5-sonnet": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-sonnet@20240620": { "vertex_ai/claude-3-5-sonnet@20240620": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 200000, "max_input_tokens": 200000,
@ -2851,6 +2875,18 @@
"supports_vision": true, "supports_vision": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-5-sonnet-v2": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-sonnet-v2@20241022": { "vertex_ai/claude-3-5-sonnet-v2@20241022": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 200000, "max_input_tokens": 200000,
@ -2863,10 +2899,10 @@
"supports_vision": true, "supports_vision": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-5-haiku@20241022": { "vertex_ai/claude-3-haiku": {
"max_tokens": 8192, "max_tokens": 4096,
"max_input_tokens": 200000, "max_input_tokens": 200000,
"max_output_tokens": 8192, "max_output_tokens": 4096,
"input_cost_per_token": 0.00000025, "input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000125, "output_cost_per_token": 0.00000125,
"litellm_provider": "vertex_ai-anthropic_models", "litellm_provider": "vertex_ai-anthropic_models",
@ -2887,6 +2923,17 @@
"supports_vision": true, "supports_vision": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-5-haiku": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000005,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-haiku@20241022": { "vertex_ai/claude-3-5-haiku@20241022": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 200000, "max_input_tokens": 200000,
@ -2898,6 +2945,18 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-opus": {
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-opus@20240229": { "vertex_ai/claude-3-opus@20240229": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 200000, "max_input_tokens": 200000,

View file

@ -6,6 +6,11 @@ model_list:
- model_name: rerank-model - model_name: rerank-model
litellm_params: litellm_params:
model: jina_ai/jina-reranker-v2-base-multilingual model: jina_ai/jina-reranker-v2-base-multilingual
- model_name: anthropic-vertex
litellm_params:
model: vertex_ai/claude-3-5-sonnet-v2
vertex_ai_project: "adroit-crow-413218"
vertex_ai_location: "us-east5"
router_settings: router_settings:

View file

@ -2827,6 +2827,18 @@
"mode": "chat", "mode": "chat",
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
}, },
"vertex_ai/claude-3-sonnet": {
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-sonnet@20240229": { "vertex_ai/claude-3-sonnet@20240229": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 200000, "max_input_tokens": 200000,
@ -2839,6 +2851,18 @@
"supports_vision": true, "supports_vision": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-5-sonnet": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-sonnet@20240620": { "vertex_ai/claude-3-5-sonnet@20240620": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 200000, "max_input_tokens": 200000,
@ -2851,6 +2875,18 @@
"supports_vision": true, "supports_vision": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-5-sonnet-v2": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-sonnet-v2@20241022": { "vertex_ai/claude-3-5-sonnet-v2@20241022": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 200000, "max_input_tokens": 200000,
@ -2863,10 +2899,10 @@
"supports_vision": true, "supports_vision": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-5-haiku@20241022": { "vertex_ai/claude-3-haiku": {
"max_tokens": 8192, "max_tokens": 4096,
"max_input_tokens": 200000, "max_input_tokens": 200000,
"max_output_tokens": 8192, "max_output_tokens": 4096,
"input_cost_per_token": 0.00000025, "input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000125, "output_cost_per_token": 0.00000125,
"litellm_provider": "vertex_ai-anthropic_models", "litellm_provider": "vertex_ai-anthropic_models",
@ -2887,6 +2923,17 @@
"supports_vision": true, "supports_vision": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-5-haiku": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000005,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-5-haiku@20241022": { "vertex_ai/claude-3-5-haiku@20241022": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 200000, "max_input_tokens": 200000,
@ -2898,6 +2945,18 @@
"supports_function_calling": true, "supports_function_calling": true,
"supports_assistant_prefill": true "supports_assistant_prefill": true
}, },
"vertex_ai/claude-3-opus": {
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"litellm_provider": "vertex_ai-anthropic_models",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_assistant_prefill": true
},
"vertex_ai/claude-3-opus@20240229": { "vertex_ai/claude-3-opus@20240229": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 200000, "max_input_tokens": 200000,

View file

@ -770,7 +770,7 @@ def test_vertex_ai_claude_completion_cost():
) )
], ],
created=1700775391, created=1700775391,
model="vertex_ai/claude-3-sonnet@20240229", model="claude-3-sonnet",
object="chat.completion", object="chat.completion",
system_fingerprint=None, system_fingerprint=None,
usage=Usage( usage=Usage(
@ -780,7 +780,7 @@ def test_vertex_ai_claude_completion_cost():
), ),
) )
cost = litellm.completion_cost( cost = litellm.completion_cost(
model="vertex_ai/claude-3-sonnet@20240229", model="vertex_ai/claude-3-sonnet",
completion_response=response, completion_response=response,
messages=[{"role": "user", "content": "Hey, how's it going?"}], messages=[{"role": "user", "content": "Hey, how's it going?"}],
) )