LiteLLM Minor Fixes & Improvements (10/05/2024) (#6083)

* docs(prompt_caching.md): add a prompt caching cost calculation example to the docs (see the sketch after this list)

* docs(prompt_caching.md): add proxy examples to docs

* feat(utils.py): expose a new helper, `supports_prompt_caching()`, to check whether a model supports prompt caching (see the usage sketch after this list)

* docs(prompt_caching.md): add docs on checking model support for prompt caching

* build: fix invalid json
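
The cost calculation example added to the docs is not reproduced in this diff. As an illustration only, the math reduces to billing cached prompt tokens at a discounted rate. The per-token prices below are gpt-4's values from the cost-map diff further down; the 50% cache discount and the token counts are assumptions for demonstration:

```python
# Illustrative prompt-caching cost math (a sketch, not LiteLLM's implementation).
input_cost_per_token = 0.00003   # gpt-4 input price from the diff below
output_cost_per_token = 0.00006  # gpt-4 output price from the diff below
cached_discount = 0.5            # assumption: cached input tokens bill at half price

prompt_tokens = 2048     # total input tokens (example value)
cached_tokens = 1024     # portion of the prompt served from the cache (example value)
completion_tokens = 256  # output tokens (example value)

cost = (
    (prompt_tokens - cached_tokens) * input_cost_per_token
    + cached_tokens * input_cost_per_token * cached_discount
    + completion_tokens * output_cost_per_token
)
print(f"${cost:.6f}")  # -> $0.061440
```

And a minimal sketch of the new helper, assuming it is exported at the top level of the `litellm` package with the signature `supports_prompt_caching(model: str) -> bool`:

```python
import litellm

# True when the model's cost-map entry sets "supports_prompt_caching": true
# (see the diff below).
if litellm.supports_prompt_caching(model="gpt-4"):
    print("gpt-4 supports prompt caching")
```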
Author: Krish Dholakia, 2024-10-05 18:59:11 -04:00 (committed by GitHub)
parent fac3b2ee42
commit f2c0a31e3c
7 changed files with 459 additions and 59 deletions

```diff
@@ -9,7 +9,8 @@
         "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4": {
         "max_tokens": 4096,
@@ -19,7 +20,8 @@
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o": {
         "max_tokens": 4096,
@@ -129,7 +131,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-2024-05-13": {
         "max_tokens": 4096,
@@ -141,7 +144,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-2024-08-06": {
         "max_tokens": 16384,
@@ -166,7 +170,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-0314": {
         "max_tokens": 4096,
@@ -175,7 +180,8 @@
         "input_cost_per_token": 0.00003,
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-0613": {
         "max_tokens": 4096,
@@ -185,7 +191,8 @@
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-32k": {
         "max_tokens": 4096,
@@ -194,7 +201,8 @@
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-32k-0314": {
         "max_tokens": 4096,
@@ -203,7 +211,8 @@
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-32k-0613": {
         "max_tokens": 4096,
@@ -212,7 +221,8 @@
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-turbo": {
         "max_tokens": 4096,
@@ -224,7 +234,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-turbo-2024-04-09": {
         "max_tokens": 4096,
@@ -236,7 +247,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-1106-preview": {
         "max_tokens": 4096,
@@ -247,7 +259,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-0125-preview": {
         "max_tokens": 4096,
@@ -258,7 +271,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-vision-preview": {
         "max_tokens": 4096,
@@ -268,7 +282,8 @@
         "output_cost_per_token": 0.00003,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-1106-vision-preview": {
         "max_tokens": 4096,
@@ -278,7 +293,8 @@
         "output_cost_per_token": 0.00003,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo": {
         "max_tokens": 4097,
@@ -288,7 +304,8 @@
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-0301": {
         "max_tokens": 4097,
@@ -297,7 +314,8 @@
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-0613": {
         "max_tokens": 4097,
@@ -307,7 +325,8 @@
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-1106": {
         "max_tokens": 16385,
@@ -318,7 +337,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-0125": {
         "max_tokens": 16385,
@@ -329,7 +349,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-16k": {
         "max_tokens": 16385,
@@ -338,7 +359,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-16k-0613": {
         "max_tokens": 16385,
@@ -347,7 +369,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "ft:gpt-3.5-turbo": {
         "max_tokens": 4096,
```