forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (10/05/2024) (#6083)
* docs(prompt_caching.md): add prompt caching cost calc example to docs
* docs(prompt_caching.md): add proxy examples to docs
* feat(utils.py): expose new helper `supports_prompt_caching()` to check if a model supports prompt caching
* docs(prompt_caching.md): add docs on checking model support for prompt caching
* build: fix invalid json
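Based on the commit message above, the new helper can be called before a request to decide whether a model's prompt caching support is worth exploiting. A minimal sketch, not taken from the diff itself; the import path (litellm.utils) and the `model` keyword argument are assumptions:

    # Sketch of the helper named in the commit message.
    # Assumption: it lives in litellm/utils.py and takes the model name.
    from litellm.utils import supports_prompt_caching

    for model in ("gpt-4o", "gpt-4-0314"):
        if supports_prompt_caching(model=model):
            print(f"{model}: prompt caching supported")
        else:
            print(f"{model}: no prompt caching")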
This commit is contained in:
parent fac3b2ee42
commit f2c0a31e3c
7 changed files with 459 additions and 59 deletions
@@ -9,7 +9,8 @@
         "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4": {
         "max_tokens": 4096,
@@ -19,7 +20,8 @@
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o": {
         "max_tokens": 4096,
@@ -129,7 +131,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-2024-05-13": {
         "max_tokens": 4096,
@@ -141,7 +144,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-2024-08-06": {
         "max_tokens": 16384,
@@ -166,7 +170,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-0314": {
         "max_tokens": 4096,
@@ -175,7 +180,8 @@
         "input_cost_per_token": 0.00003,
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-0613": {
         "max_tokens": 4096,
@@ -185,7 +191,8 @@
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-32k": {
         "max_tokens": 4096,
@@ -194,7 +201,8 @@
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-32k-0314": {
         "max_tokens": 4096,
@@ -203,7 +211,8 @@
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-32k-0613": {
         "max_tokens": 4096,
@@ -212,7 +221,8 @@
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-turbo": {
         "max_tokens": 4096,
@@ -224,7 +234,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-turbo-2024-04-09": {
         "max_tokens": 4096,
@@ -236,7 +247,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-1106-preview": {
         "max_tokens": 4096,
@@ -247,7 +259,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-0125-preview": {
         "max_tokens": 4096,
@@ -258,7 +271,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-vision-preview": {
         "max_tokens": 4096,
@@ -268,7 +282,8 @@
         "output_cost_per_token": 0.00003,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-1106-vision-preview": {
         "max_tokens": 4096,
@@ -278,7 +293,8 @@
         "output_cost_per_token": 0.00003,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo": {
         "max_tokens": 4097,
@@ -288,7 +304,8 @@
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-0301": {
         "max_tokens": 4097,
@@ -297,7 +314,8 @@
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-0613": {
         "max_tokens": 4097,
@@ -307,7 +325,8 @@
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-1106": {
         "max_tokens": 16385,
@@ -318,7 +337,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-0125": {
         "max_tokens": 16385,
@@ -329,7 +349,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
    },
     "gpt-3.5-turbo-16k": {
         "max_tokens": 16385,
@@ -338,7 +359,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-16k-0613": {
         "max_tokens": 16385,
@@ -347,7 +369,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "ft:gpt-3.5-turbo": {
         "max_tokens": 4096,
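For reference, a quick way to see which models carry the new flag is to load the model-price JSON this diff edits and filter on it. A minimal sketch; the filename is an assumption based on litellm's repo layout, and the isinstance check skips the string-valued "sample_spec" documentation entry:

    # Sketch: list every model flagged with "supports_prompt_caching"
    # in the model-price map edited by this commit.
    import json

    with open("model_prices_and_context_window.json") as f:
        prices = json.load(f)

    cached = sorted(
        name
        for name, info in prices.items()
        if isinstance(info, dict) and info.get("supports_prompt_caching") is True
    )
    # After this change, expect gpt-4, gpt-4o, gpt-3.5-turbo, ... in the output.
    print("\n".join(cached))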