(feat) OpenAI prompt caching models to model cost map (#6063)
* add prompt caching for latest models
* add cache_read_input_token_cost for prompt caching models
parent 6d1de8e1ee
commit fc6e0dd6cb
2 changed files with 36 additions and 12 deletions
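
The new `cache_read_input_token_cost` field prices prompt tokens served from OpenAI's prompt cache at a discounted per-token rate, alongside the existing `input_cost_per_token` and `output_cost_per_token`. Below is a minimal sketch of how a cost calculator might consume these fields. The function name is hypothetical, and the billing split (cached tokens at the discounted rate, the remaining prompt tokens at the full input rate) is an assumption mirroring OpenAI's published prompt-caching pricing; the rates are copied from the gpt-4o-mini entry in the diff below.

```python
def prompt_caching_cost(
    entry: dict,
    prompt_tokens: int,
    cached_tokens: int,
    completion_tokens: int,
) -> float:
    """Estimate request cost from a model-cost-map entry.

    Assumption: cached prompt tokens bill at cache_read_input_token_cost,
    the rest of the prompt at input_cost_per_token.
    """
    # Fall back to the full input rate for entries that predate this commit.
    cache_rate = entry.get("cache_read_input_token_cost", entry["input_cost_per_token"])
    uncached = prompt_tokens - cached_tokens
    return (
        cached_tokens * cache_rate
        + uncached * entry["input_cost_per_token"]
        + completion_tokens * entry["output_cost_per_token"]
    )

# Rates taken from the gpt-4o-mini entry in the diff below.
gpt_4o_mini = {
    "input_cost_per_token": 0.00000015,
    "output_cost_per_token": 0.00000060,
    "cache_read_input_token_cost": 0.000000075,
}
# 10k-token prompt, 8k of it cached, 500 completion tokens.
print(prompt_caching_cost(gpt_4o_mini, 10_000, 8_000, 500))  # ≈ $0.0012
```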
```diff
@@ -27,11 +27,13 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000005,
         "output_cost_per_token": 0.000015,
+        "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-mini": {
         "max_tokens": 16384,
@@ -39,11 +41,13 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000015,
         "output_cost_per_token": 0.00000060,
+        "cache_read_input_token_cost": 0.000000075,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-mini-2024-07-18": {
         "max_tokens": 16384,
@@ -51,11 +55,13 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000015,
         "output_cost_per_token": 0.00000060,
+        "cache_read_input_token_cost": 0.000000075,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "o1-mini": {
         "max_tokens": 65536,
@@ -63,11 +69,13 @@
         "max_output_tokens": 65536,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000012,
+        "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "o1-mini-2024-09-12": {
         "max_tokens": 65536,
@@ -87,11 +95,13 @@
         "max_output_tokens": 32768,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "o1-preview-2024-09-12": {
         "max_tokens": 32768,
@@ -135,11 +145,13 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000025,
         "output_cost_per_token": 0.000010,
+        "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-turbo-preview": {
         "max_tokens": 4096,
```
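
Consumers can read the new flag and rate straight out of the JSON map. A quick sketch, assuming the map has been saved locally (the file path is hypothetical; adjust it to wherever the cost map JSON lives in your checkout or deployment):

```python
import json

# Path is an assumption for illustration, not a documented location.
with open("model_prices_and_context_window.json") as f:
    model_cost = json.load(f)

entry = model_cost["gpt-4o-mini"]
if entry.get("supports_prompt_caching"):
    # Entries without a discounted rate fall back to the full input rate.
    rate = entry.get("cache_read_input_token_cost", entry["input_cost_per_token"])
    print(f"cached input tokens cost ${rate:.9f} each")
```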
The diff for the second changed file is identical: the same six hunks, adding `cache_read_input_token_cost` and `supports_prompt_caching` to the same model entries.