(feat) Add OpenAI prompt caching models to the model cost map (#6063)

* add prompt caching for latest models

* add cache_read_input_token_cost for prompt caching models
Ishaan Jaff 2024-10-04 19:12:13 +05:30 committed by GitHub
parent 6d1de8e1ee
commit fc6e0dd6cb
2 changed files with 36 additions and 12 deletions
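
The two fields added in every hunk below drive cost tracking for OpenAI's prompt caching: `supports_prompt_caching` flags the model, and `cache_read_input_token_cost` is the per-token rate billed for prompt tokens served from cache. A minimal sketch of the resulting arithmetic (the `cost_for_request` helper is hypothetical, not LiteLLM's internal cost function; OpenAI reports the cached-token count under `usage.prompt_tokens_details.cached_tokens`):

# Illustrative sketch only -- not LiteLLM's internal cost function.
# The "gpt-4o-mini" entry mirrors the values added in this commit.
MODEL_COST = {
    "gpt-4o-mini": {
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000060,
        "cache_read_input_token_cost": 0.000000075,
        "supports_prompt_caching": True,
    }
}

def cost_for_request(model: str, prompt_tokens: int, cached_tokens: int,
                     completion_tokens: int) -> float:
    """Hypothetical helper: bill cached prompt tokens at the discounted
    cache-read rate, everything else at the normal input/output rates."""
    entry = MODEL_COST[model]
    cache_rate = entry.get("cache_read_input_token_cost", 0.0)
    if not entry.get("supports_prompt_caching"):
        cached_tokens = 0  # model can't cache: bill the full prompt as input
    uncached_tokens = prompt_tokens - cached_tokens
    return (uncached_tokens * entry["input_cost_per_token"]
            + cached_tokens * cache_rate
            + completion_tokens * entry["output_cost_per_token"])

# 10k-token prompt with 8k served from cache, plus a 500-token completion:
print(cost_for_request("gpt-4o-mini", 10_000, 8_000, 500))
# 2000 * 1.5e-07 + 8000 * 7.5e-08 + 500 * 6.0e-07 = $0.0012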


@@ -27,11 +27,13 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000005,
         "output_cost_per_token": 0.000015,
+        "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-mini": {
         "max_tokens": 16384,
@@ -39,11 +41,13 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000015,
         "output_cost_per_token": 0.00000060,
+        "cache_read_input_token_cost": 0.000000075,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-mini-2024-07-18": {
         "max_tokens": 16384,
@@ -51,11 +55,13 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000015,
         "output_cost_per_token": 0.00000060,
+        "cache_read_input_token_cost": 0.000000075,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "o1-mini": {
         "max_tokens": 65536,
@@ -63,11 +69,13 @@
         "max_output_tokens": 65536,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000012,
+        "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "o1-mini-2024-09-12": {
         "max_tokens": 65536,
@@ -87,11 +95,13 @@
         "max_output_tokens": 32768,
         "input_cost_per_token": 0.000015,
         "output_cost_per_token": 0.000060,
+        "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "o1-preview-2024-09-12": {
         "max_tokens": 32768,
@@ -135,11 +145,13 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000025,
         "output_cost_per_token": 0.000010,
+        "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-turbo-preview": {
         "max_tokens": 4096,

(The second changed file receives the same six hunks, applied identically.)