LiteLLM Minor Fixes & Improvements (10/05/2024) (#6083)

* docs(prompt_caching.md): add a prompt caching cost calculation example to the docs (see the sketch after this list)

* docs(prompt_caching.md): add proxy examples to docs

* feat(utils.py): expose a new helper, `supports_prompt_caching()`, to check whether a model supports prompt caching (see the usage sketch after this list)

* docs(prompt_caching.md): add docs on checking model support for prompt caching

* build: fix invalid json
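
The cost calculation example added to the docs is not reproduced in this diff. As an illustration only, the math reduces to billing cached prompt tokens at a discounted rate. The per-token prices below are gpt-4's values from the cost-map diff further down; the 50% cache discount and the token counts are assumptions for demonstration:

```python
# Illustrative prompt-caching cost math (a sketch, not LiteLLM's implementation).
input_cost_per_token = 0.00003   # gpt-4 input price from the diff below
output_cost_per_token = 0.00006  # gpt-4 output price from the diff below
cached_discount = 0.5            # assumption: cached input tokens bill at half price

prompt_tokens = 2048     # total input tokens (example value)
cached_tokens = 1024     # portion of the prompt served from the cache (example value)
completion_tokens = 256  # output tokens (example value)

cost = (
    (prompt_tokens - cached_tokens) * input_cost_per_token
    + cached_tokens * input_cost_per_token * cached_discount
    + completion_tokens * output_cost_per_token
)
print(f"${cost:.6f}")  # -> $0.061440
```

And a minimal sketch of the new helper, assuming it is exported at the top level of the `litellm` package with the signature `supports_prompt_caching(model: str) -> bool`:

```python
import litellm

# True when the model's cost-map entry sets "supports_prompt_caching": true
# (see the diff below).
if litellm.supports_prompt_caching(model="gpt-4"):
    print("gpt-4 supports prompt caching")
```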
Author: Krish Dholakia, 2024-10-05 18:59:11 -04:00 (committed by GitHub)
parent fac3b2ee42
commit f2c0a31e3c
7 changed files with 459 additions and 59 deletions

```diff
@@ -9,7 +9,8 @@
         "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4": {
         "max_tokens": 4096,
@@ -19,7 +20,8 @@
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o": {
         "max_tokens": 4096,
@@ -129,7 +131,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-2024-05-13": {
         "max_tokens": 4096,
@@ -141,7 +144,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4o-2024-08-06": {
         "max_tokens": 16384,
@@ -166,7 +170,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-0314": {
         "max_tokens": 4096,
@@ -175,7 +180,8 @@
         "input_cost_per_token": 0.00003,
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-0613": {
         "max_tokens": 4096,
@@ -185,7 +191,8 @@
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-32k": {
         "max_tokens": 4096,
@@ -194,7 +201,8 @@
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-32k-0314": {
         "max_tokens": 4096,
@@ -203,7 +211,8 @@
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-32k-0613": {
         "max_tokens": 4096,
@@ -212,7 +221,8 @@
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-4-turbo": {
         "max_tokens": 4096,
@@ -224,7 +234,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-turbo-2024-04-09": {
         "max_tokens": 4096,
@@ -236,7 +247,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-1106-preview": {
         "max_tokens": 4096,
@@ -247,7 +259,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-0125-preview": {
         "max_tokens": 4096,
@@ -258,7 +271,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-vision-preview": {
         "max_tokens": 4096,
@@ -268,7 +282,8 @@
         "output_cost_per_token": 0.00003,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-4-1106-vision-preview": {
         "max_tokens": 4096,
@@ -278,7 +293,8 @@
         "output_cost_per_token": 0.00003,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo": {
         "max_tokens": 4097,
@@ -288,7 +304,8 @@
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-0301": {
         "max_tokens": 4097,
@@ -297,7 +314,8 @@
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-0613": {
         "max_tokens": 4097,
@@ -307,7 +325,8 @@
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-1106": {
         "max_tokens": 16385,
@@ -318,7 +337,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-0125": {
         "max_tokens": 16385,
@@ -329,7 +349,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_parallel_function_calling": true
+        "supports_parallel_function_calling": true,
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-16k": {
         "max_tokens": 16385,
@@ -338,7 +359,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "gpt-3.5-turbo-16k-0613": {
         "max_tokens": 16385,
@@ -347,7 +369,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_prompt_caching": true
     },
     "ft:gpt-3.5-turbo": {
         "max_tokens": 4096,
```