(feat) openai prompt caching (non streaming) - add prompt_tokens_details in usage response (#6039)

* add prompt_tokens_details in usage response

* use _prompt_tokens_details as a param in Usage

* fix linting errors

* fix type error

* fix ci/cd deps

* bump deps for openai

* bump deps openai

* fix llm translation testing

* fix llm translation embedding
This commit is contained in:
Ishaan Jaff 2024-10-03 11:01:10 -07:00 committed by GitHub
parent 9fccb4a0da
commit 4e88fd65e1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 1515 additions and 1428 deletions

View file

@ -46,6 +46,7 @@ def mock_chat_response() -> Dict[str, Any]:
"completion_tokens": 38,
"completion_tokens_details": None,
"total_tokens": 268,
"prompt_tokens_details": None,
},
"system_fingerprint": None,
}
@ -201,6 +202,7 @@ def mock_embedding_response() -> Dict[str, Any]:
"total_tokens": 8,
"completion_tokens": 0,
"completion_tokens_details": None,
"prompt_tokens_details": None,
},
}

View file

@ -0,0 +1,34 @@
import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import httpx
import pytest
from respx import MockRouter
import litellm
from litellm import Choices, Message, ModelResponse
from litellm.types.utils import PromptTokensDetails
@pytest.mark.asyncio
async def test_prompt_caching():
    """
    Tests that:
    - prompt_tokens_details is correctly handled and returned as PromptTokensDetails type

    Issues a real completion call and checks that the usage object exposes
    ``prompt_tokens_details`` as a typed ``PromptTokensDetails`` instance
    rather than a raw dict.
    """
    # NOTE(review): AsyncMock / MockRouter are imported at module level but
    # unused here — this test hits the live API; verify that is intentional.
    completion = await litellm.acompletion(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "hi"}],
    )

    usage = completion.usage
    print("response1", completion)
    print("response1.usage", usage)
    print("type of prompt_tokens_details", type(usage.prompt_tokens_details))

    # The typed wrapper (not a plain dict) is the contract under test.
    assert isinstance(usage.prompt_tokens_details, PromptTokensDetails)