(feat) openai prompt caching (non streaming) - add prompt_tokens_details in usage response (#6039)
* add prompt_tokens_details in usage response
* use _prompt_tokens_details as a param in Usage
* fix linting errors
* fix type error
* fix ci/cd deps
* bump deps for openai
* bump deps openai
* fix llm translation testing
* fix llm translation embedding
commit 4e88fd65e1
parent 9fccb4a0da
10 changed files with 1515 additions and 1428 deletions
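Per the commit message, Usage now accepts the prompt-token breakdown as a parameter. A minimal sketch of the shape this change implies (not the actual litellm source; the pydantic base and field defaults are assumptions, with PromptTokensDetails mirroring OpenAI's usage.prompt_tokens_details object):

    from typing import Optional

    from pydantic import BaseModel


    class PromptTokensDetails(BaseModel):
        # Mirrors OpenAI's usage.prompt_tokens_details breakdown.
        audio_tokens: Optional[int] = None
        cached_tokens: Optional[int] = None


    class Usage(BaseModel):
        prompt_tokens: int = 0
        completion_tokens: int = 0
        total_tokens: int = 0
        # Newly threaded through; stays None for providers that don't report it.
        prompt_tokens_details: Optional[PromptTokensDetails] = None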
@@ -46,6 +46,7 @@ def mock_chat_response() -> Dict[str, Any]:
             "completion_tokens": 38,
             "completion_tokens_details": None,
             "total_tokens": 268,
+            "prompt_tokens_details": None,
         },
         "system_fingerprint": None,
     }
@@ -201,6 +202,7 @@ def mock_embedding_response() -> Dict[str, Any]:
         "total_tokens": 8,
         "completion_tokens": 0,
         "completion_tokens_details": None,
+        "prompt_tokens_details": None,
     },
 }
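The embedding mock gains the same field, since embedding responses share the Usage shape. A minimal sketch of inspecting it on a live call (hedged: assumes an OpenAI key is configured; the model name is only an example):

    import litellm

    resp = litellm.embedding(
        model="text-embedding-3-small",
        input=["hello world"],
    )
    # Providers that don't report a breakdown leave this as None.
    print(resp.usage.prompt_tokens_details)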
tests/llm_translation/test_prompt_caching.py (new file, 34 lines)
@@ -0,0 +1,34 @@
+import json
+import os
+import sys
+from datetime import datetime
+from unittest.mock import AsyncMock
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+
+import httpx
+import pytest
+from respx import MockRouter
+
+import litellm
+from litellm import Choices, Message, ModelResponse
+from litellm.types.utils import PromptTokensDetails
+
+
+@pytest.mark.asyncio
+async def test_prompt_caching():
+    """
+    Tests that:
+    - prompt_tokens_details is correctly handled and returned as PromptTokensDetails type
+    """
+    response1 = await litellm.acompletion(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "hi"}],
+    )
+    print("response1", response1)
+    print("response1.usage", response1.usage)
+    print("type of prompt_tokens_details", type(response1.usage.prompt_tokens_details))
+    assert isinstance(response1.usage.prompt_tokens_details, PromptTokensDetails)
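The test above only type-checks a single small call. For reference, OpenAI's prompt caching populates the cached-token count only when a sufficiently long prefix repeats across requests, so a sketch that would actually exercise it looks more like the following (hedged: assumes PromptTokensDetails exposes cached_tokens as in OpenAI's usage schema, and that the roughly 1024-token minimum matches OpenAI's documented threshold; the filler prefix is illustrative):

    import asyncio

    import litellm


    async def main():
        # OpenAI only caches prompts past a minimum length (about 1024 tokens),
        # so repeat a long shared prefix across two calls.
        prefix = "You are a meticulous assistant. " * 200  # illustrative filler
        for i in range(2):
            resp = await litellm.acompletion(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prefix + "Say hello."}],
            )
            details = resp.usage.prompt_tokens_details
            # The second call may report part of the prefix as served from cache.
            print(f"call {i}: cached_tokens =", details.cached_tokens if details else None)


    asyncio.run(main())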