fix(utils.py): include system fingerprint in streaming response object

Krrish Dholakia 2023-11-30 08:45:35 -08:00
parent 78421d89aa
commit 01c7e18f31
3 changed files with 7 additions and 3 deletions
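For context, stream_chunk_builder is litellm's helper for rebuilding a complete response object from a list of streamed chunks; with this change the rebuilt object also carries the provider's system_fingerprint. A minimal usage sketch (placeholder model name; the fingerprint may be None for providers that don't return one):

    import litellm

    messages = [{"role": "user", "content": "Say hello"}]

    # Stream a completion and collect every chunk.
    chunks = []
    for chunk in litellm.completion(model="gpt-3.5-turbo", messages=messages, stream=True):
        chunks.append(chunk)

    # Rebuild a single, non-streaming-style response from the collected chunks.
    rebuilt = litellm.stream_chunk_builder(chunks, messages=messages)

    # After this commit, the fingerprint from the first chunk is preserved here.
    print(rebuilt.system_fingerprint)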

@@ -2143,6 +2143,7 @@ def stream_chunk_builder(chunks: list, messages: Optional[list]=None):
    object = chunks[0]["object"]
    created = chunks[0]["created"]
    model = chunks[0]["model"]
    system_fingerprint = chunks[0].get("system_fingerprint", None)
    role = chunks[0]["choices"][0]["delta"]["role"]
    finish_reason = chunks[-1]["choices"][0]["finish_reason"]
@@ -2152,6 +2153,7 @@ def stream_chunk_builder(chunks: list, messages: Optional[list]=None):
        "object": object,
        "created": created,
        "model": model,
        "system_fingerprint": system_fingerprint,
        "choices": [
            {
                "index": 0,

@@ -95,16 +95,18 @@ def test_stream_chunk_builder_litellm_tool_call():
    try:
        litellm.set_verbose = False
        response = litellm.completion(
            model="azure/chatgpt-functioncalling",
            model="azure/gpt-4-nov-release",
            messages=messages,
            tools=tools_schema,
            stream=True,
            api_key="os.environ/AZURE_FRANCE_API_KEY",
            api_base="https://openai-france-1234.openai.azure.com",
            complete_response = True
        )
        print(f"complete response: {response}")
        print(f"complete response usage: {response.usage}")
        assert response.system_fingerprint is not None
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
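Note: with stream=True, passing complete_response=True has litellm collect the chunks and return the rebuilt response (the stream_chunk_builder path this test exercises), which is why the assertions on response.usage and the new response.system_fingerprint run against a single aggregated object.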

@@ -1796,7 +1796,6 @@ def register_model(model_cost: Union[str, dict]):
        if key in litellm.model_cost:
            for k,v in loaded_model_cost[key].items():
                litellm.model_cost[key][k] = v
        # litellm.model_cost[key] = loaded_model_cost[key]
        # add new model names to provider lists
        if value.get('litellm_provider') == 'openai':
            if key not in litellm.open_ai_chat_completion_models:
@@ -5291,6 +5290,7 @@ class CustomStreamWrapper:
            if self.sent_first_chunk == False:
                model_response.choices[0].delta["role"] = "assistant"
                self.sent_first_chunk = True
            # LOGGING
            threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start() # log response
            return model_response
        else:
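The one-line addition in this last hunk is not visible in the excerpt above. Purely as an illustrative sketch (hypothetical class and attribute names, not the commit's literal change), a streaming wrapper that remembers the provider's fingerprint could propagate it onto each chunk it yields:

    # Illustrative sketch only; names are hypothetical, not litellm's actual code.
    class FingerprintingStreamWrapper:
        def __init__(self, stream, system_fingerprint=None):
            self.stream = stream
            # e.g. remembered from the first raw chunk the provider returned
            self.system_fingerprint = system_fingerprint

        def __iter__(self):
            for model_response in self.stream:
                if self.system_fingerprint is not None:
                    # mirror what the non-streaming response object exposes
                    model_response.system_fingerprint = self.system_fingerprint
                yield model_response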