fix(utils.py): include system fingerprint in streaming response object
This commit is contained in:
parent
78421d89aa
commit
01c7e18f31
3 changed files with 7 additions and 3 deletions
```diff
@@ -2143,6 +2143,7 @@ def stream_chunk_builder(chunks: list, messages: Optional[list]=None):
     object = chunks[0]["object"]
     created = chunks[0]["created"]
     model = chunks[0]["model"]
+    system_fingerprint = chunks[0].get("system_fingerprint", None)
     role = chunks[0]["choices"][0]["delta"]["role"]
     finish_reason = chunks[-1]["choices"][0]["finish_reason"]
```
```diff
@@ -2152,6 +2153,7 @@ def stream_chunk_builder(chunks: list, messages: Optional[list]=None):
         "object": object,
         "created": created,
         "model": model,
+        "system_fingerprint": system_fingerprint,
         "choices": [
             {
                 "index": 0,
```
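Taken together, the two hunks thread the fingerprint from the first streamed chunk into the rebuilt response object. A minimal usage sketch, assuming `litellm.stream_chunk_builder` is the public entry point for this helper; the model name is a placeholder, and `system_fingerprint` is `None` whenever the provider omits it:

```python
import litellm

messages = [{"role": "user", "content": "Hello"}]

# Stream a completion and keep every raw chunk.
chunks = []
for chunk in litellm.completion(
    model="gpt-3.5-turbo",  # placeholder; any provider that emits system_fingerprint
    messages=messages,
    stream=True,
):
    chunks.append(chunk)

# Rebuild a single response object from the streamed chunks.
response = litellm.stream_chunk_builder(chunks, messages=messages)

# After this fix, the fingerprint read from chunks[0] survives the rebuild.
print(response.system_fingerprint)
```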
```diff
@@ -95,16 +95,18 @@ def test_stream_chunk_builder_litellm_tool_call():
     try:
         litellm.set_verbose = False
         response = litellm.completion(
-            model="azure/chatgpt-functioncalling",
+            model="azure/gpt-4-nov-release",
             messages=messages,
             tools=tools_schema,
             stream=True,
+            api_key="os.environ/AZURE_FRANCE_API_KEY",
+            api_base="https://openai-france-1234.openai.azure.com",
             complete_response = True
         )

         print(f"complete response: {response}")
         print(f"complete response usage: {response.usage}")
+        assert response.system_fingerprint is not None
     except Exception as e:
         pytest.fail(f"An exception occurred - {str(e)}")
```
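The test above needs a live Azure deployment. A network-free sketch of the same invariant would feed the builder hand-built chunks shaped like the fields the first hunk reads; whether the helper accepts plain dicts rather than chunk objects is an assumption here, and the fingerprint value is made up:

```python
from litellm import stream_chunk_builder

# Two minimal chunks carrying only the fields stream_chunk_builder reads above.
chunks = [
    {
        "object": "chat.completion.chunk",
        "created": 1699999999,
        "model": "gpt-4",
        "system_fingerprint": "fp_abc123",  # hypothetical value
        "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Hi"},
                     "finish_reason": None}],
    },
    {
        "object": "chat.completion.chunk",
        "created": 1699999999,
        "model": "gpt-4",
        "system_fingerprint": "fp_abc123",
        "choices": [{"index": 0, "delta": {"content": " there"},
                     "finish_reason": "stop"}],
    },
]

rebuilt = stream_chunk_builder(chunks)
assert rebuilt["system_fingerprint"] == "fp_abc123"
```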
```diff
@@ -1796,7 +1796,6 @@ def register_model(model_cost: Union[str, dict]):
         if key in litellm.model_cost:
             for k,v in loaded_model_cost[key].items():
                 litellm.model_cost[key][k] = v
-        # litellm.model_cost[key] = loaded_model_cost[key]
         # add new model names to provider lists
         if value.get('litellm_provider') == 'openai':
             if key not in litellm.open_ai_chat_completion_models:
```
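This hunk drops the commented-out wholesale replacement in favor of the per-key merge above it, so a partial override no longer clobbers fields it does not mention. A short sketch of that merge semantics; the price value is illustrative, not a real rate:

```python
import litellm

# "gpt-4" already exists in the built-in cost map with fields like max_tokens.
before = dict(litellm.model_cost["gpt-4"])

# Register a partial entry: only the keys provided are updated in place.
litellm.register_model({"gpt-4": {"input_cost_per_token": 0.00003}})  # illustrative price

after = litellm.model_cost["gpt-4"]
assert after["input_cost_per_token"] == 0.00003
# Every field the override did not mention is preserved by the merge loop.
assert all(after[k] == v for k, v in before.items() if k != "input_cost_per_token")
```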
```diff
@@ -5291,6 +5290,7 @@ class CustomStreamWrapper:
                 if self.sent_first_chunk == False:
                     model_response.choices[0].delta["role"] = "assistant"
                     self.sent_first_chunk = True
+                # LOGGING
                 threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start() # log response
                 return model_response
             else:
```
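The only new line in this hunk is the `# LOGGING` marker, but the surrounding pattern is the interesting part: the wrapper stamps `role = "assistant"` onto the first delta exactly once, then hands every chunk to the success handler on a background thread so logging cannot stall the stream. A generic, self-contained sketch of that pattern; the class and names are illustrative, not litellm internals:

```python
import threading

class FirstChunkTagger:
    """Illustrative sketch of the first-chunk bookkeeping shown above."""

    def __init__(self, log_fn):
        self.sent_first_chunk = False
        self.log_fn = log_fn

    def emit(self, model_response: dict) -> dict:
        if not self.sent_first_chunk:
            # Mirror the OpenAI stream shape: only the first delta carries a role.
            model_response["choices"][0]["delta"]["role"] = "assistant"
            self.sent_first_chunk = True
        # LOGGING: fire-and-forget so a slow logger never blocks the consumer.
        threading.Thread(target=self.log_fn, args=(model_response,)).start()
        return model_response
```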