Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
LiteLLM Minor Fixes and Improvements (09/10/2024) (#5618)
* fix(cost_calculator.py): move noisy warning message on cost-calculation errors to debug. Fixes https://github.com/BerriAI/litellm/issues/5610
* fix(databricks/cost_calculator.py): handle model-name issues for Databricks models
* fix(main.py): fix stream chunk builder for multiple tool calls. Fixes https://github.com/BerriAI/litellm/issues/5591
* fix: correctly set user_alias when passed in. Fixes https://github.com/BerriAI/litellm/issues/5612
* fix(types/utils.py): allow passing role for message object. https://github.com/BerriAI/litellm/issues/5621
* fix(litellm_logging.py): fix Langfuse logging across multiple projects. Fixes an issue where the Langfuse logger re-used the old logging object
* feat(proxy/_types.py): support adding key-based tags for tag-based routing. Enables tag-based routing at the key level
* fix(proxy/_types.py): fix inheritance
* test(test_key_generate_prisma.py): fix test
* test: fix test
* fix(litellm_logging.py): return used callback object
parent d6e0d5d234
commit 7f47c48b35

15 changed files with 673 additions and 96 deletions
@@ -829,18 +829,11 @@ def response_cost_calculator(
             )
         return None
     except Exception as e:
-        if litellm.suppress_debug_info:  # allow cli tools to suppress this information.
-            verbose_logger.debug(
-                "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                    str(e), traceback.format_exc()
-                )
-            )
-        else:
-            verbose_logger.warning(
-                "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                    str(e), traceback.format_exc()
-                )
-            )
+        verbose_logger.debug(
+            "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
+                str(e), traceback.format_exc()
+            )
+        )
         return None
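Since the message above is now emitted at debug level, it only shows up when debug logging is enabled. A minimal sketch, assuming LiteLLM's verbose_logger is a standard stdlib logger registered under the "LiteLLM" name:

```python
# Surface the downgraded cost-calculation message during local debugging.
# Assumes litellm's verbose logger is the stdlib logger named "LiteLLM".
import logging

logging.basicConfig()
logging.getLogger("LiteLLM").setLevel(logging.DEBUG)
```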
@@ -269,10 +269,11 @@ class SlackAlerting(CustomLogger):
                 break
             await asyncio.sleep(3)  # wait 3s before retrying for trace id

-        if litellm.litellm_core_utils.litellm_logging.langFuseLogger is not None:
-            base_url = (
-                litellm.litellm_core_utils.litellm_logging.langFuseLogger.Langfuse.base_url
-            )
+        _langfuse_object = litellm_logging_obj._get_callback_object(
+            service_name="langfuse"
+        )
+        if _langfuse_object is not None:
+            base_url = _langfuse_object.Langfuse.base_url
             return f"{base_url}/trace/{trace_id}"
         return None
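For reference, the alert link the code above builds is just the Langfuse base URL plus the trace id; a sketch with illustrative values (not from this commit):

```python
# Hypothetical values; in the hunk above, base_url comes from the Langfuse
# client returned by litellm_logging_obj._get_callback_object(service_name="langfuse").
base_url = "https://cloud.langfuse.com"
trace_id = "litellm-alert-trace-123"
print(f"{base_url}/trace/{trace_id}")  # https://cloud.langfuse.com/trace/litellm-alert-trace-123
```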
@@ -924,6 +924,7 @@ class Logging:
                 else:
                     print_verbose("reaches langfuse for streaming logging!")
                     result = kwargs["complete_streaming_response"]
+                temp_langfuse_logger = langFuseLogger
                 if langFuseLogger is None or (
                     (
                         self.langfuse_public_key is not None
@@ -940,12 +941,12 @@ class Logging:
                         and self.langfuse_host != langFuseLogger.langfuse_host
                     )
                 ):
-                    langFuseLogger = LangFuseLogger(
+                    temp_langfuse_logger = LangFuseLogger(
                         langfuse_public_key=self.langfuse_public_key,
                         langfuse_secret=self.langfuse_secret,
                         langfuse_host=self.langfuse_host,
                     )
-                _response = langFuseLogger.log_event(
+                _response = temp_langfuse_logger.log_event(
                     kwargs=kwargs,
                     response_obj=result,
                     start_time=start_time,
@@ -1925,6 +1926,38 @@ class Logging:

         return trace_id

+    def _get_callback_object(self, service_name: Literal["langfuse"]) -> Optional[Any]:
+        """
+        Return dynamic callback object.
+
+        Meant to solve issue when doing key-based/team-based logging
+        """
+        global langFuseLogger
+
+        if service_name == "langfuse":
+            if langFuseLogger is None or (
+                (
+                    self.langfuse_public_key is not None
+                    and self.langfuse_public_key != langFuseLogger.public_key
+                )
+                or (
+                    self.langfuse_public_key is not None
+                    and self.langfuse_public_key != langFuseLogger.public_key
+                )
+                or (
+                    self.langfuse_host is not None
+                    and self.langfuse_host != langFuseLogger.langfuse_host
+                )
+            ):
+                return LangFuseLogger(
+                    langfuse_public_key=self.langfuse_public_key,
+                    langfuse_secret=self.langfuse_secret,
+                    langfuse_host=self.langfuse_host,
+                )
+            return langFuseLogger
+
+        return None
+

 def set_callbacks(callback_list, function_id=None):
     """
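The method above is the heart of the multi-project Langfuse fix. A minimal, self-contained sketch of the pattern it implements, with hypothetical names (FakeLangfuseClient and get_client are illustrative, not litellm APIs): reuse the module-level client when the requested credentials match it, otherwise hand back a fresh client instead of clobbering the global one.

```python
from typing import Optional


class FakeLangfuseClient:
    def __init__(self, public_key: str, host: str):
        self.public_key = public_key
        self.host = host


_global_client: Optional[FakeLangfuseClient] = None


def get_client(public_key: str, host: str) -> FakeLangfuseClient:
    global _global_client
    if _global_client is None:
        _global_client = FakeLangfuseClient(public_key, host)
        return _global_client
    if public_key != _global_client.public_key or host != _global_client.host:
        # key-/team-scoped credentials differ -> hand out a per-request client
        return FakeLangfuseClient(public_key, host)
    return _global_client


a = get_client("pk-project-a", "https://cloud.langfuse.com")
b = get_client("pk-project-b", "https://cloud.langfuse.com")
assert a is not b and a is _global_client  # project B no longer hijacks project A's logger
```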
@@ -25,7 +25,30 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
         "dbrx-instruct"
     ):
         base_model = "databricks-dbrx-instruct"
+    elif model.startswith("databricks/meta-llama-3.1-70b-instruct") or model.startswith(
+        "meta-llama-3.1-70b-instruct"
+    ):
+        base_model = "databricks-meta-llama-3-1-70b-instruct"
+    elif model.startswith(
+        "databricks/meta-llama-3.1-405b-instruct"
+    ) or model.startswith("meta-llama-3.1-405b-instruct"):
+        base_model = "databricks-meta-llama-3-1-405b-instruct"
+    elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
+        "mixtral-8x7b-instruct-v0.1"
+    ):
+        base_model = "databricks-mixtral-8x7b-instruct"
+    elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
+        "mixtral-8x7b-instruct-v0.1"
+    ):
+        base_model = "databricks-mixtral-8x7b-instruct"
+    elif model.startswith("databricks/bge-large-en") or model.startswith(
+        "bge-large-en"
+    ):
+        base_model = "databricks-bge-large-en"
+    elif model.startswith("databricks/gte-large-en") or model.startswith(
+        "gte-large-en"
+    ):
+        base_model = "databricks-gte-large-en"
     ## GET MODEL INFO
     model_info = get_model_info(model=base_model, custom_llm_provider="databricks")
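A hedged usage sketch of the normalization above: both short and fully-qualified Databricks names should now resolve to the same priced base model. `litellm.cost_per_token` is the public entry point, though exact routing of the `databricks/` prefix may differ by version. (Note the mixtral branch appears twice in the committed code; the duplicate is redundant but harmless.)

```python
# With the prefix mapping above, a 3.1-style Databricks model name should
# resolve to the "databricks-meta-llama-3-1-70b-instruct" pricing entry.
import litellm

prompt_cost, completion_cost = litellm.cost_per_token(
    model="databricks/meta-llama-3.1-70b-instruct",
    prompt_tokens=1000,
    completion_tokens=200,
)
print(prompt_cost, completion_cost)  # ~1000 * 1.00002e-06 and ~200 * 2.99999e-06 USD
```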
@@ -5310,7 +5310,7 @@ def stream_chunk_builder(
         ]

         if len(tool_call_chunks) > 0:
-            argument_list = []
+            argument_list: List = []
             delta = tool_call_chunks[0]["choices"][0]["delta"]
             message = response["choices"][0]["message"]
             message["tool_calls"] = []
@@ -5319,6 +5319,7 @@ def stream_chunk_builder(
             type = None
             tool_calls_list = []
             prev_index = None
+            prev_name = None
             prev_id = None
             curr_id = None
             curr_index = 0
@@ -5346,27 +5347,32 @@ def stream_chunk_builder(
                 type = tool_calls[0].type
             if prev_index is None:
                 prev_index = curr_index
+            if prev_name is None:
+                prev_name = name
             if curr_index != prev_index:  # new tool call
                 combined_arguments = "".join(argument_list)
                 tool_calls_list.append(
                     {
                         "id": prev_id,
-                        "index": prev_index,
-                        "function": {"arguments": combined_arguments, "name": name},
+                        "function": {
+                            "arguments": combined_arguments,
+                            "name": prev_name,
+                        },
                         "type": type,
                     }
                 )
                 argument_list = []  # reset
                 prev_index = curr_index
                 prev_id = curr_id
+                prev_name = name

         combined_arguments = (
             "".join(argument_list) or "{}"
         )  # base case, return empty dict

         tool_calls_list.append(
             {
                 "id": id,
-                "index": curr_index,
                 "function": {"arguments": combined_arguments, "name": name},
                 "type": type,
             }
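Why `prev_name` matters: the chunk that opens the second tool call already carries the new function name, so flushing the first call's arguments under the latest `name` (the old behavior) mislabels them. A self-contained sketch with made-up fragments:

```python
# Sketch of the bug this hunk fixes: arguments accumulated for tool call 0
# must be flushed under the *previous* call's name, not under the name
# carried by the chunk that starts call 1.
fragments = [  # (tool_call_index, name, argument_fragment) - hypothetical data
    (0, "exponentiate", '{"base": '),
    (0, None, "3}"),
    (1, "add", '{"first_int": 1, '),
    (1, None, '"second_int": 2}'),
]

tool_calls, argument_list = [], []
prev_index = prev_name = name = None
for curr_index, curr_name, chunk in fragments:
    if curr_name is not None:
        name = curr_name
    if prev_index is None:
        prev_index, prev_name = curr_index, name
    if curr_index != prev_index:  # new tool call: flush the finished one
        tool_calls.append({"name": prev_name, "arguments": "".join(argument_list)})
        argument_list = []
        prev_index, prev_name = curr_index, name
    argument_list.append(chunk)

tool_calls.append({"name": name, "arguments": "".join(argument_list)})
print(tool_calls)
# [{'name': 'exponentiate', 'arguments': '{"base": 3}'},
#  {'name': 'add', 'arguments': '{"first_int": 1, "second_int": 2}'}]
```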
@@ -5422,7 +5428,7 @@ def stream_chunk_builder(
         for choice in choices:
             delta = choice.get("delta", {})
             content = delta.get("content", "")
-            if content == None:
+            if content is None:
                 continue  # openai v1.0.0 sets content = None for chunks
             content_list.append(content)
@@ -5461,90 +5461,129 @@
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
         "input_cost_per_token": 0.000005,
-        "output_cost_per_token": 0.000015,
+        "input_dbu_cost_per_token": 0.000071429,
+        "output_cost_per_token": 0.00001500002,
+        "output_db_cost_per_token": 0.000214286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-1-70b-instruct": {
         "max_tokens": 128000,
         "max_input_tokens": 128000,
         "max_output_tokens": 128000,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
         "max_input_tokens": 32768,
         "max_output_tokens": 32768,
-        "input_cost_per_token": 0.00000075,
-        "output_cost_per_token": 0.00000225,
+        "input_cost_per_token": 0.00000074998,
+        "input_dbu_cost_per_token": 0.000010714,
+        "output_cost_per_token": 0.00000224901,
+        "output_dbu_cost_per_token": 0.000032143,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-meta-llama-3-70b-instruct": {
-        "max_tokens": 8192,
-        "max_input_tokens": 8192,
-        "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000003,
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000100002,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000299999,
+        "output_dbu_cost_per_token": 0.000042857,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-llama-2-70b-chat": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
         "output_cost_per_token": 0.0000015,
+        "output_dbu_cost_per_token": 0.000021429,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mixtral-8x7b-instruct": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-30b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000001,
+        "input_cost_per_token": 0.00000099902,
+        "input_dbu_cost_per_token": 0.000014286,
+        "output_cost_per_token": 0.00000099902,
+        "output_dbu_cost_per_token": 0.000014286,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-mpt-7b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
         "max_output_tokens": 8192,
-        "input_cost_per_token": 0.0000005,
-        "output_cost_per_token": 0.0000005,
+        "input_cost_per_token": 0.00000050001,
+        "input_dbu_cost_per_token": 0.000007143,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     },
     "databricks/databricks-bge-large-en": {
         "max_tokens": 512,
         "max_input_tokens": 512,
         "output_vector_size": 1024,
-        "input_cost_per_token": 0.0000001,
+        "input_cost_per_token": 0.00000010003,
+        "input_dbu_cost_per_token": 0.000001429,
         "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
         "litellm_provider": "databricks",
         "mode": "embedding",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving"
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
+    },
+    "databricks/databricks-gte-large-en": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "output_vector_size": 1024,
+        "input_cost_per_token": 0.00000012999,
+        "input_dbu_cost_per_token": 0.000001857,
+        "output_cost_per_token": 0.0,
+        "output_dbu_cost_per_token": 0.0,
+        "litellm_provider": "databricks",
+        "mode": "embedding",
+        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
+        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
     }
 }
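The metadata notes state that the dollar figures are derived from the DBU figures at $0.070 per DBU, and the numbers above check out. (One aside: the first entry writes "output_db_cost_per_token" where every other entry uses "output_dbu_cost_per_token"; that looks like a typo carried in the committed JSON.) A quick arithmetic check:

```python
# Verify the dbu -> dollar conversion from the metadata notes, using the
# databricks-meta-llama-3-1-70b-instruct entry above.
dollars_per_dbu = 0.070

print(round(0.000014286 * dollars_per_dbu, 12))  # 1.00002e-06, matches input_cost_per_token
print(round(0.000042857 * dollars_per_dbu, 12))  # 2.99999e-06, matches output_cost_per_token
```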
@@ -1,9 +1,9 @@
 model_list:
-  - model_name: "gpt-turbo"
+  - model_name: "gpt-4o"
     litellm_params:
-      model: azure/chatgpt-v-2
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
+      model: gpt-4o

-router_settings:
-  model_group_alias: {"gpt-4": "gpt-turbo"}
+litellm_settings:
+  cache: true
+  cache_params:
+    type: local
@@ -600,7 +600,7 @@ class GenerateRequestBase(LiteLLMBase):
     soft_budget: Optional[float] = None


-class GenerateKeyRequest(GenerateRequestBase):
+class _GenerateKeyRequest(GenerateRequestBase):
     key_alias: Optional[str] = None
     key: Optional[str] = None
     duration: Optional[str] = None
@@ -618,7 +618,11 @@ class GenerateKeyRequest(GenerateRequestBase):
     guardrails: Optional[List[str]] = None


-class GenerateKeyResponse(GenerateKeyRequest):
+class GenerateKeyRequest(_GenerateKeyRequest):
+    tags: Optional[List[str]] = None
+
+
+class GenerateKeyResponse(_GenerateKeyRequest):
     key: str
     key_name: Optional[str] = None
     expires: Optional[datetime]
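A short usage sketch of the new split (assumes litellm is installed; the field names are exactly those in the hunk): `tags` lives only on the request model, so GenerateKeyResponse, which is also built on _GenerateKeyRequest, does not inherit it.

```python
# /key/generate's request model now accepts `tags` for key-based tag routing.
from litellm.proxy._types import GenerateKeyRequest

req = GenerateKeyRequest(key_alias="tag-routed-key", tags=["teamA", "paid"])
print(req.tags)  # ['teamA', 'paid']
```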
@@ -677,9 +681,10 @@ class LiteLLM_ModelTable(LiteLLMBase):
     model_config = ConfigDict(protected_namespaces=())


-class NewUserRequest(GenerateKeyRequest):
+class NewUserRequest(_GenerateKeyRequest):
     max_budget: Optional[float] = None
     user_email: Optional[str] = None
+    user_alias: Optional[str] = None
     user_role: Optional[
         Literal[
             LitellmUserRoles.PROXY_ADMIN,
@@ -713,6 +718,7 @@ class NewUserResponse(GenerateKeyResponse):
     ] = None
     teams: Optional[list] = None
     organization_id: Optional[str] = None
+    user_alias: Optional[str] = None


 class UpdateUserRequest(GenerateRequestBase):
@@ -156,6 +156,7 @@ async def new_user(
         user_id=response["user_id"],
         user_role=response.get("user_role", None),
         user_email=response.get("user_email", None),
+        user_alias=response.get("user_alias", None),
         teams=response.get("teams", None),
         team_id=response.get("team_id", None),
         metadata=response.get("metadata", None),
@@ -202,6 +202,15 @@ async def generate_key_fn(
         if "budget_duration" in data_json:
             data_json["key_budget_duration"] = data_json.pop("budget_duration", None)

+        # Set tags on the new key
+        if "tags" in data_json:
+            if data_json["metadata"] is None:
+                data_json["metadata"] = {"tags": data_json["tags"]}
+            else:
+                data_json["metadata"]["tags"] = data_json["tags"]
+
+            data_json.pop("tags")
+
         response = await generate_key_helper_fn(
             request_type="key", **data_json, table_name="key"
         )
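The tag handling above is self-contained enough to illustrate directly; this sketch reproduces it on a toy `data_json` to show the resulting key metadata:

```python
# Tags passed on /key/generate are folded into the key's metadata under "tags"
# before the key is persisted.
data_json = {"metadata": None, "tags": ["prod", "teamA"]}

if "tags" in data_json:
    if data_json["metadata"] is None:
        data_json["metadata"] = {"tags": data_json["tags"]}
    else:
        data_json["metadata"]["tags"] = data_json["tags"]
    data_json.pop("tags")

print(data_json)  # {'metadata': {'tags': ['prod', 'teamA']}}
```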
@@ -257,12 +266,11 @@ async def generate_key_fn(

         return GenerateKeyResponse(**response)
     except Exception as e:
-        verbose_proxy_logger.error(
+        verbose_proxy_logger.exception(
             "litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
                 str(e)
             )
         )
-        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),
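Switching `verbose_proxy_logger.error(...)` plus a separate `debug(traceback.format_exc())` to a single `.exception(...)` works because the stdlib's `Logger.exception` records the active traceback itself. A stdlib-only demo:

```python
# Logger.exception() logs at ERROR level and appends the current traceback,
# so no explicit traceback.format_exc() call is needed.
import logging

logging.basicConfig(level=logging.ERROR)
log = logging.getLogger("demo")

try:
    raise ValueError("boom")
except ValueError as e:
    log.exception("generate_key_fn(): Exception occured - %s", str(e))  # traceback included
```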
@@ -731,6 +739,7 @@ async def generate_key_helper_fn(
         str
     ] = None,  # dev-friendly alt param for 'token'. Exposed on `/key/generate` for setting key value yourself.
     user_id: Optional[str] = None,
+    user_alias: Optional[str] = None,
     team_id: Optional[str] = None,
     user_email: Optional[str] = None,
     user_role: Optional[str] = None,
|
@ -816,6 +825,7 @@ async def generate_key_helper_fn(
|
||||||
"max_budget": max_budget,
|
"max_budget": max_budget,
|
||||||
"user_email": user_email,
|
"user_email": user_email,
|
||||||
"user_id": user_id,
|
"user_id": user_id,
|
||||||
|
"user_alias": user_alias,
|
||||||
"team_id": team_id,
|
"team_id": team_id,
|
||||||
"organization_id": organization_id,
|
"organization_id": organization_id,
|
||||||
"user_role": user_role,
|
"user_role": user_role,
|
||||||
|
|
|
@@ -1221,11 +1221,37 @@ def test_completion_cost_anthropic_prompt_caching():
     assert cost_1 > cost_2


-def test_completion_cost_databricks():
-    model, messages = "databricks/databricks-dbrx-instruct", [
-        {"role": "user", "content": "What is 2+2?"}
-    ]
+@pytest.mark.parametrize(
+    "model",
+    [
+        "databricks/databricks-meta-llama-3-1-70b-instruct",
+        "databricks/databricks-meta-llama-3-70b-instruct",
+        "databricks/databricks-dbrx-instruct",
+        "databricks/databricks-mixtral-8x7b-instruct",
+    ],
+)
+def test_completion_cost_databricks(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    model, messages = model, [{"role": "user", "content": "What is 2+2?"}]

     resp = litellm.completion(model=model, messages=messages)  # works fine

+    print(resp)
+    cost = completion_cost(completion_response=resp)
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "databricks/databricks-bge-large-en",
+        "databricks/databricks-gte-large-en",
+    ],
+)
+def test_completion_cost_databricks_embedding(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    resp = litellm.embedding(model=model, input=["hey, how's it going?"])  # works fine
+
+    print(resp)
     cost = completion_cost(completion_response=resp)
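The two setup lines the new tests start with make LiteLLM read its bundled local price map instead of fetching the hosted one, so the new Databricks entries are exercised; a sketch (assumes the commit's JSON entries are present in the installed package):

```python
# Point LiteLLM at its bundled local model-cost map (no network fetch).
import os
import litellm

os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
print("databricks/databricks-gte-large-en" in litellm.model_cost)  # True once this commit lands
```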
@@ -2762,6 +2762,7 @@ async def test_generate_key_with_model_tpm_limit(prisma_client):
         "team": "litellm-team3",
         "model_tpm_limit": {"gpt-4": 100},
         "model_rpm_limit": {"gpt-4": 2},
+        "tags": None,
     }

     # Update model tpm_limit and rpm_limit
@@ -2782,6 +2783,7 @@ async def test_generate_key_with_model_tpm_limit(prisma_client):
         "team": "litellm-team3",
         "model_tpm_limit": {"gpt-4": 200},
         "model_rpm_limit": {"gpt-4": 3},
+        "tags": None,
     }

@@ -2818,6 +2820,7 @@ async def test_generate_key_with_guardrails(prisma_client):
     assert result["info"]["metadata"] == {
         "team": "litellm-team3",
         "guardrails": ["aporia-pre-call"],
+        "tags": None,
     }

     # Update model tpm_limit and rpm_limit
@@ -2836,6 +2839,7 @@ async def test_generate_key_with_guardrails(prisma_client):
     assert result["info"]["metadata"] == {
         "team": "litellm-team3",
         "guardrails": ["aporia-pre-call", "aporia-post-call"],
+        "tags": None,
     }

@@ -210,7 +210,6 @@ def test_stream_chunk_builder_litellm_mixed_calls():

     assert len(response.choices[0].message.tool_calls) == 1
     assert response.choices[0].message.tool_calls[0].to_dict() == {
-        "index": 1,
         "function": {
             "arguments": '{"query": "SELECT COUNT(*) FROM users;"}',
             "name": "sql_query",
@@ -226,3 +225,400 @@ def test_stream_chunk_builder_litellm_empty_chunks():

     response = stream_chunk_builder(chunks=[])
     assert response is None
+
+
+def test_stream_chunk_builder_multiple_tool_calls():
+    init_chunks = [
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"id": "call_X9P9B6STj7ze8OsJCGkfoN94", "function": {"arguments": "", "name": "exponentiate"}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": '{"ba'}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": 'se": '}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": '3, "ex'}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": "pone"}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": 'nt": '}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": "5}"}, "type": "function", "index": 0}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"id": "call_Qq8yDeRx7v276abRcLrYORdW", "function": {"arguments": "", "name": "add"}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": '{"fi'}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": "rst_i"}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": 'nt": 1'}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": '2, "'}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": "secon"}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": 'd_int"'}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"index": 0, "delta": {"role": "assistant", "tool_calls": [
+                {"function": {"arguments": ": 3}"}, "type": "function", "index": 1}
+            ]}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+        {
+            "id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
+            "choices": [{"finish_reason": "tool_calls", "index": 0, "delta": {}}],
+            "created": 1725932618,
+            "model": "gpt-4o-2024-08-06",
+            "object": "chat.completion.chunk",
+            "system_fingerprint": "fp_b2ffeb16ee",
+        },
+    ]
+
+    chunks = []
+    for chunk in init_chunks:
+        chunks.append(litellm.ModelResponse(**chunk, stream=True))
+    response = stream_chunk_builder(chunks=chunks)
+
+    print(f"Returned response: {response}")
+    completed_response = {
+        "id": "chatcmpl-A61mXjvcRX0Xr2IiojN9TPiy1P3Fm",
+        "choices": [
+            {
+                "finish_reason": "tool_calls",
+                "index": 0,
+                "message": {
+                    "content": None,
+                    "role": "assistant",
+                    "tool_calls": [
+                        {
+                            "function": {
+                                "arguments": '{"base": 3, "exponent": 5}',
+                                "name": "exponentiate",
+                            },
+                            "id": "call_X9P9B6STj7ze8OsJCGkfoN94",
+                            "type": "function",
+                        },
+                        {
+                            "function": {
+                                "arguments": '{"first_int": 12, "second_int": 3}',
+                                "name": "add",
+                            },
+                            "id": "call_Qq8yDeRx7v276abRcLrYORdW",
+                            "type": "function",
+                        },
+                    ],
+                    "function_call": None,
+                },
+            }
+        ],
+        "created": 1726000181,
+        "model": "gpt-4o-2024-05-13",
+        "object": "chat.completion",
+        "system_fingerprint": "fp_25624ae3a5",
+        "usage": {"completion_tokens": 55, "prompt_tokens": 127, "total_tokens": 182},
+        "service_tier": None,
+    }
+
+    expected_response = litellm.ModelResponse(**completed_response)
+
+    print(f"\n\nexpected_response:\n{expected_response}\n\n")
+    assert (
+        expected_response.choices == response.choices
+    ), "\nGot={}\n, Expected={}\n".format(response.choices, expected_response.choices)
@@ -325,7 +325,7 @@ class Message(OpenAIObject):
     ):
         init_values = {
             "content": content,
-            "role": "assistant",
+            "role": role or "assistant",  # handle null input
             "function_call": (
                 FunctionCall(**function_call) if function_call is not None else None
             ),
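A minimal check of the `Message` change (module path per the commit message, types/utils.py): a caller-supplied role is preserved, while None still falls back to "assistant".

```python
from litellm.types.utils import Message

print(Message(content="ok", role="function").role)  # "function"
print(Message(content="ok").role)                   # "assistant"
```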
@@ -5492,90 +5492,129 @@
(Identical to the @@ -5461,90 +5461,129 @@ hunk above: the same Databricks pricing changes, dbu cost fields, metadata notes, and new "databricks/databricks-gte-large-en" entry, applied to the repo's second copy of the model price map.)