Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)

Merge branch 'main' into litellm_auth_fix

Commit ced4582ecb

24 changed files with 483 additions and 59 deletions
@@ -11,7 +11,7 @@
<p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
<br>
</p>
-<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
+<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (LLM Gateway)</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
<h4 align="center">
<a href="https://pypi.org/project/litellm/" target="_blank">
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">

@@ -35,9 +35,9 @@ LiteLLM manages:
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
- [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
-- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
+- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server (LLM Gateway)](https://docs.litellm.ai/docs/simple_proxy)

-[**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
+[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)

🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published.

@@ -134,7 +134,7 @@ litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log in
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```

-# OpenAI Proxy - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
+# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))

Track spend + Load Balance across multiple projects
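For orientation (not part of the diff): a minimal sketch of the SDK pattern the README hunks above describe, showing that the same `completion()` call shape works across providers and that the text is always available at `['choices'][0]['message']['content']`. The API keys are placeholders.

```python
# Minimal sketch of the README's basic-usage pattern (keys are placeholders).
import os
from litellm import completion

os.environ["OPENAI_API_KEY"] = "sk-..."          # placeholder
os.environ["ANTHROPIC_API_KEY"] = "sk-ant-..."   # placeholder

messages = [{"role": "user", "content": "Hi 👋 - i'm openai"}]

# Same call, different providers - LiteLLM translates the request format.
openai_resp = completion(model="gpt-3.5-turbo", messages=messages)
claude_resp = completion(model="claude-3-5-sonnet-20240620", messages=messages)

# Consistent output: text is always at ['choices'][0]['message']['content']
print(openai_resp["choices"][0]["message"]["content"])
print(claude_resp["choices"][0]["message"]["content"])
```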
@@ -14,7 +14,7 @@ https://github.com/BerriAI/litellm

## How to use LiteLLM
You can use litellm through either:
-1. [LiteLLM Proxy Server](#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects
+1. [LiteLLM Proxy Server](#openai-proxy) - Server (LLM Gateway) to call 100+ LLMs, load balance, cost tracking across projects
2. [LiteLLM python SDK](#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking

### When to use LiteLLM Proxy Server
@@ -427,6 +427,105 @@ print(resp)
```


### **Context Caching**

Use Vertex AI Context Caching

[**Relevant VertexAI Docs**](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-overview)

<Tabs>

<TabItem value="proxy" label="LiteLLM PROXY">

1. Add model to config.yaml
```yaml
model_list:
  # used for /chat/completions, /completions, /embeddings endpoints
  - model_name: gemini-1.5-pro-001
    litellm_params:
      model: vertex_ai_beta/gemini-1.5-pro-001
      vertex_project: "project-id"
      vertex_location: "us-central1"
      vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json

# used for the /cachedContent and vertexAI native endpoints
default_vertex_config:
  vertex_project: "adroit-crow-413218"
  vertex_location: "us-central1"
  vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json

```

2. Start Proxy

```
$ litellm --config /path/to/config.yaml
```

3. Make Request!

- First create a cachedContents object by calling the Vertex `cachedContents` endpoint. [VertexAI API Ref for cachedContents endpoint](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest). (LiteLLM proxy forwards the `/cachedContents` request to the VertexAI API.)
- Use the `cachedContents` object in your /chat/completions request to vertexAI

```python
import datetime
import openai
import httpx

# Set Litellm proxy variables here
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"

client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
httpx_client = httpx.Client(timeout=30)

################################
# First create a cachedContents object
# this request gets forwarded as is to: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
print("creating cached content")
create_cache = httpx_client.post(
    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
    json={
        "model": "gemini-1.5-pro-001",
        "contents": [
            {
                "role": "user",
                "parts": [{
                    "text": "This is sample text to demonstrate explicit caching." * 4000
                }]
            }
        ],
    },
)
print("response from create_cache", create_cache)
create_cache_response = create_cache.json()
print("json from create_cache", create_cache_response)
cached_content_name = create_cache_response["name"]

#################################
# Use the `cachedContents` object in your /chat/completions
response = client.chat.completions.create(  # type: ignore
    model="gemini-1.5-pro-001",
    max_tokens=8192,
    messages=[
        {
            "role": "user",
            "content": "what is the sample text about?",
        },
    ],
    temperature=0.7,
    extra_body={"cached_content": cached_content_name},  # 👈 key change
)

print("response from proxy", response)

```

</TabItem>
</Tabs>


## Pre-requisites
* `pip install google-cloud-aiplatform` (pre-installed on proxy docker image)
* Authentication:
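Illustrative only: the added docs cover the proxy flow; if calling Vertex through the LiteLLM SDK directly, the commit's `is_using_v1beta1_features` check suggests `cached_content` can be passed as an optional param on the completion call. The sketch below assumes that kwarg is forwarded unchanged to Vertex, so treat it as an assumption rather than the documented interface.

```python
# Hedged sketch: SDK-side variant of the proxy example above.
# Assumes `cached_content` is forwarded as a Vertex optional param,
# as the is_using_v1beta1_features() check added in this commit implies.
import litellm

cached_content_name = "<name returned by the /cachedContents call>"  # placeholder

response = litellm.completion(
    model="vertex_ai_beta/gemini-1.5-pro-001",
    messages=[{"role": "user", "content": "what is the sample text about?"}],
    max_tokens=8192,
    temperature=0.7,
    vertex_project="adroit-crow-413218",   # values taken from the config.yaml above
    vertex_location="us-central1",
    cached_content=cached_content_name,
)
print(response.choices[0].message.content)
```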
@@ -5,7 +5,7 @@ import TabItem from '@theme/TabItem';
# Quick Start
Quick start CLI, Config, Docker

-LiteLLM Server manages:
+LiteLLM Server (LLM Gateway) manages:

* **Unified Interface**: Calling 100+ LLMs [Huggingface/Bedrock/TogetherAI/etc.](#other-supported-models) in the OpenAI `ChatCompletions` & `Completions` format
* **Cost tracking**: Authentication, Spend Tracking & Budgets [Virtual Keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
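Not part of the diff: a minimal end-to-end sketch of the quick-start flow described above, starting the gateway from a config file and calling it with the OpenAI SDK. The port, key, and model name mirror examples elsewhere in this commit and are placeholders for your own setup.

```python
# Quick-start sketch (placeholders throughout):
#   1) start the gateway:  litellm --config /path/to/config.yaml
#   2) call it with any OpenAI-compatible client:
import openai

client = openai.OpenAI(
    api_key="sk-1234",               # proxy master key / virtual key
    base_url="http://0.0.0.0:4000",  # default local proxy address
)

response = client.chat.completions.create(
    model="gemini-1.5-pro-001",  # any model_name defined in config.yaml
    messages=[{"role": "user", "content": "Hello from the quick start"}],
)
print(response.choices[0].message.content)
```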
@@ -1,5 +1,11 @@
# [BETA] Vertex AI Endpoints

:::tip

Looking for the Unified API (OpenAI format) for VertexAI ? [Go here - using vertexAI with LiteLLM SDK or LiteLLM Proxy Server](../docs/providers/vertex.md)

:::

## Supported API Endpoints

- Gemini API
@@ -24,7 +24,7 @@ const sidebars = {
      link: {
        type: "generated-index",
        title: "💥 LiteLLM Proxy Server",
-       description: `OpenAI Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
+       description: `OpenAI Proxy Server (LLM Gateway) to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
        slug: "/simple_proxy",
      },
      items: [
@@ -261,6 +261,7 @@ default_user_params: Optional[Dict] = None
default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None
max_internal_user_budget: Optional[float] = None
internal_user_budget_duration: Optional[str] = None
max_end_user_budget: Optional[float] = None
#### REQUEST PRIORITIZATION ####
priority_reservation: Optional[Dict[str, float]] = None
@@ -90,7 +90,13 @@ class ServiceLogging(CustomLogger):
        )

    async def init_prometheus_services_logger_if_none(self):
-       if self.prometheusServicesLogger is None:
+       """
+       initializes prometheusServicesLogger if it is None or no attribute exists on ServiceLogging Object
+
+       """
+       if not hasattr(self, "prometheusServicesLogger"):
            self.prometheusServicesLogger = PrometheusServicesLogger()
+       elif self.prometheusServicesLogger is None:
+           self.prometheusServicesLogger = self.prometheusServicesLogger()
        return

@@ -1,6 +1,9 @@
# What is this?
## Helper utilities
-from typing import List, Literal, Optional, Tuple
+import os
+from typing import BinaryIO, List, Literal, Optional, Tuple
+
+from litellm._logging import verbose_logger


def map_finish_reason(
@@ -83,3 +86,20 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
        return kwargs["litellm_parent_otel_span"]
    except:
        return None


def get_file_check_sum(_file: BinaryIO):
    """
    Helper to safely get file checksum - used as a cache key
    """
    try:
        file_descriptor = _file.fileno()
        file_stat = os.fstat(file_descriptor)
        file_size = str(file_stat.st_size)
        file_checksum = _file.name + file_size
        return file_checksum
    except Exception as e:
        verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
        file_checksum = _file.name
        return file_checksum
    return file_checksum
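A small usage sketch for the new `get_file_check_sum` helper; the file path is a placeholder and the import path matches the one added to `utils.py` later in this diff. Note the "checksum" is just the file name plus its byte size, a cheap cache key rather than a cryptographic hash.

```python
# Usage sketch for the new helper; "sample_audio.mp3" is a placeholder path.
from litellm.litellm_core_utils.core_helpers import get_file_check_sum

with open("sample_audio.mp3", "rb") as audio_file:
    checksum = get_file_check_sum(_file=audio_file)

print("file_checksum:", checksum)  # e.g. "sample_audio.mp31048576" (name + size)
```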
@@ -287,6 +287,9 @@ class AnthropicConfig:
        if user_message is not None:
            new_messages.append(user_message)

        if len(new_user_content_list) > 0:
            new_messages.append({"role": "user", "content": new_user_content_list})

        if len(tool_message_list) > 0:
            new_messages.extend(tool_message_list)

@@ -278,6 +278,14 @@ class VertexFineTuningAPI(VertexLLM):
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        elif "countTokens" in request_route:
            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        elif "cachedContents" in request_route:
            _model = request_data.get("model")
            if _model is not None and "/publishers/google/models/" not in _model:
                request_data["model"] = (
                    f"projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{_model}"
                )

            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
        else:
            raise ValueError(f"Unsupported Vertex AI request route: {request_route}")
        if self.async_handler is None:
@@ -1135,8 +1135,9 @@ def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResu
        return anthropic_tool_result
    if message["role"] == "function":
        content = message.get("content")  # type: ignore
        tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())
        anthropic_tool_result = AnthropicMessagesToolResultParam(
-           type="tool_result", tool_use_id=str(uuid.uuid4()), content=content
+           type="tool_result", tool_use_id=tool_call_id, content=content
        )

        return anthropic_tool_result
@@ -881,6 +881,21 @@ class VertexLLM(BaseLLM):

        return self._credentials.token, self.project_id

    def is_using_v1beta1_features(self, optional_params: dict) -> bool:
        """
        VertexAI only supports ContextCaching on v1beta1

        use this helper to decide if request should be sent to v1 or v1beta1

        Returns v1beta1 if context caching is enabled
        Returns v1 in all other cases
        """
        if "cached_content" in optional_params:
            return True
        if "CachedContent" in optional_params:
            return True
        return False

    def _get_token_and_url(
        self,
        model: str,
@@ -891,6 +906,7 @@
        stream: Optional[bool],
        custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
        api_base: Optional[str],
        should_use_v1beta1_features: Optional[bool] = False,
    ) -> Tuple[Optional[str], str]:
        """
        Internal function. Returns the token and url for the call.
@@ -920,12 +936,13 @@
            vertex_location = self.get_vertex_region(vertex_region=vertex_location)

            ### SET RUNTIME ENDPOINT ###
            version = "v1beta1" if should_use_v1beta1_features is True else "v1"
            endpoint = "generateContent"
            if stream is True:
                endpoint = "streamGenerateContent"
-               url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
+               url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
            else:
-               url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"
+               url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"

        if (
            api_base is not None
@@ -1055,6 +1072,9 @@
    ) -> Union[ModelResponse, CustomStreamWrapper]:
        stream: Optional[bool] = optional_params.pop("stream", None)  # type: ignore

        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )
        auth_header, url = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
@@ -1064,6 +1084,7 @@
            stream=stream,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
        )

        ## TRANSFORMATION ##
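A quick sketch of the new routing behavior, mirroring `test_get_token_url` further down in this diff: only requests carrying `cached_content` (or `CachedContent`) are sent to the `v1beta1` Vertex endpoint; everything else stays on `v1`.

```python
# Sketch of the v1 vs v1beta1 decision added in this commit
# (same assertions as test_get_token_url later in this diff).
from litellm.llms.vertex_httpx import VertexLLM

vertex_llm = VertexLLM()

# Context caching param present -> v1beta1
assert vertex_llm.is_using_v1beta1_features(optional_params={"cached_content": "my-cache"}) is True

# Plain request -> v1
assert vertex_llm.is_using_v1beta1_features(optional_params={"temperature": 0.1}) is False
```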
@@ -1,4 +1,8 @@
model_list:
-  - model_name: "gpt-4"
+  - model_name: "claude-3-5-sonnet-20240620"
    litellm_params:
-     model: "gpt-4"
+     model: "claude-3-5-sonnet-20240620"

litellm_settings:
  max_internal_user_budget: 0.001
  internal_user_budget_duration: "5m"
@@ -91,6 +91,10 @@ async def new_user(
    if litellm.max_internal_user_budget is not None:
        data_json["max_budget"] = litellm.max_internal_user_budget

    if "budget_duration" in data_json and data_json["budget_duration"] is None:
        if litellm.internal_user_budget_duration is not None:
            data_json["budget_duration"] = litellm.internal_user_budget_duration

    response = await generate_key_helper_fn(request_type="user", **data_json)

    # Admin UI Logic
@@ -3,20 +3,14 @@ model_list:
    litellm_params:
      model: openai/fake
      api_key: fake-key
-     api_base: https://exampleopenaiendpoint-production.up.railwaz.app/
+     api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: fireworks-llama-v3-70b-instruct
    litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
      api_key: "os.environ/FIREWORKS"
- # provider specific wildcard routing
- - model_name: "anthropic/*"
+ - model_name: "*"
    litellm_params:
-     model: "anthropic/*"
-     api_key: os.environ/ANTHROPIC_API_KEY
- - model_name: "groq/*"
-   litellm_params:
-     model: "groq/*"
-     api_key: os.environ/GROQ_API_KEY
+     model: "*"
  - model_name: "*"
    litellm_params:
      model: openai/*
@@ -25,37 +19,22 @@ model_list:
    litellm_params:
      model: mistral/mistral-small-latest
      api_key: "os.environ/MISTRAL_API_KEY"
- - model_name: tts
+ - model_name: gemini-1.5-pro-001
    litellm_params:
-     model: openai/tts-1
-     api_key: "os.environ/OPENAI_API_KEY"
-   model_info:
-     mode: audio_speech


- # for /files endpoints
- files_settings:
-   - custom_llm_provider: azure
-     api_base: https://exampleopenaiendpoint-production.up.railway.app
-     api_key: fake-key
-     api_version: "2023-03-15-preview"
-   - custom_llm_provider: openai
-     api_key: os.environ/OPENAI_API_KEY
+     model: vertex_ai_beta/gemini-1.5-pro-001
+     vertex_project: "adroit-crow-413218"
+     vertex_location: "us-central1"
+     vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json"
+     # Add path to service account.json

+ default_vertex_config:
+   vertex_project: "adroit-crow-413218"
+   vertex_location: "us-central1"
+   vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json


general_settings:
  master_key: sk-1234
  pass_through_endpoints:
    - path: "/v1/rerank" # route you want to add to LiteLLM Proxy Server
      target: "https://api.cohere.com/v1/rerank" # URL this route should forward requests to
      headers: # headers to forward to this URL
        content-type: application/json # (Optional) Extra Headers to pass to this endpoint
        accept: application/json
      forward_headers: True


litellm_settings:
  callbacks: ["otel"] # 👈 KEY CHANGE
  success_callback: ["prometheus"]
  failure_callback: ["prometheus"]
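For context on the wildcard entries above (`model_name: "*"` with `model: "*"` / `model: openai/*`): any model name without an explicit entry falls through to the wildcard entries. A minimal client-side sketch, with the master key and port taken from this config as placeholders and provider keys assumed to be set in the proxy's environment:

```python
# Client-side sketch for the wildcard routing entries in the config above.
# Assumes the proxy runs locally with master_key sk-1234 and provider API keys
# are available in the proxy's environment.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# Explicit entry in model_list
resp = client.chat.completions.create(
    model="gemini-1.5-pro-001",
    messages=[{"role": "user", "content": "hello"}],
)

# No explicit entry -> handled by the "*" wildcard entries
resp = client.chat.completions.create(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "hello"}],
)
print(resp.choices[0].message.content)
```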
@@ -5374,7 +5374,13 @@ async def anthropic_response(
    litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

    global user_temperature, user_request_timeout, user_max_tokens, user_api_base
-   data: dict = {**anthropic_data, "adapter_id": "anthropic"}
+   body = await request.body()
+   body_str = body.decode()
+   try:
+       request_data: dict = ast.literal_eval(body_str)
+   except Exception:
+       request_data = json.loads(body_str)
+   data: dict = {**request_data, "adapter_id": "anthropic"}
    try:
        data["model"] = (
            general_settings.get("completion_model", None)  # server default
litellm/proxy/tests/test_gemini_context_caching.py (new file, 54 lines)
@@ -0,0 +1,54 @@
import datetime

import httpx
import openai

# Set Litellm proxy variables here
LITELLM_BASE_URL = "http://0.0.0.0:4000"
LITELLM_PROXY_API_KEY = "sk-1234"

client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
httpx_client = httpx.Client(timeout=30)

################################
# First create a cachedContents object
print("creating cached content")
create_cache = httpx_client.post(
    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
    json={
        "model": "gemini-1.5-pro-001",
        "contents": [
            {
                "role": "user",
                "parts": [
                    {
                        "text": "This is sample text to demonstrate explicit caching."
                        * 4000
                    }
                ],
            }
        ],
    },
)
print("response from create_cache", create_cache)
create_cache_response = create_cache.json()
print("json from create_cache", create_cache_response)
cached_content_name = create_cache_response["name"]

#################################
# Use the `cachedContents` object in your /chat/completions
response = client.chat.completions.create(  # type: ignore
    model="gemini-1.5-pro-001",
    max_tokens=8192,
    messages=[
        {
            "role": "user",
            "content": "what is the sample text about?",
        },
    ],
    temperature=0.7,
    extra_body={"cached_content": cached_content_name},  # 👈 key change
)

print("response from proxy", response)
@@ -303,3 +303,30 @@ async def vertex_cancel_fine_tuning_job(
        return response
    except Exception as e:
        raise exception_handler(e) from e


@router.post(
    "/vertex-ai/cachedContents",
    dependencies=[Depends(user_api_key_auth)],
    tags=["Vertex AI endpoints"],
)
async def vertex_create_add_cached_content(
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    this is a pass through endpoint for the Vertex AI API. /cachedContents endpoint

    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest

    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
    """
    try:
        response = await execute_post_vertex_ai_request(
            request=request,
            route="/cachedContents",
        )
        return response
    except Exception as e:
        raise exception_handler(e) from e
@@ -1969,3 +1969,58 @@ def test_prompt_factory_nested():
        assert isinstance(
            message["parts"][0]["text"], str
        ), "'text' value not a string."


def test_get_token_url():
    from litellm.llms.vertex_httpx import VertexLLM

    vertex_llm = VertexLLM()
    vertex_ai_project = "adroit-crow-413218"
    vertex_ai_location = "us-central1"
    json_obj = get_vertex_ai_creds_json()
    vertex_credentials = json.dumps(json_obj)

    should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
        optional_params={"cached_content": "hi"}
    )

    assert should_use_v1beta1_features is True

    _, url = vertex_llm._get_token_and_url(
        vertex_project=vertex_ai_project,
        vertex_location=vertex_ai_location,
        vertex_credentials=vertex_credentials,
        gemini_api_key="",
        custom_llm_provider="vertex_ai_beta",
        should_use_v1beta1_features=should_use_v1beta1_features,
        api_base=None,
        model="",
        stream=False,
    )

    print("url=", url)

    assert "/v1beta1/" in url

    should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
        optional_params={"temperature": 0.1}
    )

    _, url = vertex_llm._get_token_and_url(
        vertex_project=vertex_ai_project,
        vertex_location=vertex_ai_location,
        vertex_credentials=vertex_credentials,
        gemini_api_key="",
        custom_llm_provider="vertex_ai_beta",
        should_use_v1beta1_features=should_use_v1beta1_features,
        api_base=None,
        model="",
        stream=False,
    )

    print("url for normal request", url)

    assert "v1beta1" not in url
    assert "/v1/" in url

    pass
@@ -183,3 +183,96 @@ async def test_anthropic_router_completion_e2e():
    assert isinstance(response, AnthropicResponse)

    assert response.model == "gpt-3.5-turbo"


def test_anthropic_tool_calling_translation():
    kwargs = {
        "model": "claude-3-5-sonnet-20240620",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Would development of a software platform be under ASC 350-40 or ASC 985?",
                    }
                ],
            },
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "id": "37d6f703-cbcc-497d-95a1-2aa24a114adc",
                        "name": "TaskPlanningTool",
                        "input": {
                            "completed_steps": [],
                            "next_steps": [
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Research ASC 350-40 to understand its scope and applicability to software development.",
                                },
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Research ASC 985 to understand its scope and applicability to software development.",
                                },
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
                                },
                            ],
                            "learnings": [],
                            "potential_issues": [
                                "The distinction between the two standards might not be clear-cut for all types of software development.",
                                "There might be specific circumstances or details about the software platform that could affect which standard applies.",
                            ],
                            "missing_info": [
                                "Specific details about the type of software platform being developed (e.g., for internal use or for sale).",
                                "Whether the entity developing the software is also the end-user or if it's being developed for external customers.",
                            ],
                            "done": False,
                            "required_formatting": None,
                        },
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "eb7023b1-5ee8-43b8-b90f-ac5a23d37c31",
                        "content": {
                            "completed_steps": [],
                            "next_steps": [
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Research ASC 350-40 to understand its scope and applicability to software development.",
                                },
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Research ASC 985 to understand its scope and applicability to software development.",
                                },
                                {
                                    "tool_name": "AccountingResearchTool",
                                    "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
                                },
                            ],
                            "formatting_step": None,
                        },
                    }
                ],
            },
        ],
    }

    from litellm.adapters.anthropic_adapter import anthropic_adapter

    translated_params = anthropic_adapter.translate_completion_input_params(
        kwargs=kwargs
    )

    print(translated_params["messages"])

    assert len(translated_params["messages"]) > 0
    assert translated_params["messages"][1]["role"] == "user"
@@ -4405,6 +4405,3 @@ def test_moderation():
    output = response.results[0]
    print(output)
    return output
-
-
-# test_moderation()
@@ -219,3 +219,44 @@ def test_base64_image_input(url, expected_media_type):
    response = convert_to_anthropic_image_obj(openai_image_url=url)

    assert response["media_type"] == expected_media_type


def test_anthropic_messages_tool_call():
    messages = [
        {
            "role": "user",
            "content": "Would development of a software platform be under ASC 350-40 or ASC 985?",
        },
        {
            "role": "assistant",
            "content": "",
            "tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
            "tool_calls": [
                {
                    "id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
                    "function": {
                        "arguments": '{"completed_steps": [], "next_steps": [{"tool_name": "AccountingResearchTool", "description": "Research ASC 350-40 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Research ASC 985 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}], "learnings": [], "potential_issues": ["The distinction between the two standards might not be clear-cut for all types of software development.", "There might be specific circumstances or details about the software platform that could affect which standard applies."], "missing_info": ["Specific details about the type of software platform being developed (e.g., for internal use or for sale).", "Whether the entity developing the software is also the end-user or if it\'s being developed for external customers."], "done": false, "required_formatting": null}',
                        "name": "TaskPlanningTool",
                    },
                    "type": "function",
                }
            ],
        },
        {
            "role": "function",
            "content": '{"completed_steps":[],"next_steps":[{"tool_name":"AccountingResearchTool","description":"Research ASC 350-40 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Research ASC 985 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}],"formatting_step":null}',
            "name": "TaskPlanningTool",
            "tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
        },
    ]

    translated_messages = anthropic_messages_pt(
        messages, model="claude-3-sonnet-20240229", llm_provider="anthropic"
    )

    print(translated_messages)

    assert (
        translated_messages[-1]["content"][0]["tool_use_id"]
        == "bc8cb4b6-88c4-4138-8993-3a9d9cd51656"
    )
@@ -55,7 +55,10 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
import litellm.litellm_core_utils
import litellm.litellm_core_utils.json_validation_rule
from litellm.caching import DualCache
-from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.litellm_core_utils.core_helpers import (
+    get_file_check_sum,
+    map_finish_reason,
+)
from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
from litellm.litellm_core_utils.llm_request_utils import _ensure_extra_body_is_safe
from litellm.litellm_core_utils.redact_messages import (
@@ -557,12 +560,8 @@ def function_setup(
            or call_type == CallTypes.transcription.value
        ):
            _file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
-           file_name = getattr(_file_name, "name", "audio_file")
-           file_descriptor = _file_name.fileno()
-           file_stat = os.fstat(file_descriptor)
-           file_size = str(file_stat.st_size)
-
-           file_checksum = _file_name.name + file_size
+           file_checksum = get_file_check_sum(_file=_file_name)
+           file_name = _file_name.name
            if "metadata" in kwargs:
                kwargs["metadata"]["file_checksum"] = file_checksum
            else: