Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-28 04:04:31 +00:00)

Merge branch 'main' into litellm_auth_fix

Commit ced4582ecb: 24 changed files with 483 additions and 59 deletions
@@ -11,7 +11,7 @@
 <p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
 <br>
 </p>
-<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
+<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (LLM Gateway)</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>
 <h4 align="center">
     <a href="https://pypi.org/project/litellm/" target="_blank">
         <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
@@ -35,9 +35,9 @@ LiteLLM manages:
 - Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
 - [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
 - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
-- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
+- Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server (LLM Gateway)](https://docs.litellm.ai/docs/simple_proxy)

-[**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
+[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
 [**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)

 🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published.
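As a quick illustration of the consistent-output guarantee the README lines above describe, here is a minimal SDK sketch (not part of this commit; it assumes `litellm` is installed and `OPENAI_API_KEY` is set in the environment):

```python
# Minimal sketch of the SDK pattern described above (illustrative, not from this commit).
from litellm import completion

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
)

# Consistent output: text is always available at ['choices'][0]['message']['content']
print(response["choices"][0]["message"]["content"])
```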
@@ -134,7 +134,7 @@ litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log in
 response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
 ```

-# OpenAI Proxy - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
+# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))

 Track spend + Load Balance across multiple projects

@@ -14,7 +14,7 @@ https://github.com/BerriAI/litellm

 ## How to use LiteLLM
 You can use litellm through either:
-1. [LiteLLM Proxy Server](#openai-proxy) - Server to call 100+ LLMs, load balance, cost tracking across projects
+1. [LiteLLM Proxy Server](#openai-proxy) - Server (LLM Gateway) to call 100+ LLMs, load balance, cost tracking across projects
 2. [LiteLLM python SDK](#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking

 ### When to use LiteLLM Proxy Server
@@ -427,6 +427,105 @@ print(resp)
 ```


+### **Context Caching**
+
+Use Vertex AI Context Caching
+
+[**Relevant VertexAI Docs**](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-overview)
+
+<Tabs>
+
+<TabItem value="proxy" label="LiteLLM PROXY">
+
+1. Add model to config.yaml
+```yaml
+model_list:
+  # used for /chat/completions, /completions, /embeddings endpoints
+  - model_name: gemini-1.5-pro-001
+    litellm_params:
+      model: vertex_ai_beta/gemini-1.5-pro-001
+      vertex_project: "project-id"
+      vertex_location: "us-central1"
+      vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
+
+# used for the /cachedContent and vertexAI native endpoints
+default_vertex_config:
+  vertex_project: "adroit-crow-413218"
+  vertex_location: "us-central1"
+  vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json
+
+```
+
+2. Start Proxy
+
+```
+$ litellm --config /path/to/config.yaml
+```
+
+3. Make Request!
+
+- First create a cachedContents object by calling the Vertex `cachedContents` endpoint. [VertexAI API Ref for cachedContents endpoint](https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest). (LiteLLM proxy forwards the `/cachedContents` request to the VertexAI API)
+- Use the `cachedContents` object in your /chat/completions request to vertexAI
+
+```python
+import datetime
+import openai
+import httpx
+
+# Set Litellm proxy variables here
+LITELLM_BASE_URL = "http://0.0.0.0:4000"
+LITELLM_PROXY_API_KEY = "sk-1234"
+
+client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
+httpx_client = httpx.Client(timeout=30)
+
+################################
+# First create a cachedContents object
+# this request gets forwarded as is to: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
+print("creating cached content")
+create_cache = httpx_client.post(
+    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
+    headers = {"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
+    json = {
+        "model": "gemini-1.5-pro-001",
+        "contents": [
+            {
+                "role": "user",
+                "parts": [{
+                    "text": "This is sample text to demonstrate explicit caching."*4000
+                }]
+            }
+        ],
+    }
+)
+print("response from create_cache", create_cache)
+create_cache_response = create_cache.json()
+print("json from create_cache", create_cache_response)
+cached_content_name = create_cache_response["name"]
+
+#################################
+# Use the `cachedContents` object in your /chat/completions
+response = client.chat.completions.create( # type: ignore
+    model="gemini-1.5-pro-001",
+    max_tokens=8192,
+    messages=[
+        {
+            "role": "user",
+            "content": "what is the sample text about?",
+        },
+    ],
+    temperature="0.7",
+    extra_body={"cached_content": cached_content_name}, # 👈 key change
+)
+
+print("response from proxy", response)
+
+```
+
+</TabItem>
+</Tabs>
+
 ## Pre-requisites
 * `pip install google-cloud-aiplatform` (pre-installed on proxy docker image)
 * Authentication:
@@ -5,7 +5,7 @@ import TabItem from '@theme/TabItem';
 # Quick Start
 Quick start CLI, Config, Docker

-LiteLLM Server manages:
+LiteLLM Server (LLM Gateway) manages:

 * **Unified Interface**: Calling 100+ LLMs [Huggingface/Bedrock/TogetherAI/etc.](#other-supported-models) in the OpenAI `ChatCompletions` & `Completions` format
 * **Cost tracking**: Authentication, Spend Tracking & Budgets [Virtual Keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
@@ -1,5 +1,11 @@
 # [BETA] Vertex AI Endpoints

+:::tip
+
+Looking for the Unified API (OpenAI format) for VertexAI ? [Go here - using vertexAI with LiteLLM SDK or LiteLLM Proxy Server](../docs/providers/vertex.md)
+
+:::
+
 ## Supported API Endpoints

 - Gemini API
@@ -24,7 +24,7 @@ const sidebars = {
       link: {
         type: "generated-index",
         title: "💥 LiteLLM Proxy Server",
-        description: `OpenAI Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
+        description: `OpenAI Proxy Server (LLM Gateway) to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
         slug: "/simple_proxy",
       },
       items: [
@@ -261,6 +261,7 @@ default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
 max_internal_user_budget: Optional[float] = None
+internal_user_budget_duration: Optional[str] = None
 max_end_user_budget: Optional[float] = None
 #### REQUEST PRIORITIZATION ####
 priority_reservation: Optional[Dict[str, float]] = None
@@ -90,7 +90,13 @@ class ServiceLogging(CustomLogger):
             )

     async def init_prometheus_services_logger_if_none(self):
-        if self.prometheusServicesLogger is None:
+        """
+        initializes prometheusServicesLogger if it is None or no attribute exists on ServiceLogging Object
+
+        """
+        if not hasattr(self, "prometheusServicesLogger"):
+            self.prometheusServicesLogger = PrometheusServicesLogger()
+        elif self.prometheusServicesLogger is None:
             self.prometheusServicesLogger = self.prometheusServicesLogger()
         return

@@ -1,6 +1,9 @@
 # What is this?
 ## Helper utilities
-from typing import List, Literal, Optional, Tuple
+import os
+from typing import BinaryIO, List, Literal, Optional, Tuple
+
+from litellm._logging import verbose_logger


 def map_finish_reason(
@@ -83,3 +86,20 @@ def _get_parent_otel_span_from_kwargs(kwargs: Optional[dict] = None):
         return kwargs["litellm_parent_otel_span"]
     except:
         return None
+
+
+def get_file_check_sum(_file: BinaryIO):
+    """
+    Helper to safely get file checksum - used as a cache key
+    """
+    try:
+        file_descriptor = _file.fileno()
+        file_stat = os.fstat(file_descriptor)
+        file_size = str(file_stat.st_size)
+        file_checksum = _file.name + file_size
+        return file_checksum
+    except Exception as e:
+        verbose_logger.error(f"Error getting file_checksum: {(str(e))}")
+        file_checksum = _file.name
+        return file_checksum
+    return file_checksum
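A rough usage sketch of the `get_file_check_sum` helper added above (the audio file path is a placeholder; the import path follows the `litellm.litellm_core_utils.core_helpers` module this hunk edits):

```python
# Illustrative only: how the helper above derives a cache key for a transcription file.
from litellm.litellm_core_utils.core_helpers import get_file_check_sum

with open("speech.mp3", "rb") as audio_file:  # placeholder file path
    # Checksum is file name + size, e.g. "speech.mp312345"; falls back to the name alone on error.
    cache_key = get_file_check_sum(_file=audio_file)
    print(cache_key)
```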
@@ -287,6 +287,9 @@ class AnthropicConfig:
         if user_message is not None:
             new_messages.append(user_message)

+        if len(new_user_content_list) > 0:
+            new_messages.append({"role": "user", "content": new_user_content_list})
+
         if len(tool_message_list) > 0:
             new_messages.extend(tool_message_list)

@@ -278,6 +278,14 @@ class VertexFineTuningAPI(VertexLLM):
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
         elif "countTokens" in request_route:
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
+        elif "cachedContents" in request_route:
+            _model = request_data.get("model")
+            if _model is not None and "/publishers/google/models/" not in _model:
+                request_data["model"] = (
+                    f"projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{_model}"
+                )
+
+            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
         else:
             raise ValueError(f"Unsupported Vertex AI request route: {request_route}")
         if self.async_handler is None:
@@ -1135,8 +1135,9 @@ def convert_to_anthropic_tool_result(message: dict) -> AnthropicMessagesToolResu
         return anthropic_tool_result
     if message["role"] == "function":
         content = message.get("content") # type: ignore
+        tool_call_id = message.get("tool_call_id") or str(uuid.uuid4())
         anthropic_tool_result = AnthropicMessagesToolResultParam(
-            type="tool_result", tool_use_id=str(uuid.uuid4()), content=content
+            type="tool_result", tool_use_id=tool_call_id, content=content
         )

         return anthropic_tool_result
@@ -881,6 +881,21 @@ class VertexLLM(BaseLLM):

         return self._credentials.token, self.project_id

+    def is_using_v1beta1_features(self, optional_params: dict) -> bool:
+        """
+        VertexAI only supports ContextCaching on v1beta1
+
+        use this helper to decide if request should be sent to v1 or v1beta1
+
+        Returns v1beta1 if context caching is enabled
+        Returns v1 in all other cases
+        """
+        if "cached_content" in optional_params:
+            return True
+        if "CachedContent" in optional_params:
+            return True
+        return False
+
     def _get_token_and_url(
         self,
         model: str,
@@ -891,6 +906,7 @@ class VertexLLM(BaseLLM):
         stream: Optional[bool],
         custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
         api_base: Optional[str],
+        should_use_v1beta1_features: Optional[bool] = False,
     ) -> Tuple[Optional[str], str]:
         """
         Internal function. Returns the token and url for the call.
@@ -920,12 +936,13 @@ class VertexLLM(BaseLLM):
             vertex_location = self.get_vertex_region(vertex_region=vertex_location)

             ### SET RUNTIME ENDPOINT ###
+            version = "v1beta1" if should_use_v1beta1_features is True else "v1"
             endpoint = "generateContent"
             if stream is True:
                 endpoint = "streamGenerateContent"
-                url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
+                url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}?alt=sse"
             else:
-                url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"
+                url = f"https://{vertex_location}-aiplatform.googleapis.com/{version}/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"

         if (
             api_base is not None
@@ -1055,6 +1072,9 @@ class VertexLLM(BaseLLM):
     ) -> Union[ModelResponse, CustomStreamWrapper]:
         stream: Optional[bool] = optional_params.pop("stream", None) # type: ignore

+        should_use_v1beta1_features = self.is_using_v1beta1_features(
+            optional_params=optional_params
+        )
         auth_header, url = self._get_token_and_url(
             model=model,
             gemini_api_key=gemini_api_key,
@@ -1064,6 +1084,7 @@ class VertexLLM(BaseLLM):
             stream=stream,
             custom_llm_provider=custom_llm_provider,
             api_base=api_base,
+            should_use_v1beta1_features=should_use_v1beta1_features,
         )

         ## TRANSFORMATION ##
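In short, a `cached_content` (or `CachedContent`) key in `optional_params` flips the Vertex call onto the `v1beta1` API version. A standalone sketch of that decision helper (the cached-content name is a placeholder):

```python
# Illustrative check of the routing helper added in the hunks above.
from litellm.llms.vertex_httpx import VertexLLM

vertex_llm = VertexLLM()

# Context-caching request -> served from the v1beta1 Vertex endpoint
assert vertex_llm.is_using_v1beta1_features(
    optional_params={"cached_content": "projects/my-project/locations/us-central1/cachedContents/123"}  # placeholder name
) is True

# Plain completion request -> stays on v1
assert vertex_llm.is_using_v1beta1_features(optional_params={"temperature": 0.1}) is False
```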
@@ -1,4 +1,8 @@
 model_list:
-  - model_name: "gpt-4"
+  - model_name: "claude-3-5-sonnet-20240620"
     litellm_params:
-      model: "gpt-4"
+      model: "claude-3-5-sonnet-20240620"
+
+litellm_settings:
+  max_internal_user_budget: 0.001
+  internal_user_budget_duration: "5m"
@@ -91,6 +91,10 @@ async def new_user(
     if litellm.max_internal_user_budget is not None:
         data_json["max_budget"] = litellm.max_internal_user_budget

+    if "budget_duration" in data_json and data_json["budget_duration"] is None:
+        if litellm.internal_user_budget_duration is not None:
+            data_json["budget_duration"] = litellm.internal_user_budget_duration
+
     response = await generate_key_helper_fn(request_type="user", **data_json)

     # Admin UI Logic
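Read together with the config hunk above, the intent is that new internal users inherit a default budget and reset window. A hedged sketch of that flow (the `data_json` dict here is a stand-in for the `/user/new` payload):

```python
# Illustrative sketch only: how the module-level settings feed the new_user defaults above.
import litellm

litellm.max_internal_user_budget = 0.001        # mirrors litellm_settings.max_internal_user_budget
litellm.internal_user_budget_duration = "5m"    # mirrors litellm_settings.internal_user_budget_duration

data_json = {"user_id": "my-internal-user", "budget_duration": None}  # stand-in request payload

if litellm.max_internal_user_budget is not None:
    data_json["max_budget"] = litellm.max_internal_user_budget
if data_json.get("budget_duration") is None and litellm.internal_user_budget_duration is not None:
    data_json["budget_duration"] = litellm.internal_user_budget_duration

print(data_json)  # {'user_id': 'my-internal-user', 'budget_duration': '5m', 'max_budget': 0.001}
```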
@@ -3,20 +3,14 @@ model_list:
     litellm_params:
       model: openai/fake
       api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railwaz.app/
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
   - model_name: fireworks-llama-v3-70b-instruct
     litellm_params:
       model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
       api_key: "os.environ/FIREWORKS"
-  # provider specific wildcard routing
-  - model_name: "anthropic/*"
+  - model_name: "*"
     litellm_params:
-      model: "anthropic/*"
-      api_key: os.environ/ANTHROPIC_API_KEY
-  - model_name: "groq/*"
-    litellm_params:
-      model: "groq/*"
-      api_key: os.environ/GROQ_API_KEY
+      model: "*"
   - model_name: "*"
     litellm_params:
       model: openai/*
@@ -25,37 +19,22 @@ model_list:
     litellm_params:
       model: mistral/mistral-small-latest
       api_key: "os.environ/MISTRAL_API_KEY"
-  - model_name: tts
+  - model_name: gemini-1.5-pro-001
     litellm_params:
-      model: openai/tts-1
-      api_key: "os.environ/OPENAI_API_KEY"
-    model_info:
-      mode: audio_speech
+      model: vertex_ai_beta/gemini-1.5-pro-001
+      vertex_project: "adroit-crow-413218"
+      vertex_location: "us-central1"
+      vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json"
+      # Add path to service account.json

-# for /files endpoints
-files_settings:
-  - custom_llm_provider: azure
-    api_base: https://exampleopenaiendpoint-production.up.railway.app
-    api_key: fake-key
-    api_version: "2023-03-15-preview"
-  - custom_llm_provider: openai
-    api_key: os.environ/OPENAI_API_KEY
+default_vertex_config:
+  vertex_project: "adroit-crow-413218"
+  vertex_location: "us-central1"
+  vertex_credentials: "adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json

 general_settings:
   master_key: sk-1234
-  pass_through_endpoints:
-    - path: "/v1/rerank" # route you want to add to LiteLLM Proxy Server
-      target: "https://api.cohere.com/v1/rerank" # URL this route should forward requests to
-      headers: # headers to forward to this URL
-        content-type: application/json # (Optional) Extra Headers to pass to this endpoint
-        accept: application/json
-      forward_headers: True

 litellm_settings:
   callbacks: ["otel"] # 👈 KEY CHANGE
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
@@ -5374,7 +5374,13 @@ async def anthropic_response(
     litellm.adapters = [{"id": "anthropic", "adapter": anthropic_adapter}]

     global user_temperature, user_request_timeout, user_max_tokens, user_api_base
-    data: dict = {**anthropic_data, "adapter_id": "anthropic"}
+    body = await request.body()
+    body_str = body.decode()
+    try:
+        request_data: dict = ast.literal_eval(body_str)
+    except Exception:
+        request_data = json.loads(body_str)
+    data: dict = {**request_data, "adapter_id": "anthropic"}
     try:
         data["model"] = (
             general_settings.get("completion_model", None) # server default
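The two-step parse above accepts both Python-literal and strict-JSON request bodies. A standalone sketch of that fallback (sample payloads are made up):

```python
# Illustrative fallback: ast.literal_eval handles Python-literal bodies, json.loads handles JSON.
import ast
import json

def parse_body(body_str: str) -> dict:
    try:
        return ast.literal_eval(body_str)  # e.g. single quotes / True / None
    except Exception:
        return json.loads(body_str)        # e.g. double quotes / true / null

print(parse_body("{'model': 'claude-3-5-sonnet-20240620', 'stream': False}"))
print(parse_body('{"model": "claude-3-5-sonnet-20240620", "stream": false}'))
```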
litellm/proxy/tests/test_gemini_context_caching.py (new file, 54 lines)
@@ -0,0 +1,54 @@
+import datetime
+
+import httpx
+import openai
+
+# Set Litellm proxy variables here
+LITELLM_BASE_URL = "http://0.0.0.0:4000"
+LITELLM_PROXY_API_KEY = "sk-1234"
+
+client = openai.OpenAI(api_key=LITELLM_PROXY_API_KEY, base_url=LITELLM_BASE_URL)
+httpx_client = httpx.Client(timeout=30)
+
+################################
+# First create a cachedContents object
+print("creating cached content")
+create_cache = httpx_client.post(
+    url=f"{LITELLM_BASE_URL}/vertex-ai/cachedContents",
+    headers={"Authorization": f"Bearer {LITELLM_PROXY_API_KEY}"},
+    json={
+        "model": "gemini-1.5-pro-001",
+        "contents": [
+            {
+                "role": "user",
+                "parts": [
+                    {
+                        "text": "This is sample text to demonstrate explicit caching."
+                        * 4000
+                    }
+                ],
+            }
+        ],
+    },
+)
+print("response from create_cache", create_cache)
+create_cache_response = create_cache.json()
+print("json from create_cache", create_cache_response)
+cached_content_name = create_cache_response["name"]
+
+#################################
+# Use the `cachedContents` object in your /chat/completions
+response = client.chat.completions.create( # type: ignore
+    model="gemini-1.5-pro-001",
+    max_tokens=8192,
+    messages=[
+        {
+            "role": "user",
+            "content": "what is the sample text about?",
+        },
+    ],
+    temperature="0.7",
+    extra_body={"cached_content": cached_content_name}, # 👈 key change
+)
+
+print("response from proxy", response)
@@ -303,3 +303,30 @@ async def vertex_cancel_fine_tuning_job(
         return response
     except Exception as e:
         raise exception_handler(e) from e
+
+
+@router.post(
+    "/vertex-ai/cachedContents",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["Vertex AI endpoints"],
+)
+async def vertex_create_add_cached_content(
+    request: Request,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    this is a pass through endpoint for the Vertex AI API. /cachedContents endpoint
+
+    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
+
+    it uses the vertex ai credentials on the proxy and forwards to vertex ai api
+    """
+    try:
+        response = await execute_post_vertex_ai_request(
+            request=request,
+            route="/cachedContents",
+        )
+        return response
+    except Exception as e:
+        raise exception_handler(e) from e
@@ -1969,3 +1969,58 @@ def test_prompt_factory_nested():
         assert isinstance(
             message["parts"][0]["text"], str
         ), "'text' value not a string."
+
+
+def test_get_token_url():
+    from litellm.llms.vertex_httpx import VertexLLM
+
+    vertex_llm = VertexLLM()
+    vertex_ai_project = "adroit-crow-413218"
+    vertex_ai_location = "us-central1"
+    json_obj = get_vertex_ai_creds_json()
+    vertex_credentials = json.dumps(json_obj)
+
+    should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
+        optional_params={"cached_content": "hi"}
+    )
+
+    assert should_use_v1beta1_features is True
+
+    _, url = vertex_llm._get_token_and_url(
+        vertex_project=vertex_ai_project,
+        vertex_location=vertex_ai_location,
+        vertex_credentials=vertex_credentials,
+        gemini_api_key="",
+        custom_llm_provider="vertex_ai_beta",
+        should_use_v1beta1_features=should_use_v1beta1_features,
+        api_base=None,
+        model="",
+        stream=False,
+    )
+
+    print("url=", url)
+
+    assert "/v1beta1/" in url
+
+    should_use_v1beta1_features = vertex_llm.is_using_v1beta1_features(
+        optional_params={"temperature": 0.1}
+    )
+
+    _, url = vertex_llm._get_token_and_url(
+        vertex_project=vertex_ai_project,
+        vertex_location=vertex_ai_location,
+        vertex_credentials=vertex_credentials,
+        gemini_api_key="",
+        custom_llm_provider="vertex_ai_beta",
+        should_use_v1beta1_features=should_use_v1beta1_features,
+        api_base=None,
+        model="",
+        stream=False,
+    )
+
+    print("url for normal request", url)
+
+    assert "v1beta1" not in url
+    assert "/v1/" in url
+
+    pass
@@ -183,3 +183,96 @@ async def test_anthropic_router_completion_e2e():
     assert isinstance(response, AnthropicResponse)

     assert response.model == "gpt-3.5-turbo"
+
+
+def test_anthropic_tool_calling_translation():
+    kwargs = {
+        "model": "claude-3-5-sonnet-20240620",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Would development of a software platform be under ASC 350-40 or ASC 985?",
+                    }
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": [
+                    {
+                        "type": "tool_use",
+                        "id": "37d6f703-cbcc-497d-95a1-2aa24a114adc",
+                        "name": "TaskPlanningTool",
+                        "input": {
+                            "completed_steps": [],
+                            "next_steps": [
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Research ASC 350-40 to understand its scope and applicability to software development.",
+                                },
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Research ASC 985 to understand its scope and applicability to software development.",
+                                },
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
+                                },
+                            ],
+                            "learnings": [],
+                            "potential_issues": [
+                                "The distinction between the two standards might not be clear-cut for all types of software development.",
+                                "There might be specific circumstances or details about the software platform that could affect which standard applies.",
+                            ],
+                            "missing_info": [
+                                "Specific details about the type of software platform being developed (e.g., for internal use or for sale).",
+                                "Whether the entity developing the software is also the end-user or if it's being developed for external customers.",
+                            ],
+                            "done": False,
+                            "required_formatting": None,
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "eb7023b1-5ee8-43b8-b90f-ac5a23d37c31",
+                        "content": {
+                            "completed_steps": [],
+                            "next_steps": [
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Research ASC 350-40 to understand its scope and applicability to software development.",
+                                },
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Research ASC 985 to understand its scope and applicability to software development.",
+                                },
+                                {
+                                    "tool_name": "AccountingResearchTool",
+                                    "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development.",
+                                },
+                            ],
+                            "formatting_step": None,
+                        },
+                    }
+                ],
+            },
+        ],
+    }
+
+    from litellm.adapters.anthropic_adapter import anthropic_adapter
+
+    translated_params = anthropic_adapter.translate_completion_input_params(
+        kwargs=kwargs
+    )
+
+    print(translated_params["messages"])
+
+    assert len(translated_params["messages"]) > 0
+    assert translated_params["messages"][1]["role"] == "user"
@@ -4405,6 +4405,3 @@ def test_moderation():
     output = response.results[0]
     print(output)
     return output
-
-
-# test_moderation()
@@ -219,3 +219,44 @@ def test_base64_image_input(url, expected_media_type):
     response = convert_to_anthropic_image_obj(openai_image_url=url)

     assert response["media_type"] == expected_media_type
+
+
+def test_anthropic_messages_tool_call():
+    messages = [
+        {
+            "role": "user",
+            "content": "Would development of a software platform be under ASC 350-40 or ASC 985?",
+        },
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
+            "tool_calls": [
+                {
+                    "id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
+                    "function": {
+                        "arguments": '{"completed_steps": [], "next_steps": [{"tool_name": "AccountingResearchTool", "description": "Research ASC 350-40 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Research ASC 985 to understand its scope and applicability to software development."}, {"tool_name": "AccountingResearchTool", "description": "Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}], "learnings": [], "potential_issues": ["The distinction between the two standards might not be clear-cut for all types of software development.", "There might be specific circumstances or details about the software platform that could affect which standard applies."], "missing_info": ["Specific details about the type of software platform being developed (e.g., for internal use or for sale).", "Whether the entity developing the software is also the end-user or if it\'s being developed for external customers."], "done": false, "required_formatting": null}',
+                        "name": "TaskPlanningTool",
+                    },
+                    "type": "function",
+                }
+            ],
+        },
+        {
+            "role": "function",
+            "content": '{"completed_steps":[],"next_steps":[{"tool_name":"AccountingResearchTool","description":"Research ASC 350-40 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Research ASC 985 to understand its scope and applicability to software development."},{"tool_name":"AccountingResearchTool","description":"Compare the scopes of ASC 350-40 and ASC 985 to determine which is more applicable to software platform development."}],"formatting_step":null}',
+            "name": "TaskPlanningTool",
+            "tool_call_id": "bc8cb4b6-88c4-4138-8993-3a9d9cd51656",
+        },
+    ]
+
+    translated_messages = anthropic_messages_pt(
+        messages, model="claude-3-sonnet-20240229", llm_provider="anthropic"
+    )
+
+    print(translated_messages)
+
+    assert (
+        translated_messages[-1]["content"][0]["tool_use_id"]
+        == "bc8cb4b6-88c4-4138-8993-3a9d9cd51656"
+    )
@@ -55,7 +55,10 @@ import litellm._service_logger # for storing API inputs, outputs, and metadata
 import litellm.litellm_core_utils
 import litellm.litellm_core_utils.json_validation_rule
 from litellm.caching import DualCache
-from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.litellm_core_utils.core_helpers import (
+    get_file_check_sum,
+    map_finish_reason,
+)
 from litellm.litellm_core_utils.exception_mapping_utils import get_error_message
 from litellm.litellm_core_utils.llm_request_utils import _ensure_extra_body_is_safe
 from litellm.litellm_core_utils.redact_messages import (
@@ -557,12 +560,8 @@ def function_setup(
             or call_type == CallTypes.transcription.value
         ):
             _file_name: BinaryIO = args[1] if len(args) > 1 else kwargs["file"]
-            file_name = getattr(_file_name, "name", "audio_file")
-            file_descriptor = _file_name.fileno()
-            file_stat = os.fstat(file_descriptor)
-            file_size = str(file_stat.st_size)
-
-            file_checksum = _file_name.name + file_size
+            file_checksum = get_file_check_sum(_file=_file_name)
+            file_name = _file_name.name
             if "metadata" in kwargs:
                 kwargs["metadata"]["file_checksum"] = file_checksum
             else: