LiteLLM minor fixes + improvements (31/08/2024) (#5464)

* fix(vertex_endpoints.py): fix vertex ai pass through endpoints

* test(test_streaming.py): skip model due to end of life

* feat(custom_logger.py): add special callback for model hitting tpm/rpm limits

Closes https://github.com/BerriAI/litellm/issues/4096
This commit is contained in:
Krish Dholakia 2024-09-01 13:31:42 -07:00 committed by GitHub
parent 1c9a82771a
commit ca4e746545
8 changed files with 174 additions and 13 deletions

View file

@ -73,6 +73,45 @@ def exception_handler(e: Exception):
)
def construct_target_url(
base_url: str,
requested_route: str,
default_vertex_location: Optional[str],
default_vertex_project: Optional[str],
) -> httpx.URL:
"""
Allow user to specify their own project id / location.
If missing, use defaults
Handle cachedContent scenario - https://github.com/BerriAI/litellm/issues/5460
Constructed Url:
POST https://LOCATION-aiplatform.googleapis.com/{version}/projects/PROJECT_ID/locations/LOCATION/cachedContents
"""
new_base_url = httpx.URL(base_url)
if "locations" in requested_route: # contains the target project id + location
updated_url = new_base_url.copy_with(path=requested_route)
return updated_url
"""
- Add endpoint version (e.g. v1beta for cachedContent, v1 for rest)
- Add default project id
- Add default location
"""
vertex_version: Literal["v1", "v1beta1"] = "v1"
if "cachedContent" in requested_route:
vertex_version = "v1beta1"
base_requested_route = "{}/projects/{}/locations/{}".format(
vertex_version, default_vertex_project, default_vertex_location
)
updated_requested_route = "/" + base_requested_route + requested_route
updated_url = new_base_url.copy_with(path=updated_requested_route)
return updated_url
@router.api_route(
"/vertex-ai/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"]
)
@ -86,8 +125,6 @@ async def vertex_proxy_route(
import re
from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance
verbose_proxy_logger.debug("requested endpoint %s", endpoint)
headers: dict = {}
# Use headers from the incoming request if default_vertex_config is not set
@ -133,8 +170,14 @@ async def vertex_proxy_route(
encoded_endpoint = "/" + encoded_endpoint
# Construct the full target URL using httpx
base_url = httpx.URL(base_target_url)
updated_url = base_url.copy_with(path=encoded_endpoint)
updated_url = construct_target_url(
base_url=base_target_url,
requested_route=encoded_endpoint,
default_vertex_location=vertex_location,
default_vertex_project=vertex_project,
)
# base_url = httpx.URL(base_target_url)
# updated_url = base_url.copy_with(path=encoded_endpoint)
verbose_proxy_logger.debug("updated url %s", updated_url)