# Mirror of https://github.com/BerriAI/litellm.git
# Synced: 2025-04-26 19:24:27 +00:00
"""
|
|
API Handler for calling Vertex AI Model Garden Models
|
|
|
|
Most Vertex Model Garden Models are OpenAI compatible - so this handler calls `openai_like_chat_completions`
|
|
|
|
Usage:
|
|
|
|
response = litellm.completion(
|
|
model="vertex_ai/openai/5464397967697903616",
|
|
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
|
)
|
|
|
|
Sent to this route when `model` is in the format `vertex_ai/openai/{MODEL_ID}`
|
|
|
|
|
|
Vertex Documentation for using the OpenAI /chat/completions endpoint: https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_llama3_deployment.ipynb
|
|
"""
from typing import Callable, Optional, Union

import httpx  # type: ignore

from litellm.utils import ModelResponse

from ..common_utils import VertexAIError
from ..vertex_llm_base import VertexBase

def create_vertex_url(
    vertex_location: str,
    vertex_project: str,
    stream: Optional[bool],
    model: str,
    api_base: Optional[str] = None,
) -> str:
    """Build the default Vertex AI endpoint URL for a Model Garden deployment.

    NOTE: `stream` and `api_base` are accepted for signature parity with the
    other vertex URL builders but do not influence the generated URL.
    """
    host = f"https://{vertex_location}-aiplatform.googleapis.com"
    resource = (
        f"/v1beta1/projects/{vertex_project}"
        f"/locations/{vertex_location}/endpoints/{model}"
    )
    return host + resource
|
|
|
|
|
|
class VertexAIModelGardenModels(VertexBase):
    """Handler for Vertex AI Model Garden models.

    Most Model Garden deployments expose an OpenAI-compatible
    /chat/completions endpoint, so this class authenticates against Vertex AI
    and delegates the actual request to `OpenAILikeChatHandler`.
    """

    def __init__(self) -> None:
        pass

    def completion(
        self,
        model: str,
        messages: list,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        logging_obj,
        api_base: Optional[str],
        optional_params: dict,
        custom_prompt_dict: dict,
        headers: Optional[dict],
        timeout: Union[float, httpx.Timeout],
        litellm_params: dict,
        vertex_project=None,
        vertex_location=None,
        vertex_credentials=None,
        logger_fn=None,
        acompletion: bool = False,
        client=None,
    ):
        """
        Handles calling Vertex AI Model Garden Models in OpenAI compatible format

        Sent to this route when `model` is in the format `vertex_ai/openai/{MODEL_ID}`

        Raises:
            VertexAIError: status 400 when the vertexai SDK is missing or too
                old; status 500 when the downstream completion call fails.
        """
        try:
            import vertexai

            from litellm.llms.openai_like.chat.handler import OpenAILikeChatHandler
            from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
                VertexLLM,
            )
        except Exception as e:
            raise VertexAIError(
                status_code=400,
                message=f"""vertexai import failed please run `pip install -U "google-cloud-aiplatform>=1.38"`. Got error: {e}""",
            ) from e

        # BUG FIX: the original used `or`, which short-circuits after the first
        # hasattr — when `vertexai.preview` is absent, evaluating the second
        # hasattr's argument raises a raw AttributeError instead of the intended
        # upgrade message, and when `preview` exists the `language_models` check
        # never runs at all. Both attributes must be present, hence `and`.
        if not (
            hasattr(vertexai, "preview")
            and hasattr(vertexai.preview, "language_models")
        ):
            raise VertexAIError(
                status_code=400,
                message="""Upgrade vertex ai. Run `pip install "google-cloud-aiplatform>=1.38"`""",
            )
        try:
            # Strip the route prefix; the remaining id is the numeric Vertex
            # endpoint id used in the URL path.
            model = model.replace("openai/", "")
            vertex_httpx_logic = VertexLLM()

            access_token, project_id = vertex_httpx_logic._ensure_access_token(
                credentials=vertex_credentials,
                project_id=vertex_project,
                custom_llm_provider="vertex_ai",
            )

            openai_like_chat_completions = OpenAILikeChatHandler()

            ## CONSTRUCT API BASE
            stream: bool = optional_params.get("stream", False) or False
            optional_params["stream"] = stream
            default_api_base = create_vertex_url(
                vertex_location=vertex_location or "us-central1",
                vertex_project=vertex_project or project_id,
                stream=stream,
                model=model,
            )

            # NOTE(review): because the URL scheme contains "https:", this
            # split always yields >1 parts, so `endpoint` becomes everything
            # after the scheme's colon — verify _check_custom_proxy expects
            # this before changing it.
            if len(default_api_base.split(":")) > 1:
                endpoint = default_api_base.split(":")[-1]
            else:
                endpoint = ""

            _, api_base = self._check_custom_proxy(
                api_base=api_base,
                custom_llm_provider="vertex_ai",
                gemini_api_key=None,
                endpoint=endpoint,
                stream=stream,
                auth_header=None,
                url=default_api_base,
            )
            # The endpoint id is already embedded in `api_base`; the
            # OpenAI-compatible server is called with an empty model name.
            model = ""
            return openai_like_chat_completions.completion(
                model=model,
                messages=messages,
                api_base=api_base,
                api_key=access_token,
                custom_prompt_dict=custom_prompt_dict,
                model_response=model_response,
                print_verbose=print_verbose,
                logging_obj=logging_obj,
                optional_params=optional_params,
                acompletion=acompletion,
                litellm_params=litellm_params,
                logger_fn=logger_fn,
                client=client,
                timeout=timeout,
                encoding=encoding,
                custom_llm_provider="vertex_ai",
            )

        except Exception as e:
            # Wrap any downstream failure, preserving it as the chained cause.
            raise VertexAIError(status_code=500, message=str(e)) from e