Merge branch 'main' into litellm_allow_turning_off_message_logging_for_callbacks

This commit is contained in:
Ishaan Jaff 2024-09-09 21:59:36 -07:00 committed by GitHub
commit 02325f33d7
34 changed files with 442 additions and 117 deletions


@@ -25,6 +25,13 @@ model_list:
model: openai/gpt-4o
api_key: os.environ/OPENAI_API_KEY
tags: ["paid"] # 👈 Key Change
- model_name: gpt-4
litellm_params:
model: openai/gpt-4o
api_key: os.environ/OPENAI_API_KEY
api_base: https://exampleopenaiendpoint-production.up.railway.app/
tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
router_settings:
enable_tag_filtering: True # 👈 Key Change
@@ -136,6 +143,46 @@ Response
}
```
## Setting Default Tags
Use this if you want all untagged requests to be routed to specific deployments.
1. Set a default tag in your yaml
```yaml
model_list:
- model_name: fake-openai-endpoint
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
tags: ["default"] # 👈 Key Change - All untagged requests will get routed to this
model_info:
id: "default-model" # used for identifying model in response headers
```
2. Start proxy
```shell
$ litellm --config /path/to/config.yaml
```
3. Make request with no tags
```shell
curl -i http://localhost:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-d '{
"model": "fake-openai-endpoint",
"messages": [
{"role": "user", "content": "Hello, Claude gm!"}
]
}'
```
Expect to see the following response header when this works
```shell
x-litellm-model-id: default-model
```
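For reference, the same untagged request can also be made through the OpenAI Python SDK pointed at the proxy. This is a minimal sketch, assuming the proxy from step 2 is running locally on port 4000 and accepts the `sk-1234` key used in the curl example above:
```python
import openai

# Point the OpenAI SDK at the LiteLLM proxy (assumed local setup from step 2).
client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# No tags are attached, so the router should fall back to the "default"-tagged deployment.
raw = client.chat.completions.with_raw_response.create(
    model="fake-openai-endpoint",
    messages=[{"role": "user", "content": "Hello, Claude gm!"}],
)

print(raw.headers.get("x-litellm-model-id"))  # expect: default-model
print(raw.parse())  # parsed ChatCompletion response
```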
## ✨ Team based tag routing (Enterprise)
LiteLLM Proxy supports team-based tag routing, allowing you to associate specific tags with teams and route requests accordingly. Example **Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B** (LLM Access Control For Teams)
@@ -170,6 +217,12 @@ Here's how to set up and use team-based tag routing using curl commands:
tags: ["teamB"] # 👈 Key Change
model_info:
id: "team-b-model" # used for identifying model in response headers
- model_name: fake-openai-endpoint
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
router_settings:
enable_tag_filtering: True # 👈 Key Change


@@ -208,8 +208,8 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
-d '{
"metadata": {
"logging": [{
"callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
"callback_name": "langfuse", # "otel", "langfuse", "lunary"
"callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default
"callback_type": "success", # "success", "failure", "success_and_failure"
"callback_vars": {
"langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment
"langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment


@@ -118,7 +118,7 @@ in_memory_llm_clients_cache: dict = {}
safe_memory_mode: bool = False
enable_azure_ad_token_refresh: Optional[bool] = False
### DEFAULT AZURE API VERSION ###
AZURE_DEFAULT_API_VERSION = "2024-07-01-preview" # this is updated to the latest
AZURE_DEFAULT_API_VERSION = "2024-08-01-preview" # this is updated to the latest
### COHERE EMBEDDINGS DEFAULT TYPE ###
COHERE_DEFAULT_EMBEDDING_INPUT_TYPE = "search_document"
### GUARDRAILS ###
@@ -483,7 +483,12 @@ openai_compatible_providers: List = [
"azure_ai",
"github",
]
openai_text_completion_compatible_providers: List = (
[ # providers that support `/v1/completions`
"together_ai",
"fireworks_ai",
]
)
# well supported replicate llms
replicate_models: List = [
@@ -863,7 +868,7 @@ from .llms.custom_llm import CustomLLM
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic.chat import AnthropicConfig
from .llms.anthropic.completion import AnthropicTextConfig
from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig
from .llms.databricks.chat import DatabricksConfig, DatabricksEmbeddingConfig
from .llms.predibase import PredibaseConfig
from .llms.replicate import ReplicateConfig
from .llms.cohere.completion import CohereConfig


@@ -22,6 +22,9 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_cha
from litellm.llms.anthropic.cost_calculation import (
cost_per_token as anthropic_cost_per_token,
)
from litellm.llms.databricks.cost_calculator import (
cost_per_token as databricks_cost_per_token,
)
from litellm.rerank_api.types import RerankResponse
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
@@ -159,7 +162,7 @@ def cost_per_token(
_, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
model_without_prefix = model
model_parts = model.split("/")
model_parts = model.split("/", 1)
if len(model_parts) > 1:
model_without_prefix = model_parts[1]
else:
@@ -212,6 +215,8 @@ def cost_per_token(
)
elif custom_llm_provider == "anthropic":
return anthropic_cost_per_token(model=model, usage=usage_block)
elif custom_llm_provider == "databricks":
return databricks_cost_per_token(model=model, usage=usage_block)
elif custom_llm_provider == "gemini":
return google_cost_per_token(
model=model_without_prefix,


@@ -649,7 +649,9 @@ class OpenTelemetry(CustomLogger):
return BatchSpanProcessor(
OTLPSpanExporterHTTP(
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
)
),
max_queue_size=100,
max_export_batch_size=100,
)
elif self.OTEL_EXPORTER == "otlp_grpc":
verbose_logger.debug(
@@ -659,7 +661,9 @@ class OpenTelemetry(CustomLogger):
return BatchSpanProcessor(
OTLPSpanExporterGRPC(
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
)
),
max_queue_size=100,
max_export_batch_size=100,
)
else:
verbose_logger.debug(


@@ -2333,6 +2333,8 @@ def get_standard_logging_object_payload(
completion_start_time_float = completion_start_time.timestamp()
elif isinstance(completion_start_time, float):
completion_start_time_float = completion_start_time
else:
completion_start_time_float = end_time_float
# clean up litellm hidden params
clean_hidden_params = StandardLoggingHiddenParams(
model_id=None,


@@ -245,7 +245,10 @@ class AzureOpenAIConfig:
- You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
- Remember that the model will pass the input to the tool, so the name of the tool and description should be from the models perspective.
"""
if json_schema is not None:
if json_schema is not None and (
(api_version_year <= "2024" and api_version_month < "08")
or "gpt-4o" not in model
): # azure api version "2024-08-01-preview" onwards supports 'json_schema' only for gpt-4o
_tool_choice = ChatCompletionToolChoiceObjectParam(
type="function",
function=ChatCompletionToolChoiceFunctionParam(
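For reference, a standalone sketch of the version gate introduced above; the helper name and the year/month parsing are illustrative assumptions, not code from this file:
```python
def _needs_tool_call_fallback(api_version: str, model: str) -> bool:
    # "2024-08-01-preview" -> year "2024", month "08"
    api_version_year, api_version_month = api_version.split("-")[:2]
    # Use the tool-call workaround when the API version predates 2024-08,
    # or the deployment is not a gpt-4o model; otherwise json_schema is
    # passed through to Azure natively.
    return (api_version_year <= "2024" and api_version_month < "08") or (
        "gpt-4o" not in model
    )

assert _needs_tool_call_fallback("2024-07-01-preview", "gpt-4o") is True
assert _needs_tool_call_fallback("2024-08-01-preview", "gpt-4o") is False
assert _needs_tool_call_fallback("2024-08-01-preview", "gpt-4-turbo") is True
```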


@@ -1263,6 +1263,7 @@ class OpenAIChatCompletion(BaseLLM):
error_headers = getattr(e, "headers", None)
if response is not None and hasattr(response, "text"):
error_headers = getattr(e, "headers", None)
raise OpenAIError(
status_code=500,
message=f"{str(e)}\n\nOriginal Response: {response.text}",
@@ -1800,12 +1801,11 @@ class OpenAITextCompletion(BaseLLM):
headers: Optional[dict] = None,
):
super().completion()
exception_mapping_worked = False
try:
if headers is None:
headers = self.validate_environment(api_key=api_key)
if model is None or messages is None:
raise OpenAIError(status_code=422, message=f"Missing model or messages")
raise OpenAIError(status_code=422, message="Missing model or messages")
if (
len(messages) > 0


@@ -162,11 +162,10 @@ class AzureTextCompletion(BaseLLM):
client=None,
):
super().completion()
exception_mapping_worked = False
try:
if model is None or messages is None:
raise AzureOpenAIError(
status_code=422, message=f"Missing model or messages"
status_code=422, message="Missing model or messages"
)
max_retries = optional_params.pop("max_retries", 2)
@@ -293,7 +292,10 @@
"api-version", api_version
)
response = azure_client.completions.create(**data, timeout=timeout) # type: ignore
raw_response = azure_client.completions.with_raw_response.create(
**data, timeout=timeout
)
response = raw_response.parse()
stringified_response = response.model_dump()
## LOGGING
logging_obj.post_call(
@@ -380,13 +382,15 @@
"complete_input_dict": data,
},
)
response = await azure_client.completions.create(**data, timeout=timeout)
raw_response = await azure_client.completions.with_raw_response.create(
**data, timeout=timeout
)
response = raw_response.parse()
return openai_text_completion_config.convert_to_chat_model_response_object(
response_object=response.model_dump(),
model_response_object=model_response,
)
except AzureOpenAIError as e:
exception_mapping_worked = True
raise e
except Exception as e:
status_code = getattr(e, "status_code", 500)


@@ -736,7 +736,9 @@ class BedrockLLM(BaseAWSLLM):
if (stream is not None and stream is True) and provider != "ai21":
endpoint_url = f"{endpoint_url}/model/{modelId}/invoke-with-response-stream"
proxy_endpoint_url = f"{proxy_endpoint_url}/model/{modelId}/invoke-with-response-stream"
proxy_endpoint_url = (
f"{proxy_endpoint_url}/model/{modelId}/invoke-with-response-stream"
)
else:
endpoint_url = f"{endpoint_url}/model/{modelId}/invoke"
proxy_endpoint_url = f"{proxy_endpoint_url}/model/{modelId}/invoke"
@@ -1268,7 +1270,7 @@ class AmazonConverseConfig:
if len(value) == 0: # converse raises error for empty strings
continue
value = [value]
optional_params["stop_sequences"] = value
optional_params["stopSequences"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":


@@ -29,8 +29,8 @@ from litellm.types.utils import (
)
from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage
from .base import BaseLLM
from ..base import BaseLLM
from .prompt_templates.factory import custom_prompt, prompt_factory
from ..prompt_templates.factory import custom_prompt, prompt_factory
class DatabricksError(Exception):
@@ -328,6 +328,7 @@ class DatabricksChatCompletion(BaseLLM):
api_base: str,
custom_prompt_dict: dict,
model_response: ModelResponse,
custom_llm_provider: str,
print_verbose: Callable,
encoding,
api_key,
@@ -371,6 +372,8 @@
)
response = ModelResponse(**response_json)
response.model = custom_llm_provider + "/" + response.model
if base_model is not None:
response._hidden_params["model"] = base_model
return response
@@ -472,6 +475,7 @@
data=data,
api_base=api_base,
custom_prompt_dict=custom_prompt_dict,
custom_llm_provider=custom_llm_provider,
model_response=model_response,
print_verbose=print_verbose,
encoding=encoding,
@@ -528,6 +532,8 @@
response = ModelResponse(**response_json)
response.model = custom_llm_provider + "/" + response.model
if base_model is not None:
response._hidden_params["model"] = base_model


@@ -0,0 +1,39 @@
"""
Helper util for handling databricks-specific cost calculation
- e.g.: handling 'dbrx-instruct-*'
"""
from typing import Tuple
from litellm.types.utils import Usage
from litellm.utils import get_model_info
def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
"""
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
Input:
- model: str, the model name without provider prefix
- usage: LiteLLM Usage block, containing anthropic caching information
Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
"""
base_model = model
if model.startswith("databricks/dbrx-instruct") or model.startswith(
"dbrx-instruct"
):
base_model = "databricks-dbrx-instruct"
## GET MODEL INFO
model_info = get_model_info(model=base_model, custom_llm_provider="databricks")
## CALCULATE INPUT COST
prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
## CALCULATE OUTPUT COST
completion_cost = usage["completion_tokens"] * model_info["output_cost_per_token"]
return prompt_cost, completion_cost
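A quick usage sketch of the helper added above. This is hedged: it assumes LiteLLM's model cost map has pricing for `databricks-dbrx-instruct`, and the token counts are hypothetical.
```python
from litellm.types.utils import Usage
from litellm.llms.databricks.cost_calculator import cost_per_token

# Hypothetical token counts; pricing comes from litellm's model cost map.
usage = Usage(prompt_tokens=1000, completion_tokens=200, total_tokens=1200)

prompt_cost, completion_cost = cost_per_token(model="dbrx-instruct", usage=usage)
print(f"prompt=${prompt_cost:.6f} completion=${completion_cost:.6f}")
```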


@@ -273,7 +273,7 @@ class SagemakerLLM(BaseAWSLLM):
model_id = optional_params.get("model_id", None)
if use_messages_api is True:
from litellm.llms.databricks import DatabricksChatCompletion
from litellm.llms.databricks.chat import DatabricksChatCompletion
openai_like_chat_completions = DatabricksChatCompletion()
inference_params["stream"] = True if stream is True else False


@@ -80,7 +80,7 @@ class VertexAIPartnerModels(BaseLLM):
import vertexai
from google.cloud import aiplatform
from litellm.llms.databricks import DatabricksChatCompletion
from litellm.llms.databricks.chat import DatabricksChatCompletion
from litellm.llms.OpenAI.openai import OpenAIChatCompletion
from litellm.llms.text_completion_codestral import CodestralTextCompletion
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (


@@ -92,7 +92,7 @@ from .llms.cohere import chat as cohere_chat
from .llms.cohere import completion as cohere_completion # type: ignore
from .llms.cohere import embed as cohere_embed
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
from .llms.databricks import DatabricksChatCompletion
from .llms.databricks.chat import DatabricksChatCompletion
from .llms.huggingface_restapi import Huggingface
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
@@ -1013,7 +1013,10 @@ def completion(
api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE")
api_version = (
api_version or litellm.api_version or get_secret("AZURE_API_VERSION")
api_version
or litellm.api_version
or get_secret("AZURE_API_VERSION")
or litellm.AZURE_DEFAULT_API_VERSION
)
api_key = (
@@ -1209,6 +1212,9 @@
custom_llm_provider == "text-completion-openai"
or "ft:babbage-002" in model
or "ft:davinci-002" in model # support for finetuned completion models
or custom_llm_provider
in litellm.openai_text_completion_compatible_providers
and kwargs.get("text_completion") is True
):
openai.api_type = "openai"
@@ -4099,8 +4105,8 @@
kwargs.pop("prompt", None)
if (
if _model is not None and (
_model is not None and custom_llm_provider == "openai"
custom_llm_provider == "openai"
): # for openai compatible endpoints - e.g. vllm, call the native /v1/completions endpoint for text completion calls
if _model not in litellm.open_ai_chat_completion_models:
model = "text-completion-openai/" + _model


@@ -2512,16 +2512,16 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_token": 0.00000035,
"input_cost_per_token": 0.000000075,
"input_cost_per_token_above_128k_tokens": 0.0000007,
"input_cost_per_token_above_128k_tokens": 0.00000015,
"output_cost_per_token": 0.00000105,
"output_cost_per_token": 0.0000003,
"output_cost_per_token_above_128k_tokens": 0.0000021,
"output_cost_per_token_above_128k_tokens": 0.0000006,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-latest": {
"max_tokens": 8192,
@@ -2533,16 +2533,16 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_token": 0.00000035,
"input_cost_per_token": 0.000000075,
"input_cost_per_token_above_128k_tokens": 0.0000007,
"input_cost_per_token_above_128k_tokens": 0.00000015,
"output_cost_per_token": 0.00000105,
"output_cost_per_token": 0.0000003,
"output_cost_per_token_above_128k_tokens": 0.0000021,
"output_cost_per_token_above_128k_tokens": 0.0000006,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-pro": {
"max_tokens": 8192,


@@ -1,16 +1,9 @@
model_list:
- model_name: "anthropic/claude-3-5-sonnet-20240620"
- model_name: "gpt-turbo"
litellm_params:
model: anthropic/claude-3-5-sonnet-20240620
model: azure/chatgpt-v-2
# api_base: http://0.0.0.0:9000
api_key: os.environ/AZURE_API_KEY
- model_name: gpt-3.5-turbo
api_base: os.environ/AZURE_API_BASE
litellm_params:
model: openai/*
litellm_settings:
router_settings:
success_callback: ["s3"]
model_group_alias: {"gpt-4": "gpt-turbo"}
s3_callback_params:
s3_bucket_name: litellm-logs # AWS Bucket Name for S3
s3_region_name: us-west-2 # AWS Region Name for S3
s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3


@@ -242,6 +242,9 @@ class LiteLLMRoutes(enum.Enum):
"/v1/models",
# token counter
"/utils/token_counter",
# rerank
"/rerank",
"/v1/rerank",
]
mapped_pass_through_routes: List = [


@@ -3,7 +3,7 @@
import asyncio
import logging
import random
from typing import Optional
from typing import List, Optional
import litellm
from litellm._logging import print_verbose
@@ -36,6 +36,25 @@ def _clean_endpoint_data(endpoint_data: dict, details: Optional[bool] = True):
)
def filter_deployments_by_id(
model_list: List,
) -> List:
seen_ids = set()
filtered_deployments = []
for deployment in model_list:
_model_info = deployment.get("model_info") or {}
_id = _model_info.get("id") or None
if _id is None:
continue
if _id not in seen_ids:
seen_ids.add(_id)
filtered_deployments.append(deployment)
return filtered_deployments
async def _perform_health_check(model_list: list, details: Optional[bool] = True):
"""
Perform a health check for each model in the list.
@@ -105,6 +124,9 @@
_new_model_list = [x for x in model_list if x["model_name"] == model]
model_list = _new_model_list
model_list = filter_deployments_by_id(
model_list=model_list
) # filter duplicate deployments (e.g. when model alias'es are used)
healthy_endpoints, unhealthy_endpoints = await _perform_health_check(
model_list, details
)


@@ -86,10 +86,11 @@ def convert_key_logging_metadata_to_callback(
team_callback_settings_obj.success_callback = []
if team_callback_settings_obj.failure_callback is None:
team_callback_settings_obj.failure_callback = []
if data.callback_name not in team_callback_settings_obj.success_callback:
team_callback_settings_obj.success_callback.append(data.callback_name)
if data.callback_name in team_callback_settings_obj.failure_callback:
if data.callback_name not in team_callback_settings_obj.failure_callback:
team_callback_settings_obj.failure_callback.append(data.callback_name)
for var, value in data.callback_vars.items():


@@ -109,8 +109,8 @@ async def add_new_member(
where={"user_id": user_info.user_id}, # type: ignore
data={"teams": {"push": [team_id]}},
)
if _returned_user is not None:
returned_user = LiteLLM_UserTable(**_returned_user.model_dump())
elif len(existing_user_row) > 1:
raise HTTPException(
status_code=400,


@@ -1,19 +1,19 @@
model_list:
- model_name: openai/*
litellm_params:
model: gpt-3.5-turbo
model: openai/*
api_key: os.environ/OPENAI_API_KEY
model_info:
litellm_settings:
id: "good-openai"
success_callback: ["prometheus"]
- model_name: openai/*
failure_callback: ["prometheus"]
guardrails:
- guardrail_name: "presidio-pre-guard"
litellm_params:
guardrail: presidio # supported values: "aporia", "lakera", "presidio"
model: openai/*
mode: "pre_call" # pre_call, during_call, post_call
api_key: os.environ/non-exsitent-env-var
output_parse_pii: True
tags: ["bad-model"]
model_info:
id: "test-openai"
litellm_settings:
callbacks: ["otel"]
@@ -22,8 +22,16 @@ callback_settings:
otel:
message_logging: False
router_settings:
enable_tag_filtering: True # 👈 Key Chang
general_settings:
master_key: sk-1234
alerting: ["slack"]
spend_report_frequency: "1d"
litellm_settings:
success_callback: ["prometheus"]
failure_callback: ["prometheus"]


@@ -3690,7 +3690,7 @@ class Router:
exception=original_exception,
)
allowed_fails = _allowed_fails or self.allowed_fails
allowed_fails = _allowed_fails if _allowed_fails is not None else self.allowed_fails
dt = get_utc_datetime()
current_minute = dt.strftime("%H-%M")
@@ -4556,6 +4556,27 @@
ids.append(id)
return ids
def _get_all_deployments(
self, model_name: str, model_alias: Optional[str] = None
) -> List[DeploymentTypedDict]:
"""
Return all deployments of a model name
Used for accurate 'get_model_list'.
"""
returned_models: List[DeploymentTypedDict] = []
for model in self.model_list:
if model["model_name"] == model_name:
if model_alias is not None:
alias_model = copy.deepcopy(model)
alias_model["model_name"] = model_name
returned_models.append(alias_model)
else:
returned_models.append(model)
return returned_models
def get_model_names(self) -> List[str]:
"""
Returns all possible model names for router.
@@ -4567,15 +4588,18 @@
def get_model_list(
self, model_name: Optional[str] = None
) -> Optional[List[DeploymentTypedDict]]:
"""
Includes router model_group_alias'es as well
"""
if hasattr(self, "model_list"):
returned_models: List[DeploymentTypedDict] = []
for model_alias, model_value in self.model_group_alias.items():
model_alias_item = DeploymentTypedDict(
returned_models.extend(
model_name=model_alias,
self._get_all_deployments(
litellm_params=LiteLLMParamsTypedDict(model=model_value),
model_name=model_value, model_alias=model_alias
)
)
returned_models.append(model_alias_item)
if model_name is None:
returned_models += self.model_list
@@ -4583,8 +4607,7 @@
return returned_models
for model in self.model_list:
if model["model_name"] == model_name:
returned_models.extend(self._get_all_deployments(model_name=model_name))
returned_models.append(model)
return returned_models
return None


@@ -1,5 +1,9 @@
"""
Use this to route requests between free and paid tiers
Use this to route requests between Teams
- If tags in request is a subset of tags in deployment, return deployment
- if deployments are set with default tags, return all default deployment
- If no default_deployments are set, return all deployments
"""
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -25,14 +29,14 @@ async def get_deployments_for_tag(
if request_kwargs is None:
verbose_logger.debug(
"get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s",
"get_deployments_for_tag: request_kwargs is None returning healthy_deployments: %s",
healthy_deployments,
)
return healthy_deployments
if healthy_deployments is None:
verbose_logger.debug(
"get_deployments_for_tier: healthy_deployments is None returning healthy_deployments"
"get_deployments_for_tag: healthy_deployments is None returning healthy_deployments"
)
return healthy_deployments
@@ -43,7 +47,9 @@
new_healthy_deployments = []
if request_tags:
verbose_logger.debug("parameter routing: router_keys: %s", request_tags)
verbose_logger.debug(
"get_deployments_for_tag routing: router_keys: %s", request_tags
)
# example this can be router_keys=["free", "custom"]
# get all deployments that have a superset of these router keys
for deployment in healthy_deployments:
@@ -66,9 +72,26 @@
request_tags,
)
new_healthy_deployments.append(deployment)
elif "default" in deployment_tags:
verbose_logger.debug(
"adding default deployment with tags: %s, request tags: %s",
deployment_tags,
request_tags,
)
new_healthy_deployments.append(deployment)
return new_healthy_deployments
# for Untagged requests use default deployments if set
_default_deployments_with_tags = []
for deployment in healthy_deployments:
if "default" in deployment.get("litellm_params", {}).get("tags", []):
_default_deployments_with_tags.append(deployment)
if len(_default_deployments_with_tags) > 0:
return _default_deployments_with_tags
# if no default deployment is found, return healthy_deployments
verbose_logger.debug(
"no tier found in metadata, returning healthy_deployments: %s",
healthy_deployments,


@@ -626,6 +626,8 @@ async def test_model_function_invoke(model, sync_mode, api_key, api_base):
response = await litellm.acompletion(**data)
print(f"response: {response}")
except litellm.InternalServerError:
pass
except litellm.RateLimitError as e:
pass
except Exception as e:
@@ -889,18 +891,29 @@ def encode_image(image_path):
return base64.b64encode(image_file.read()).decode("utf-8")
@pytest.mark.skip(
@pytest.mark.parametrize(
reason="we already test claude-3, this is just another way to pass images"
"model",
)
[
def test_completion_claude_3_base64():
"gpt-4o",
"azure/gpt-4o",
"anthropic/claude-3-opus-20240229",
],
) #
def test_completion_base64(model):
try:
import base64
import requests
litellm.set_verbose = True
litellm.num_retries = 3
url = "https://dummyimage.com/100/100/fff&text=Test+image"
image_path = "../proxy/cached_logo.jpg"
response = requests.get(url)
# Getting the base64 string
file_data = response.content
base64_image = encode_image(image_path)
encoded_file = base64.b64encode(file_data).decode("utf-8")
base64_image = f"data:image/png;base64,{encoded_file}"
resp = litellm.completion(
model="anthropic/claude-3-opus-20240229",
model=model,
messages=[
{
"role": "user",
@@ -908,9 +921,7 @@
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {
"image_url": {"url": base64_image},
"url": "data:image/jpeg;base64," + base64_image
},
},
],
}
@@ -919,7 +930,6 @@
print(f"\nResponse: {resp}")
prompt_tokens = resp.usage.prompt_tokens
raise Exception("it worked!")
except Exception as e:
if "500 Internal error encountered.'" in str(e):
pass
@@ -2174,15 +2184,16 @@ def test_completion_openai():
@pytest.mark.parametrize(
"model",
"model, api_version",
[
"gpt-4o-2024-08-06",
("gpt-4o-2024-08-06", None),
"azure/chatgpt-v-2",
("azure/chatgpt-v-2", None),
"bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", None),
("azure/gpt-4o", "2024-08-01-preview"),
],
)
@pytest.mark.flaky(retries=3, delay=1)
def test_completion_openai_pydantic(model):
def test_completion_openai_pydantic(model, api_version):
try:
litellm.set_verbose = True
from pydantic import BaseModel
@@ -2207,6 +2218,7 @@
messages=messages,
metadata={"hi": "bye"},
response_format=EventsList,
api_version=api_version,
)
break
except litellm.JSONSchemaValidationError:
@@ -3469,14 +3481,14 @@ def response_format_tests(response: litellm.ModelResponse):
@pytest.mark.parametrize(
"model",
[
# "bedrock/cohere.command-r-plus-v1:0",
"bedrock/mistral.mistral-large-2407-v1:0",
"bedrock/cohere.command-r-plus-v1:0",
"anthropic.claude-3-sonnet-20240229-v1:0",
# "anthropic.claude-instant-v1",
"anthropic.claude-instant-v1",
# "bedrock/ai21.j2-mid",
"mistral.mistral-7b-instruct-v0:2",
# "mistral.mistral-7b-instruct-v0:2",
# "bedrock/amazon.titan-tg1-large",
# "meta.llama3-8b-instruct-v1:0",
"meta.llama3-8b-instruct-v1:0",
# "cohere.command-text-v14",
"cohere.command-text-v14",
],
)
@pytest.mark.parametrize("sync_mode", [True, False])
@@ -3491,6 +3503,7 @@ async def test_completion_bedrock_httpx_models(sync_mode, model):
messages=[{"role": "user", "content": "Hey! how's it going?"}],
temperature=0.2,
max_tokens=200,
stop=["stop sequence"],
)
assert isinstance(response, litellm.ModelResponse)
@@ -3502,6 +3515,7 @@
messages=[{"role": "user", "content": "Hey! how's it going?"}],
temperature=0.2,
max_tokens=100,
stop=["stop sequence"],
)
assert isinstance(response, litellm.ModelResponse)


@@ -1219,3 +1219,13 @@ def test_completion_cost_anthropic_prompt_caching():
cost_2 = completion_cost(model=model, completion_response=response_2)
assert cost_1 > cost_2
def test_completion_cost_databricks():
model, messages = "databricks/databricks-dbrx-instruct", [
{"role": "user", "content": "What is 2+2?"}
]
resp = litellm.completion(model=model, messages=messages) # works fine
cost = completion_cost(completion_response=resp)


@@ -864,7 +864,7 @@ def _pre_call_utils(
data["messages"] = [{"role": "user", "content": "Hello world"}]
if streaming is True:
data["stream"] = True
mapped_target = client.chat.completions.with_raw_response
mapped_target = client.chat.completions.with_raw_response # type: ignore
if sync_mode:
original_function = litellm.completion
else:
@@ -873,7 +873,7 @@
data["prompt"] = "Hello world"
if streaming is True:
data["stream"] = True
mapped_target = client.completions.with_raw_response
mapped_target = client.completions.with_raw_response # type: ignore
if sync_mode:
original_function = litellm.text_completion
else:


@@ -52,6 +52,7 @@ def get_current_weather(location, unit="fahrenheit"):
# "anthropic.claude-3-sonnet-20240229-v1:0",
],
)
@pytest.mark.flaky(retries=3, delay=1)
def test_aaparallel_function_call(model):
try:
litellm.set_verbose = True


@@ -1255,7 +1255,17 @@ async def test_add_callback_via_key(prisma_client):
@pytest.mark.asyncio
async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
@pytest.mark.parametrize(
"callback_type, expected_success_callbacks, expected_failure_callbacks",
[
("success", ["langfuse"], []),
("failure", [], ["langfuse"]),
("success_and_failure", ["langfuse"], ["langfuse"]),
],
)
async def test_add_callback_via_key_litellm_pre_call_utils(
prisma_client, callback_type, expected_success_callbacks, expected_failure_callbacks
):
import json
from fastapi import HTTPException, Request, Response
@@ -1312,7 +1322,7 @@
"logging": [
{
"callback_name": "langfuse",
"callback_type": "success",
"callback_type": callback_type,
"callback_vars": {
"langfuse_public_key": "my-mock-public-key",
"langfuse_secret_key": "my-mock-secret-key",
@@ -1359,14 +1369,21 @@
}
new_data = await add_litellm_data_to_request(**data)
print("NEW DATA: {}".format(new_data))
assert "success_callback" in new_data
assert new_data["success_callback"] == ["langfuse"]
assert "langfuse_public_key" in new_data
assert new_data["langfuse_public_key"] == "my-mock-public-key"
assert "langfuse_secret_key" in new_data
assert new_data["langfuse_secret_key"] == "my-mock-secret-key"
if expected_success_callbacks:
assert "success_callback" in new_data
assert new_data["success_callback"] == expected_success_callbacks
if expected_failure_callbacks:
assert "failure_callback" in new_data
assert new_data["failure_callback"] == expected_failure_callbacks
@pytest.mark.asyncio
async def test_gemini_pass_through_endpoint():


@@ -91,3 +91,72 @@ async def test_router_free_paid_tier():
print("response_extra_info: ", response_extra_info)
assert response_extra_info["model_id"] == "very-expensive-model"
@pytest.mark.asyncio()
async def test_default_tagged_deployments():
"""
- only use default deployment for untagged requests
- if a request has tag "default", use default deployment
"""
router = litellm.Router(
model_list=[
{
"model_name": "gpt-4",
"litellm_params": {
"model": "gpt-4o",
"api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
"tags": ["default"],
},
"model_info": {"id": "default-model"},
},
{
"model_name": "gpt-4",
"litellm_params": {
"model": "gpt-4o",
"api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
},
"model_info": {"id": "default-model-2"},
},
{
"model_name": "gpt-4",
"litellm_params": {
"model": "gpt-4o-mini",
"api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
"tags": ["teamA"],
},
"model_info": {"id": "very-expensive-model"},
},
],
enable_tag_filtering=True,
)
for _ in range(5):
# Untagged request, this should pick model with id == "default-model"
response = await router.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Tell me a joke."}],
)
print("Response: ", response)
response_extra_info = response._hidden_params
print("response_extra_info: ", response_extra_info)
assert response_extra_info["model_id"] == "default-model"
for _ in range(5):
# requests tagged with "default", this should pick model with id == "default-model"
response = await router.acompletion(
model="gpt-4",
messages=[{"role": "user", "content": "Tell me a joke."}],
metadata={"tags": ["default"]},
)
print("Response: ", response)
response_extra_info = response._hidden_params
print("response_extra_info: ", response_extra_info)
assert response_extra_info["model_id"] == "default-model"


@@ -4239,3 +4239,14 @@ def test_completion_vllm():
mock_call.assert_called_once()
assert "hello" in mock_call.call_args.kwargs["extra_body"]
def test_completion_fireworks_ai_multiple_choices():
litellm.set_verbose = True
response = litellm.text_completion(
model="fireworks_ai/llama-v3p1-8b-instruct",
prompt=["halo", "hi", "halo", "hi"],
)
print(response.choices)
assert len(response.choices) == 4


@@ -2512,16 +2512,16 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_token": 0.00000035,
"input_cost_per_token": 0.000000075,
"input_cost_per_token_above_128k_tokens": 0.0000007,
"input_cost_per_token_above_128k_tokens": 0.00000015,
"output_cost_per_token": 0.00000105,
"output_cost_per_token": 0.0000003,
"output_cost_per_token_above_128k_tokens": 0.0000021,
"output_cost_per_token_above_128k_tokens": 0.0000006,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-1.5-flash-latest": {
"max_tokens": 8192,
@@ -2533,16 +2533,16 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_token": 0.00000035,
"input_cost_per_token": 0.000000075,
"input_cost_per_token_above_128k_tokens": 0.0000007,
"input_cost_per_token_above_128k_tokens": 0.00000015,
"output_cost_per_token": 0.00000105,
"output_cost_per_token": 0.0000003,
"output_cost_per_token_above_128k_tokens": 0.0000021,
"output_cost_per_token_above_128k_tokens": 0.0000006,
"litellm_provider": "gemini",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
"source": "https://ai.google.dev/pricing"
},
"gemini/gemini-pro": {
"max_tokens": 8192,


@@ -148,6 +148,7 @@ router_settings:
redis_password: os.environ/REDIS_PASSWORD
redis_port: os.environ/REDIS_PORT
enable_pre_call_checks: true
model_group_alias: {"my-special-fake-model-alias-name": "fake-openai-endpoint-3"}
general_settings:
master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys


@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.44.22"
version = "1.44.23"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.44.22"
version = "1.44.23"
version_files = [
"pyproject.toml:^version"
]