mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
Merge branch 'main' into litellm_allow_turning_off_message_logging_for_callbacks
This commit is contained in:
commit
02325f33d7
34 changed files with 442 additions and 117 deletions
|
@ -25,6 +25,13 @@ model_list:
|
|||
model: openai/gpt-4o
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
tags: ["paid"] # 👈 Key Change
|
||||
- model_name: gpt-4
|
||||
litellm_params:
|
||||
model: openai/gpt-4o
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
|
||||
|
||||
|
||||
router_settings:
|
||||
enable_tag_filtering: True # 👈 Key Change
|
||||
|
@ -136,6 +143,46 @@ Response
|
|||
}
|
||||
```
|
||||
|
||||
## Setting Default Tags
|
||||
|
||||
Use this if you want all untagged requests to be routed to specific deployments
|
||||
|
||||
1. Set a default tag in your config.yaml
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: fake-openai-endpoint
|
||||
litellm_params:
|
||||
model: openai/fake
|
||||
api_key: fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
tags: ["default"] # 👈 Key Change - All untagged requests will get routed to this
|
||||
model_info:
|
||||
id: "default-model" # used for identifying model in response headers
|
||||
```
|
||||
|
||||
2. Start proxy
|
||||
```shell
|
||||
$ litellm --config /path/to/config.yaml
|
||||
```
|
||||
|
||||
3. Make request with no tags
|
||||
```shell
|
||||
curl -i http://localhost:4000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer sk-1234" \
|
||||
-d '{
|
||||
"model": "fake-openai-endpoint",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, Claude gm!"}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
Expect to see the following response header when this works
|
||||
```shell
|
||||
x-litellm-model-id: default-model
|
||||
```
|
||||
|
||||
## ✨ Team based tag routing (Enterprise)
|
||||
|
||||
LiteLLM Proxy supports team-based tag routing, allowing you to associate specific tags with teams and route requests accordingly. Example: **Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B** (LLM Access Control For Teams)
|
||||
|
@ -170,6 +217,12 @@ Here's how to set up and use team-based tag routing using curl commands:
|
|||
tags: ["teamB"] # 👈 Key Change
|
||||
model_info:
|
||||
id: "team-b-model" # used for identifying model in response headers
|
||||
- model_name: fake-openai-endpoint
|
||||
litellm_params:
|
||||
model: openai/fake
|
||||
api_key: fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
|
||||
|
||||
router_settings:
|
||||
enable_tag_filtering: True # 👈 Key Change
|
||||
|
|
|
@ -208,8 +208,8 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
|
|||
-d '{
|
||||
"metadata": {
|
||||
"logging": [{
|
||||
"callback_name": "langfuse", # 'otel', 'langfuse', 'lunary'
|
||||
"callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default
|
||||
"callback_name": "langfuse", # "otel", "langfuse", "lunary"
|
||||
"callback_type": "success", # "success", "failure", "success_and_failure"
|
||||
"callback_vars": {
|
||||
"langfuse_public_key": "os.environ/LANGFUSE_PUBLIC_KEY", # [RECOMMENDED] reference key in proxy environment
|
||||
"langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment
|
||||
|
|
|
@ -118,7 +118,7 @@ in_memory_llm_clients_cache: dict = {}
|
|||
safe_memory_mode: bool = False
|
||||
enable_azure_ad_token_refresh: Optional[bool] = False
|
||||
### DEFAULT AZURE API VERSION ###
|
||||
AZURE_DEFAULT_API_VERSION = "2024-07-01-preview" # this is updated to the latest
|
||||
AZURE_DEFAULT_API_VERSION = "2024-08-01-preview" # this is updated to the latest
|
||||
### COHERE EMBEDDINGS DEFAULT TYPE ###
|
||||
COHERE_DEFAULT_EMBEDDING_INPUT_TYPE = "search_document"
|
||||
### GUARDRAILS ###
|
||||
|
@ -483,7 +483,12 @@ openai_compatible_providers: List = [
|
|||
"azure_ai",
|
||||
"github",
|
||||
]
|
||||
|
||||
openai_text_completion_compatible_providers: List = (
|
||||
[ # providers that support `/v1/completions`
|
||||
"together_ai",
|
||||
"fireworks_ai",
|
||||
]
|
||||
)
|
||||
|
||||
# well supported replicate llms
|
||||
replicate_models: List = [
|
||||
|
@ -863,7 +868,7 @@ from .llms.custom_llm import CustomLLM
|
|||
from .llms.huggingface_restapi import HuggingfaceConfig
|
||||
from .llms.anthropic.chat import AnthropicConfig
|
||||
from .llms.anthropic.completion import AnthropicTextConfig
|
||||
from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig
|
||||
from .llms.databricks.chat import DatabricksConfig, DatabricksEmbeddingConfig
|
||||
from .llms.predibase import PredibaseConfig
|
||||
from .llms.replicate import ReplicateConfig
|
||||
from .llms.cohere.completion import CohereConfig
|
||||
|
|
|
@ -22,6 +22,9 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_cha
|
|||
from litellm.llms.anthropic.cost_calculation import (
|
||||
cost_per_token as anthropic_cost_per_token,
|
||||
)
|
||||
from litellm.llms.databricks.cost_calculator import (
|
||||
cost_per_token as databricks_cost_per_token,
|
||||
)
|
||||
from litellm.rerank_api.types import RerankResponse
|
||||
from litellm.types.llms.openai import HttpxBinaryResponseContent
|
||||
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
|
||||
|
@ -159,7 +162,7 @@ def cost_per_token(
|
|||
_, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
|
||||
|
||||
model_without_prefix = model
|
||||
model_parts = model.split("/")
|
||||
model_parts = model.split("/", 1)
|
||||
if len(model_parts) > 1:
|
||||
model_without_prefix = model_parts[1]
|
||||
else:
|
||||
|
@ -212,6 +215,8 @@ def cost_per_token(
|
|||
)
|
||||
elif custom_llm_provider == "anthropic":
|
||||
return anthropic_cost_per_token(model=model, usage=usage_block)
|
||||
elif custom_llm_provider == "databricks":
|
||||
return databricks_cost_per_token(model=model, usage=usage_block)
|
||||
elif custom_llm_provider == "gemini":
|
||||
return google_cost_per_token(
|
||||
model=model_without_prefix,
|
||||
|
|
|
@ -649,7 +649,9 @@ class OpenTelemetry(CustomLogger):
|
|||
return BatchSpanProcessor(
|
||||
OTLPSpanExporterHTTP(
|
||||
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
|
||||
)
|
||||
),
|
||||
max_queue_size=100,
|
||||
max_export_batch_size=100,
|
||||
)
|
||||
elif self.OTEL_EXPORTER == "otlp_grpc":
|
||||
verbose_logger.debug(
|
||||
|
@ -659,7 +661,9 @@ class OpenTelemetry(CustomLogger):
|
|||
return BatchSpanProcessor(
|
||||
OTLPSpanExporterGRPC(
|
||||
endpoint=self.OTEL_ENDPOINT, headers=_split_otel_headers
|
||||
)
|
||||
),
|
||||
max_queue_size=100,
|
||||
max_export_batch_size=100,
|
||||
)
|
||||
else:
|
||||
verbose_logger.debug(
|
||||
|
|
|
@ -2333,6 +2333,8 @@ def get_standard_logging_object_payload(
|
|||
completion_start_time_float = completion_start_time.timestamp()
|
||||
elif isinstance(completion_start_time, float):
|
||||
completion_start_time_float = completion_start_time
|
||||
else:
|
||||
completion_start_time_float = end_time_float
|
||||
# clean up litellm hidden params
|
||||
clean_hidden_params = StandardLoggingHiddenParams(
|
||||
model_id=None,
|
||||
|
|
|
@ -245,7 +245,10 @@ class AzureOpenAIConfig:
|
|||
- You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
|
||||
- Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective.
|
||||
"""
|
||||
if json_schema is not None:
|
||||
if json_schema is not None and (
|
||||
(api_version_year <= "2024" and api_version_month < "08")
|
||||
or "gpt-4o" not in model
|
||||
): # azure api version "2024-08-01-preview" onwards supports 'json_schema' only for gpt-4o
|
||||
_tool_choice = ChatCompletionToolChoiceObjectParam(
|
||||
type="function",
|
||||
function=ChatCompletionToolChoiceFunctionParam(
|
||||
|
|
|
@ -1263,6 +1263,7 @@ class OpenAIChatCompletion(BaseLLM):
|
|||
|
||||
error_headers = getattr(e, "headers", None)
|
||||
if response is not None and hasattr(response, "text"):
|
||||
error_headers = getattr(e, "headers", None)
|
||||
raise OpenAIError(
|
||||
status_code=500,
|
||||
message=f"{str(e)}\n\nOriginal Response: {response.text}",
|
||||
|
@ -1800,12 +1801,11 @@ class OpenAITextCompletion(BaseLLM):
|
|||
headers: Optional[dict] = None,
|
||||
):
|
||||
super().completion()
|
||||
exception_mapping_worked = False
|
||||
try:
|
||||
if headers is None:
|
||||
headers = self.validate_environment(api_key=api_key)
|
||||
if model is None or messages is None:
|
||||
raise OpenAIError(status_code=422, message=f"Missing model or messages")
|
||||
raise OpenAIError(status_code=422, message="Missing model or messages")
|
||||
|
||||
if (
|
||||
len(messages) > 0
|
||||
|
|
|
@ -162,11 +162,10 @@ class AzureTextCompletion(BaseLLM):
|
|||
client=None,
|
||||
):
|
||||
super().completion()
|
||||
exception_mapping_worked = False
|
||||
try:
|
||||
if model is None or messages is None:
|
||||
raise AzureOpenAIError(
|
||||
status_code=422, message=f"Missing model or messages"
|
||||
status_code=422, message="Missing model or messages"
|
||||
)
|
||||
|
||||
max_retries = optional_params.pop("max_retries", 2)
|
||||
|
@ -293,7 +292,10 @@ class AzureTextCompletion(BaseLLM):
|
|||
"api-version", api_version
|
||||
)
|
||||
|
||||
response = azure_client.completions.create(**data, timeout=timeout) # type: ignore
|
||||
raw_response = azure_client.completions.with_raw_response.create(
|
||||
**data, timeout=timeout
|
||||
)
|
||||
response = raw_response.parse()
|
||||
stringified_response = response.model_dump()
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
|
@ -380,13 +382,15 @@ class AzureTextCompletion(BaseLLM):
|
|||
"complete_input_dict": data,
|
||||
},
|
||||
)
|
||||
response = await azure_client.completions.create(**data, timeout=timeout)
|
||||
raw_response = await azure_client.completions.with_raw_response.create(
|
||||
**data, timeout=timeout
|
||||
)
|
||||
response = raw_response.parse()
|
||||
return openai_text_completion_config.convert_to_chat_model_response_object(
|
||||
response_object=response.model_dump(),
|
||||
model_response_object=model_response,
|
||||
)
|
||||
except AzureOpenAIError as e:
|
||||
exception_mapping_worked = True
|
||||
raise e
|
||||
except Exception as e:
|
||||
status_code = getattr(e, "status_code", 500)
|
||||
|
|
|
@ -736,7 +736,9 @@ class BedrockLLM(BaseAWSLLM):
|
|||
|
||||
if (stream is not None and stream is True) and provider != "ai21":
|
||||
endpoint_url = f"{endpoint_url}/model/{modelId}/invoke-with-response-stream"
|
||||
proxy_endpoint_url = f"{proxy_endpoint_url}/model/{modelId}/invoke-with-response-stream"
|
||||
proxy_endpoint_url = (
|
||||
f"{proxy_endpoint_url}/model/{modelId}/invoke-with-response-stream"
|
||||
)
|
||||
else:
|
||||
endpoint_url = f"{endpoint_url}/model/{modelId}/invoke"
|
||||
proxy_endpoint_url = f"{proxy_endpoint_url}/model/{modelId}/invoke"
|
||||
|
@ -1268,7 +1270,7 @@ class AmazonConverseConfig:
|
|||
if len(value) == 0: # converse raises error for empty strings
|
||||
continue
|
||||
value = [value]
|
||||
optional_params["stop_sequences"] = value
|
||||
optional_params["stopSequences"] = value
|
||||
if param == "temperature":
|
||||
optional_params["temperature"] = value
|
||||
if param == "top_p":
|
||||
|
|
|
@ -29,8 +29,8 @@ from litellm.types.utils import (
|
|||
)
|
||||
from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage
|
||||
|
||||
from .base import BaseLLM
|
||||
from .prompt_templates.factory import custom_prompt, prompt_factory
|
||||
from ..base import BaseLLM
|
||||
from ..prompt_templates.factory import custom_prompt, prompt_factory
|
||||
|
||||
|
||||
class DatabricksError(Exception):
|
||||
|
@ -328,6 +328,7 @@ class DatabricksChatCompletion(BaseLLM):
|
|||
api_base: str,
|
||||
custom_prompt_dict: dict,
|
||||
model_response: ModelResponse,
|
||||
custom_llm_provider: str,
|
||||
print_verbose: Callable,
|
||||
encoding,
|
||||
api_key,
|
||||
|
@ -371,6 +372,8 @@ class DatabricksChatCompletion(BaseLLM):
|
|||
)
|
||||
response = ModelResponse(**response_json)
|
||||
|
||||
response.model = custom_llm_provider + "/" + response.model
|
||||
|
||||
if base_model is not None:
|
||||
response._hidden_params["model"] = base_model
|
||||
return response
|
||||
|
@ -472,6 +475,7 @@ class DatabricksChatCompletion(BaseLLM):
|
|||
data=data,
|
||||
api_base=api_base,
|
||||
custom_prompt_dict=custom_prompt_dict,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
model_response=model_response,
|
||||
print_verbose=print_verbose,
|
||||
encoding=encoding,
|
||||
|
@ -528,6 +532,8 @@ class DatabricksChatCompletion(BaseLLM):
|
|||
|
||||
response = ModelResponse(**response_json)
|
||||
|
||||
response.model = custom_llm_provider + "/" + response.model
|
||||
|
||||
if base_model is not None:
|
||||
response._hidden_params["model"] = base_model
|
||||
|
39
litellm/llms/databricks/cost_calculator.py
Normal file
39
litellm/llms/databricks/cost_calculator.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
Helper util for handling databricks-specific cost calculation
|
||||
- e.g.: handling 'dbrx-instruct-*'
|
||||
"""
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
from litellm.types.utils import Usage
|
||||
from litellm.utils import get_model_info
|
||||
|
||||
|
||||
def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    """
    Calculates the prompt and completion cost for a Databricks model call.

    Input:
        - model: str, the model name. Any name starting with 'dbrx-instruct'
          (with or without the 'databricks/' prefix) is mapped to the
          'databricks-dbrx-instruct' entry before the lookup; every other name
          is passed to `get_model_info` unchanged.
        - usage: LiteLLM Usage block; 'prompt_tokens' and 'completion_tokens'
          are read from it via item access.

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd

    Errors raised by `get_model_info` are not caught here and propagate to the
    caller.
    """
    base_model = model
    # 'dbrx-instruct-*' variants all share the pricing entry of the base model.
    if model.startswith(("databricks/dbrx-instruct", "dbrx-instruct")):
        base_model = "databricks-dbrx-instruct"

    ## GET MODEL INFO
    model_info = get_model_info(model=base_model, custom_llm_provider="databricks")

    ## CALCULATE INPUT COST
    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]

    ## CALCULATE OUTPUT COST
    completion_cost: float = (
        usage["completion_tokens"] * model_info["output_cost_per_token"]
    )

    return prompt_cost, completion_cost
|
|
@ -273,7 +273,7 @@ class SagemakerLLM(BaseAWSLLM):
|
|||
model_id = optional_params.get("model_id", None)
|
||||
|
||||
if use_messages_api is True:
|
||||
from litellm.llms.databricks import DatabricksChatCompletion
|
||||
from litellm.llms.databricks.chat import DatabricksChatCompletion
|
||||
|
||||
openai_like_chat_completions = DatabricksChatCompletion()
|
||||
inference_params["stream"] = True if stream is True else False
|
||||
|
|
|
@ -80,7 +80,7 @@ class VertexAIPartnerModels(BaseLLM):
|
|||
import vertexai
|
||||
from google.cloud import aiplatform
|
||||
|
||||
from litellm.llms.databricks import DatabricksChatCompletion
|
||||
from litellm.llms.databricks.chat import DatabricksChatCompletion
|
||||
from litellm.llms.OpenAI.openai import OpenAIChatCompletion
|
||||
from litellm.llms.text_completion_codestral import CodestralTextCompletion
|
||||
from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
|
||||
|
|
|
@ -92,7 +92,7 @@ from .llms.cohere import chat as cohere_chat
|
|||
from .llms.cohere import completion as cohere_completion # type: ignore
|
||||
from .llms.cohere import embed as cohere_embed
|
||||
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
|
||||
from .llms.databricks import DatabricksChatCompletion
|
||||
from .llms.databricks.chat import DatabricksChatCompletion
|
||||
from .llms.huggingface_restapi import Huggingface
|
||||
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
|
||||
from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
|
||||
|
@ -1013,7 +1013,10 @@ def completion(
|
|||
api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE")
|
||||
|
||||
api_version = (
|
||||
api_version or litellm.api_version or get_secret("AZURE_API_VERSION")
|
||||
api_version
|
||||
or litellm.api_version
|
||||
or get_secret("AZURE_API_VERSION")
|
||||
or litellm.AZURE_DEFAULT_API_VERSION
|
||||
)
|
||||
|
||||
api_key = (
|
||||
|
@ -1209,6 +1212,9 @@ def completion(
|
|||
custom_llm_provider == "text-completion-openai"
|
||||
or "ft:babbage-002" in model
|
||||
or "ft:davinci-002" in model # support for finetuned completion models
|
||||
or custom_llm_provider
|
||||
in litellm.openai_text_completion_compatible_providers
|
||||
and kwargs.get("text_completion") is True
|
||||
):
|
||||
openai.api_type = "openai"
|
||||
|
||||
|
@ -4099,8 +4105,8 @@ def text_completion(
|
|||
|
||||
kwargs.pop("prompt", None)
|
||||
|
||||
if (
|
||||
_model is not None and custom_llm_provider == "openai"
|
||||
if _model is not None and (
|
||||
custom_llm_provider == "openai"
|
||||
): # for openai compatible endpoints - e.g. vllm, call the native /v1/completions endpoint for text completion calls
|
||||
if _model not in litellm.open_ai_chat_completion_models:
|
||||
model = "text-completion-openai/" + _model
|
||||
|
|
|
@ -2512,16 +2512,16 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_token": 0.00000035,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000007,
|
||||
"output_cost_per_token": 0.00000105,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000021,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"input_cost_per_token_above_128k_tokens": 0.00000015,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000006,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-latest": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -2533,16 +2533,16 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_token": 0.00000035,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000007,
|
||||
"output_cost_per_token": 0.00000105,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000021,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"input_cost_per_token_above_128k_tokens": 0.00000015,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000006,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-pro": {
|
||||
"max_tokens": 8192,
|
||||
|
|
|
@ -1,16 +1,9 @@
|
|||
model_list:
|
||||
- model_name: "anthropic/claude-3-5-sonnet-20240620"
|
||||
- model_name: "gpt-turbo"
|
||||
litellm_params:
|
||||
model: anthropic/claude-3-5-sonnet-20240620
|
||||
# api_base: http://0.0.0.0:9000
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: openai/*
|
||||
model: azure/chatgpt-v-2
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
|
||||
litellm_settings:
|
||||
success_callback: ["s3"]
|
||||
s3_callback_params:
|
||||
s3_bucket_name: litellm-logs # AWS Bucket Name for S3
|
||||
s3_region_name: us-west-2 # AWS Region Name for S3
|
||||
s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
|
||||
s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
|
||||
router_settings:
|
||||
model_group_alias: {"gpt-4": "gpt-turbo"}
|
|
@ -242,6 +242,9 @@ class LiteLLMRoutes(enum.Enum):
|
|||
"/v1/models",
|
||||
# token counter
|
||||
"/utils/token_counter",
|
||||
# rerank
|
||||
"/rerank",
|
||||
"/v1/rerank",
|
||||
]
|
||||
|
||||
mapped_pass_through_routes: List = [
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
import asyncio
|
||||
import logging
|
||||
import random
|
||||
from typing import Optional
|
||||
from typing import List, Optional
|
||||
|
||||
import litellm
|
||||
from litellm._logging import print_verbose
|
||||
|
@ -36,6 +36,25 @@ def _clean_endpoint_data(endpoint_data: dict, details: Optional[bool] = True):
|
|||
)
|
||||
|
||||
|
||||
def filter_deployments_by_id(
    model_list: List,
) -> List:
    """
    Return the deployments in `model_list` that have a unique `model_info.id`.

    The first deployment seen for each id is kept and input order is preserved.
    Deployments whose `model_info` is missing/empty, or whose `id` is missing
    or falsy, are dropped because they cannot be de-duplicated.

    Input:
        - model_list: List of deployment dicts (each read via `.get("model_info")`).

    Returns:
        List - the de-duplicated deployments (same objects, not copies).
    """
    seen_ids = set()
    filtered_deployments = []

    for deployment in model_list:
        _id = (deployment.get("model_info") or {}).get("id")
        if not _id:
            # no usable id on this deployment -> skip it entirely
            continue
        if _id in seen_ids:
            # duplicate of a deployment that was already kept
            continue

        seen_ids.add(_id)
        filtered_deployments.append(deployment)

    return filtered_deployments
|
||||
|
||||
|
||||
async def _perform_health_check(model_list: list, details: Optional[bool] = True):
|
||||
"""
|
||||
Perform a health check for each model in the list.
|
||||
|
@ -105,6 +124,9 @@ async def perform_health_check(
|
|||
_new_model_list = [x for x in model_list if x["model_name"] == model]
|
||||
model_list = _new_model_list
|
||||
|
||||
model_list = filter_deployments_by_id(
|
||||
model_list=model_list
|
||||
) # filter duplicate deployments (e.g. when model alias'es are used)
|
||||
healthy_endpoints, unhealthy_endpoints = await _perform_health_check(
|
||||
model_list, details
|
||||
)
|
||||
|
|
|
@ -86,10 +86,11 @@ def convert_key_logging_metadata_to_callback(
|
|||
team_callback_settings_obj.success_callback = []
|
||||
if team_callback_settings_obj.failure_callback is None:
|
||||
team_callback_settings_obj.failure_callback = []
|
||||
|
||||
if data.callback_name not in team_callback_settings_obj.success_callback:
|
||||
team_callback_settings_obj.success_callback.append(data.callback_name)
|
||||
|
||||
if data.callback_name in team_callback_settings_obj.failure_callback:
|
||||
if data.callback_name not in team_callback_settings_obj.failure_callback:
|
||||
team_callback_settings_obj.failure_callback.append(data.callback_name)
|
||||
|
||||
for var, value in data.callback_vars.items():
|
||||
|
|
|
@ -109,7 +109,7 @@ async def add_new_member(
|
|||
where={"user_id": user_info.user_id}, # type: ignore
|
||||
data={"teams": {"push": [team_id]}},
|
||||
)
|
||||
|
||||
if _returned_user is not None:
|
||||
returned_user = LiteLLM_UserTable(**_returned_user.model_dump())
|
||||
elif len(existing_user_row) > 1:
|
||||
raise HTTPException(
|
||||
|
|
|
@ -1,19 +1,19 @@
|
|||
model_list:
|
||||
- model_name: openai/*
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo
|
||||
model: openai/*
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
litellm_settings:
|
||||
success_callback: ["prometheus"]
|
||||
failure_callback: ["prometheus"]
|
||||
|
||||
guardrails:
|
||||
- guardrail_name: "presidio-pre-guard"
|
||||
model_info:
|
||||
id: "good-openai"
|
||||
- model_name: openai/*
|
||||
litellm_params:
|
||||
guardrail: presidio # supported values: "aporia", "lakera", "presidio"
|
||||
mode: "pre_call" # pre_call, during_call, post_call
|
||||
output_parse_pii: True
|
||||
model: openai/*
|
||||
api_key: os.environ/non-exsitent-env-var
|
||||
tags: ["bad-model"]
|
||||
model_info:
|
||||
id: "test-openai"
|
||||
|
||||
|
||||
|
||||
litellm_settings:
|
||||
callbacks: ["otel"]
|
||||
|
@ -22,8 +22,16 @@ callback_settings:
|
|||
otel:
|
||||
message_logging: False
|
||||
|
||||
router_settings:
|
||||
enable_tag_filtering: True # 👈 Key Change
|
||||
|
||||
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
alerting: ["slack"]
|
||||
spend_report_frequency: "1d"
|
||||
|
||||
|
||||
litellm_settings:
|
||||
success_callback: ["prometheus"]
|
||||
failure_callback: ["prometheus"]
|
|
@ -3690,7 +3690,7 @@ class Router:
|
|||
exception=original_exception,
|
||||
)
|
||||
|
||||
allowed_fails = _allowed_fails or self.allowed_fails
|
||||
allowed_fails = _allowed_fails if _allowed_fails is not None else self.allowed_fails
|
||||
|
||||
dt = get_utc_datetime()
|
||||
current_minute = dt.strftime("%H-%M")
|
||||
|
@ -4556,6 +4556,27 @@ class Router:
|
|||
ids.append(id)
|
||||
return ids
|
||||
|
||||
def _get_all_deployments(
    self, model_name: str, model_alias: Optional[str] = None
) -> List[DeploymentTypedDict]:
    """
    Return all deployments of a model name

    Used for accurate 'get_model_list'.

    Input:
        - model_name: str, the model group whose deployments are collected
          (matched against each deployment's "model_name").
        - model_alias: Optional[str], when set, each matching deployment is
          deep-copied and the copy's "model_name" is replaced with this alias,
          so the entries in `self.model_list` are left untouched.

    Returns:
        List[DeploymentTypedDict] - the matching deployments; the original
        objects when no alias is given, renamed copies otherwise.
    """
    returned_models: List[DeploymentTypedDict] = []

    for model in self.model_list:
        if model["model_name"] != model_name:
            continue

        if model_alias is None:
            returned_models.append(model)
            continue

        # Copy before renaming so the router's own deployment is not mutated.
        alias_model = copy.deepcopy(model)
        # Expose the deployment under the alias (was mistakenly re-set to model_name).
        alias_model["model_name"] = model_alias
        returned_models.append(alias_model)

    return returned_models
|
||||
|
||||
def get_model_names(self) -> List[str]:
|
||||
"""
|
||||
Returns all possible model names for router.
|
||||
|
@ -4567,15 +4588,18 @@ class Router:
|
|||
def get_model_list(
|
||||
self, model_name: Optional[str] = None
|
||||
) -> Optional[List[DeploymentTypedDict]]:
|
||||
"""
|
||||
Includes router model_group_alias'es as well
|
||||
"""
|
||||
if hasattr(self, "model_list"):
|
||||
returned_models: List[DeploymentTypedDict] = []
|
||||
|
||||
for model_alias, model_value in self.model_group_alias.items():
|
||||
model_alias_item = DeploymentTypedDict(
|
||||
model_name=model_alias,
|
||||
litellm_params=LiteLLMParamsTypedDict(model=model_value),
|
||||
returned_models.extend(
|
||||
self._get_all_deployments(
|
||||
model_name=model_value, model_alias=model_alias
|
||||
)
|
||||
)
|
||||
returned_models.append(model_alias_item)
|
||||
|
||||
if model_name is None:
|
||||
returned_models += self.model_list
|
||||
|
@ -4583,8 +4607,7 @@ class Router:
|
|||
return returned_models
|
||||
|
||||
for model in self.model_list:
|
||||
if model["model_name"] == model_name:
|
||||
returned_models.append(model)
|
||||
returned_models.extend(self._get_all_deployments(model_name=model_name))
|
||||
|
||||
return returned_models
|
||||
return None
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
"""
|
||||
Use this to route requests between free and paid tiers
|
||||
Use this to route requests between Teams
|
||||
|
||||
- If tags in request is a subset of tags in deployment, return deployment
|
||||
- If deployments are set with default tags, return all default deployments
|
||||
- If no default_deployments are set, return all deployments
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
|
||||
|
@ -25,14 +29,14 @@ async def get_deployments_for_tag(
|
|||
|
||||
if request_kwargs is None:
|
||||
verbose_logger.debug(
|
||||
"get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s",
|
||||
"get_deployments_for_tag: request_kwargs is None returning healthy_deployments: %s",
|
||||
healthy_deployments,
|
||||
)
|
||||
return healthy_deployments
|
||||
|
||||
if healthy_deployments is None:
|
||||
verbose_logger.debug(
|
||||
"get_deployments_for_tier: healthy_deployments is None returning healthy_deployments"
|
||||
"get_deployments_for_tag: healthy_deployments is None returning healthy_deployments"
|
||||
)
|
||||
return healthy_deployments
|
||||
|
||||
|
@ -43,7 +47,9 @@ async def get_deployments_for_tag(
|
|||
|
||||
new_healthy_deployments = []
|
||||
if request_tags:
|
||||
verbose_logger.debug("parameter routing: router_keys: %s", request_tags)
|
||||
verbose_logger.debug(
|
||||
"get_deployments_for_tag routing: router_keys: %s", request_tags
|
||||
)
|
||||
# example this can be router_keys=["free", "custom"]
|
||||
# get all deployments that have a superset of these router keys
|
||||
for deployment in healthy_deployments:
|
||||
|
@ -66,9 +72,26 @@ async def get_deployments_for_tag(
|
|||
request_tags,
|
||||
)
|
||||
new_healthy_deployments.append(deployment)
|
||||
elif "default" in deployment_tags:
|
||||
verbose_logger.debug(
|
||||
"adding default deployment with tags: %s, request tags: %s",
|
||||
deployment_tags,
|
||||
request_tags,
|
||||
)
|
||||
new_healthy_deployments.append(deployment)
|
||||
|
||||
return new_healthy_deployments
|
||||
|
||||
# for Untagged requests use default deployments if set
|
||||
_default_deployments_with_tags = []
|
||||
for deployment in healthy_deployments:
|
||||
if "default" in deployment.get("litellm_params", {}).get("tags", []):
|
||||
_default_deployments_with_tags.append(deployment)
|
||||
|
||||
if len(_default_deployments_with_tags) > 0:
|
||||
return _default_deployments_with_tags
|
||||
|
||||
# if no default deployment is found, return healthy_deployments
|
||||
verbose_logger.debug(
|
||||
"no tier found in metadata, returning healthy_deployments: %s",
|
||||
healthy_deployments,
|
||||
|
|
|
@ -626,6 +626,8 @@ async def test_model_function_invoke(model, sync_mode, api_key, api_base):
|
|||
response = await litellm.acompletion(**data)
|
||||
|
||||
print(f"response: {response}")
|
||||
except litellm.InternalServerError:
|
||||
pass
|
||||
except litellm.RateLimitError as e:
|
||||
pass
|
||||
except Exception as e:
|
||||
|
@ -889,18 +891,29 @@ def encode_image(image_path):
|
|||
return base64.b64encode(image_file.read()).decode("utf-8")
|
||||
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="we already test claude-3, this is just another way to pass images"
|
||||
)
|
||||
def test_completion_claude_3_base64():
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"gpt-4o",
|
||||
"azure/gpt-4o",
|
||||
"anthropic/claude-3-opus-20240229",
|
||||
],
|
||||
) #
|
||||
def test_completion_base64(model):
|
||||
try:
|
||||
import base64
|
||||
|
||||
import requests
|
||||
|
||||
litellm.set_verbose = True
|
||||
litellm.num_retries = 3
|
||||
image_path = "../proxy/cached_logo.jpg"
|
||||
# Getting the base64 string
|
||||
base64_image = encode_image(image_path)
|
||||
url = "https://dummyimage.com/100/100/fff&text=Test+image"
|
||||
response = requests.get(url)
|
||||
file_data = response.content
|
||||
|
||||
encoded_file = base64.b64encode(file_data).decode("utf-8")
|
||||
base64_image = f"data:image/png;base64,{encoded_file}"
|
||||
resp = litellm.completion(
|
||||
model="anthropic/claude-3-opus-20240229",
|
||||
model=model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
|
@ -908,9 +921,7 @@ def test_completion_claude_3_base64():
|
|||
{"type": "text", "text": "Whats in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "data:image/jpeg;base64," + base64_image
|
||||
},
|
||||
"image_url": {"url": base64_image},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
@ -919,7 +930,6 @@ def test_completion_claude_3_base64():
|
|||
print(f"\nResponse: {resp}")
|
||||
|
||||
prompt_tokens = resp.usage.prompt_tokens
|
||||
raise Exception("it worked!")
|
||||
except Exception as e:
|
||||
if "500 Internal error encountered.'" in str(e):
|
||||
pass
|
||||
|
@ -2174,15 +2184,16 @@ def test_completion_openai():
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
"model, api_version",
|
||||
[
|
||||
"gpt-4o-2024-08-06",
|
||||
"azure/chatgpt-v-2",
|
||||
"bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
("gpt-4o-2024-08-06", None),
|
||||
("azure/chatgpt-v-2", None),
|
||||
("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", None),
|
||||
("azure/gpt-4o", "2024-08-01-preview"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.flaky(retries=3, delay=1)
|
||||
def test_completion_openai_pydantic(model):
|
||||
def test_completion_openai_pydantic(model, api_version):
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
from pydantic import BaseModel
|
||||
|
@ -2207,6 +2218,7 @@ def test_completion_openai_pydantic(model):
|
|||
messages=messages,
|
||||
metadata={"hi": "bye"},
|
||||
response_format=EventsList,
|
||||
api_version=api_version,
|
||||
)
|
||||
break
|
||||
except litellm.JSONSchemaValidationError:
|
||||
|
@ -3469,14 +3481,14 @@ def response_format_tests(response: litellm.ModelResponse):
|
|||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
# "bedrock/cohere.command-r-plus-v1:0",
|
||||
"bedrock/mistral.mistral-large-2407-v1:0",
|
||||
"bedrock/cohere.command-r-plus-v1:0",
|
||||
"anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
# "anthropic.claude-instant-v1",
|
||||
# "bedrock/ai21.j2-mid",
|
||||
# "mistral.mistral-7b-instruct-v0:2",
|
||||
"anthropic.claude-instant-v1",
|
||||
"mistral.mistral-7b-instruct-v0:2",
|
||||
# "bedrock/amazon.titan-tg1-large",
|
||||
# "meta.llama3-8b-instruct-v1:0",
|
||||
# "cohere.command-text-v14",
|
||||
"meta.llama3-8b-instruct-v1:0",
|
||||
"cohere.command-text-v14",
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
|
@ -3491,6 +3503,7 @@ async def test_completion_bedrock_httpx_models(sync_mode, model):
|
|||
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
||||
temperature=0.2,
|
||||
max_tokens=200,
|
||||
stop=["stop sequence"],
|
||||
)
|
||||
|
||||
assert isinstance(response, litellm.ModelResponse)
|
||||
|
@ -3502,6 +3515,7 @@ async def test_completion_bedrock_httpx_models(sync_mode, model):
|
|||
messages=[{"role": "user", "content": "Hey! how's it going?"}],
|
||||
temperature=0.2,
|
||||
max_tokens=100,
|
||||
stop=["stop sequence"],
|
||||
)
|
||||
|
||||
assert isinstance(response, litellm.ModelResponse)
|
||||
|
|
|
@ -1219,3 +1219,13 @@ def test_completion_cost_anthropic_prompt_caching():
|
|||
cost_2 = completion_cost(model=model, completion_response=response_2)
|
||||
|
||||
assert cost_1 > cost_2
|
||||
|
||||
|
||||
def test_completion_cost_databricks():
    """Smoke-test cost calculation for a Databricks chat completion.

    Sends one short prompt to ``databricks/databricks-dbrx-instruct`` and
    passes the response to ``completion_cost``. Nothing is asserted about
    the resulting value; the test passes as long as neither call raises.
    """
    model = "databricks/databricks-dbrx-instruct"
    messages = [{"role": "user", "content": "What is 2+2?"}]

    resp = litellm.completion(model=model, messages=messages)  # works fine

    # Only the absence of an exception is being checked here.
    completion_cost(completion_response=resp)
|
||||
|
|
|
@ -864,7 +864,7 @@ def _pre_call_utils(
|
|||
data["messages"] = [{"role": "user", "content": "Hello world"}]
|
||||
if streaming is True:
|
||||
data["stream"] = True
|
||||
mapped_target = client.chat.completions.with_raw_response
|
||||
mapped_target = client.chat.completions.with_raw_response # type: ignore
|
||||
if sync_mode:
|
||||
original_function = litellm.completion
|
||||
else:
|
||||
|
@ -873,7 +873,7 @@ def _pre_call_utils(
|
|||
data["prompt"] = "Hello world"
|
||||
if streaming is True:
|
||||
data["stream"] = True
|
||||
mapped_target = client.completions.with_raw_response
|
||||
mapped_target = client.completions.with_raw_response # type: ignore
|
||||
if sync_mode:
|
||||
original_function = litellm.text_completion
|
||||
else:
|
||||
|
|
|
@ -52,6 +52,7 @@ def get_current_weather(location, unit="fahrenheit"):
|
|||
# "anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
],
|
||||
)
|
||||
@pytest.mark.flaky(retries=3, delay=1)
|
||||
def test_aaparallel_function_call(model):
|
||||
try:
|
||||
litellm.set_verbose = True
|
||||
|
|
|
@ -1255,7 +1255,17 @@ async def test_add_callback_via_key(prisma_client):
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
|
||||
@pytest.mark.parametrize(
|
||||
"callback_type, expected_success_callbacks, expected_failure_callbacks",
|
||||
[
|
||||
("success", ["langfuse"], []),
|
||||
("failure", [], ["langfuse"]),
|
||||
("success_and_failure", ["langfuse"], ["langfuse"]),
|
||||
],
|
||||
)
|
||||
async def test_add_callback_via_key_litellm_pre_call_utils(
|
||||
prisma_client, callback_type, expected_success_callbacks, expected_failure_callbacks
|
||||
):
|
||||
import json
|
||||
|
||||
from fastapi import HTTPException, Request, Response
|
||||
|
@ -1312,7 +1322,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
|
|||
"logging": [
|
||||
{
|
||||
"callback_name": "langfuse",
|
||||
"callback_type": "success",
|
||||
"callback_type": callback_type,
|
||||
"callback_vars": {
|
||||
"langfuse_public_key": "my-mock-public-key",
|
||||
"langfuse_secret_key": "my-mock-secret-key",
|
||||
|
@ -1359,14 +1369,21 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
|
|||
}
|
||||
|
||||
new_data = await add_litellm_data_to_request(**data)
|
||||
print("NEW DATA: {}".format(new_data))
|
||||
|
||||
assert "success_callback" in new_data
|
||||
assert new_data["success_callback"] == ["langfuse"]
|
||||
assert "langfuse_public_key" in new_data
|
||||
assert new_data["langfuse_public_key"] == "my-mock-public-key"
|
||||
assert "langfuse_secret_key" in new_data
|
||||
assert new_data["langfuse_secret_key"] == "my-mock-secret-key"
|
||||
|
||||
if expected_success_callbacks:
|
||||
assert "success_callback" in new_data
|
||||
assert new_data["success_callback"] == expected_success_callbacks
|
||||
|
||||
if expected_failure_callbacks:
|
||||
assert "failure_callback" in new_data
|
||||
assert new_data["failure_callback"] == expected_failure_callbacks
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_gemini_pass_through_endpoint():
|
||||
|
|
|
@ -91,3 +91,72 @@ async def test_router_free_paid_tier():
|
|||
print("response_extra_info: ", response_extra_info)
|
||||
|
||||
assert response_extra_info["model_id"] == "very-expensive-model"
|
||||
|
||||
|
||||
@pytest.mark.asyncio()
async def test_default_tagged_deployments():
    """
    Check routing to the "default"-tagged deployment when tag filtering is on.

    The router holds three deployments in the "gpt-4" group: one tagged
    "default", one with no tags, and one tagged "teamA".

    - untagged requests must be served by the "default-model" deployment
    - requests tagged "default" must be served by the "default-model" deployment
    """
    api_base = "https://exampleopenaiendpoint-production.up.railway.app/"

    deployments = [
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "gpt-4o",
                "api_base": api_base,
                "tags": ["default"],
            },
            "model_info": {"id": "default-model"},
        },
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "gpt-4o",
                "api_base": api_base,
            },
            "model_info": {"id": "default-model-2"},
        },
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "gpt-4o-mini",
                "api_base": api_base,
                "tags": ["teamA"],
            },
            "model_info": {"id": "very-expensive-model"},
        },
    ]
    router = litellm.Router(model_list=deployments, enable_tag_filtering=True)

    # First pass: untagged requests. Second pass: requests tagged "default".
    # Both passes are expected to land on the deployment with id "default-model".
    for request_tags in (None, ["default"]):
        for _ in range(5):
            # Build fresh arguments on every call so no request shares
            # message or metadata objects with a previous one.
            call_kwargs = {
                "model": "gpt-4",
                "messages": [{"role": "user", "content": "Tell me a joke."}],
            }
            if request_tags is not None:
                call_kwargs["metadata"] = {"tags": list(request_tags)}

            response = await router.acompletion(**call_kwargs)

            print("Response: ", response)

            response_extra_info = response._hidden_params
            print("response_extra_info: ", response_extra_info)

            assert response_extra_info["model_id"] == "default-model"
|
||||
|
|
|
@ -4239,3 +4239,14 @@ def test_completion_vllm():
|
|||
mock_call.assert_called_once()
|
||||
|
||||
assert "hello" in mock_call.call_args.kwargs["extra_body"]
|
||||
|
||||
|
||||
def test_completion_fireworks_ai_multiple_choices():
    """Send a four-prompt text completion to Fireworks AI and expect four choices.

    Verbose logging is switched on, the returned choices are printed, and
    the test asserts that exactly four choices come back.
    """
    litellm.set_verbose = True

    prompts = ["halo", "hi", "halo", "hi"]
    result = litellm.text_completion(
        model="fireworks_ai/llama-v3p1-8b-instruct",
        prompt=prompts,
    )
    choices = result.choices
    print(choices)

    assert len(choices) == 4
|
||||
|
|
|
@ -2512,16 +2512,16 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_token": 0.00000035,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000007,
|
||||
"output_cost_per_token": 0.00000105,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000021,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"input_cost_per_token_above_128k_tokens": 0.00000015,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000006,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-1.5-flash-latest": {
|
||||
"max_tokens": 8192,
|
||||
|
@ -2533,16 +2533,16 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_token": 0.00000035,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000007,
|
||||
"output_cost_per_token": 0.00000105,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000021,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"input_cost_per_token_above_128k_tokens": 0.00000015,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000006,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
"source": "https://ai.google.dev/pricing"
|
||||
},
|
||||
"gemini/gemini-pro": {
|
||||
"max_tokens": 8192,
|
||||
|
|
|
@ -148,6 +148,7 @@ router_settings:
|
|||
redis_password: os.environ/REDIS_PASSWORD
|
||||
redis_port: os.environ/REDIS_PORT
|
||||
enable_pre_call_checks: true
|
||||
model_group_alias: {"my-special-fake-model-alias-name": "fake-openai-endpoint-3"}
|
||||
|
||||
general_settings:
|
||||
master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.44.22"
|
||||
version = "1.44.23"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.44.22"
|
||||
version = "1.44.23"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue