LiteLLM Minor Fixes and Improvements (09/09/2024) (#5602)
* fix(main.py): pass default azure api version as alternative in completion call. Fixes API error caused by a missing api version. Closes https://github.com/BerriAI/litellm/issues/5584
* Fixed gemini-1.5-flash pricing (#5590)
  * add /key/list endpoint
  * bump: version 1.44.21 → 1.44.22
  * docs architecture
  * Fixed gemini-1.5-flash pricing
  Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
* fix(bedrock/chat.py): fix converse api stop sequence param mapping. Fixes https://github.com/BerriAI/litellm/issues/5592
* fix(databricks/cost_calculator.py): handle databricks model name changes. Fixes https://github.com/BerriAI/litellm/issues/5597
* fix(azure.py): support azure api version 2024-08-01-preview. Closes https://github.com/BerriAI/litellm/issues/5377
* fix(proxy/_types.py): allow dev keys to call cohere /rerank endpoint. Fixes issue where only admin keys could call the rerank endpoint
* fix(azure.py): check if model is gpt-4o
* fix(proxy/_types.py): support /v1/rerank on non-admin routes as well
* fix(cost_calculator.py): fix split on `/` logic in cost calculator

---------

Co-authored-by: F1bos <44951186+F1bos@users.noreply.github.com>
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
This commit is contained in:
parent 4ac66bd843
commit 2d2282101b
14 changed files with 139 additions and 56 deletions
@@ -118,7 +118,7 @@ in_memory_llm_clients_cache: dict = {}
 safe_memory_mode: bool = False
 enable_azure_ad_token_refresh: Optional[bool] = False
 ### DEFAULT AZURE API VERSION ###
-AZURE_DEFAULT_API_VERSION = "2024-07-01-preview" # this is updated to the latest
+AZURE_DEFAULT_API_VERSION = "2024-08-01-preview" # this is updated to the latest
 ### COHERE EMBEDDINGS DEFAULT TYPE ###
 COHERE_DEFAULT_EMBEDDING_INPUT_TYPE = "search_document"
 ### GUARDRAILS ###
@@ -868,7 +868,7 @@ from .llms.custom_llm import CustomLLM
 from .llms.huggingface_restapi import HuggingfaceConfig
 from .llms.anthropic.chat import AnthropicConfig
 from .llms.anthropic.completion import AnthropicTextConfig
-from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig
+from .llms.databricks.chat import DatabricksConfig, DatabricksEmbeddingConfig
 from .llms.predibase import PredibaseConfig
 from .llms.replicate import ReplicateConfig
 from .llms.cohere.completion import CohereConfig
@@ -22,6 +22,9 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_cha
 from litellm.llms.anthropic.cost_calculation import (
     cost_per_token as anthropic_cost_per_token,
 )
+from litellm.llms.databricks.cost_calculator import (
+    cost_per_token as databricks_cost_per_token,
+)
 from litellm.rerank_api.types import RerankResponse
 from litellm.types.llms.openai import HttpxBinaryResponseContent
 from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
@@ -159,7 +162,7 @@ def cost_per_token(
 _, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)

 model_without_prefix = model
-model_parts = model.split("/")
+model_parts = model.split("/", 1)
 if len(model_parts) > 1:
     model_without_prefix = model_parts[1]
 else:
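Note: limiting the split to a single cut keeps the rest of a model name intact when it contains more than one slash; the earlier code only ever kept the second path segment. A minimal sketch (the model string is illustrative):

    model = "openrouter/anthropic/claude-3-opus"  # illustrative name with two slashes

    # old behaviour: splitting on every "/" drops part of the model name
    print(model.split("/")[1])      # -> "anthropic"

    # new behaviour: split once, keep everything after the provider prefix
    print(model.split("/", 1)[1])   # -> "anthropic/claude-3-opus"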
@@ -212,6 +215,8 @@ def cost_per_token(
     )
 elif custom_llm_provider == "anthropic":
     return anthropic_cost_per_token(model=model, usage=usage_block)
+elif custom_llm_provider == "databricks":
+    return databricks_cost_per_token(model=model, usage=usage_block)
 elif custom_llm_provider == "gemini":
     return google_cost_per_token(
         model=model_without_prefix,
@@ -245,7 +245,10 @@ class AzureOpenAIConfig:
 - You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
 - Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective.
 """
-if json_schema is not None:
+if json_schema is not None and (
+    (api_version_year <= "2024" and api_version_month < "08")
+    or "gpt-4o" not in model
+): # azure api version "2024-08-01-preview" onwards supports 'json_schema' only for gpt-4o
     _tool_choice = ChatCompletionToolChoiceObjectParam(
         type="function",
         function=ChatCompletionToolChoiceFunctionParam(
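Note: the guard compares the api version's year and month as zero-padded strings and checks the model family, so the forced tool-call fallback is only skipped for gpt-4o on 2024-08-01-preview or newer. A standalone sketch of the same gating, using a hypothetical helper name:

    def needs_tool_call_fallback(model: str, api_version_year: str, api_version_month: str) -> bool:
        # hypothetical helper mirroring the condition added in AzureOpenAIConfig:
        # fall back to a forced tool call unless the deployment is gpt-4o on
        # api version 2024-08-01-preview or newer, which supports json_schema natively
        return (api_version_year <= "2024" and api_version_month < "08") or "gpt-4o" not in model

    print(needs_tool_call_fallback("gpt-4o", "2024", "08"))        # False -> json_schema passed through
    print(needs_tool_call_fallback("gpt-4o", "2024", "07"))        # True  -> tool-call fallback
    print(needs_tool_call_fallback("gpt-35-turbo", "2024", "08"))  # True  -> tool-call fallback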
@@ -736,7 +736,9 @@ class BedrockLLM(BaseAWSLLM):

 if (stream is not None and stream is True) and provider != "ai21":
     endpoint_url = f"{endpoint_url}/model/{modelId}/invoke-with-response-stream"
-    proxy_endpoint_url = f"{proxy_endpoint_url}/model/{modelId}/invoke-with-response-stream"
+    proxy_endpoint_url = (
+        f"{proxy_endpoint_url}/model/{modelId}/invoke-with-response-stream"
+    )
 else:
     endpoint_url = f"{endpoint_url}/model/{modelId}/invoke"
     proxy_endpoint_url = f"{proxy_endpoint_url}/model/{modelId}/invoke"
@@ -1268,7 +1270,7 @@ class AmazonConverseConfig:
 if len(value) == 0: # converse raises error for empty strings
     continue
 value = [value]
-optional_params["stop_sequences"] = value
+optional_params["stopSequences"] = value
 if param == "temperature":
     optional_params["temperature"] = value
 if param == "top_p":
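Note: the Converse API expects the camelCase stopSequences key, so the OpenAI-style stop parameter now maps onto it (bare strings are wrapped in a list, empty strings are still skipped). A hedged usage sketch, assuming AWS credentials are configured and using the same model/stop value as the tests in this commit:

    import litellm

    # `stop` is translated to Converse's `stopSequences`
    response = litellm.completion(
        model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        messages=[{"role": "user", "content": "Count to ten."}],
        stop=["stop sequence"],
    )
    print(response.choices[0].message.content)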
@@ -29,8 +29,8 @@ from litellm.types.utils import (
 )
 from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage

-from .base import BaseLLM
-from .prompt_templates.factory import custom_prompt, prompt_factory
+from ..base import BaseLLM
+from ..prompt_templates.factory import custom_prompt, prompt_factory


 class DatabricksError(Exception):
@@ -328,6 +328,7 @@ class DatabricksChatCompletion(BaseLLM):
 api_base: str,
 custom_prompt_dict: dict,
 model_response: ModelResponse,
+custom_llm_provider: str,
 print_verbose: Callable,
 encoding,
 api_key,
@@ -371,6 +372,8 @@ class DatabricksChatCompletion(BaseLLM):
 )
 response = ModelResponse(**response_json)

+response.model = custom_llm_provider + "/" + response.model
+
 if base_model is not None:
     response._hidden_params["model"] = base_model
 return response
@@ -472,6 +475,7 @@ class DatabricksChatCompletion(BaseLLM):
 data=data,
 api_base=api_base,
 custom_prompt_dict=custom_prompt_dict,
+custom_llm_provider=custom_llm_provider,
 model_response=model_response,
 print_verbose=print_verbose,
 encoding=encoding,
@@ -528,6 +532,8 @@ class DatabricksChatCompletion(BaseLLM):

 response = ModelResponse(**response_json)

+response.model = custom_llm_provider + "/" + response.model
+
 if base_model is not None:
     response._hidden_params["model"] = base_model
litellm/llms/databricks/cost_calculator.py (new file, 39 lines added)
@@ -0,0 +1,39 @@
+"""
+Helper util for handling databricks-specific cost calculation
+- e.g.: handling 'dbrx-instruct-*'
+"""
+
+from typing import Tuple
+
+from litellm.types.utils import Usage
+from litellm.utils import get_model_info
+
+
+def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
+    """
+    Calculates the cost per token for a given model, prompt tokens, and completion tokens.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - usage: LiteLLM Usage block, containing anthropic caching information
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+    """
+    base_model = model
+    if model.startswith("databricks/dbrx-instruct") or model.startswith(
+        "dbrx-instruct"
+    ):
+        base_model = "databricks-dbrx-instruct"
+
+    ## GET MODEL INFO
+    model_info = get_model_info(model=base_model, custom_llm_provider="databricks")
+
+    ## CALCULATE INPUT COST
+
+    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
+
+    ## CALCULATE OUTPUT COST
+    completion_cost = usage["completion_tokens"] * model_info["output_cost_per_token"]
+
+    return prompt_cost, completion_cost
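Note: a minimal sketch of exercising the new helper directly; the Usage values are illustrative and the resulting numbers depend on the databricks entries in litellm's model cost map:

    from litellm.llms.databricks.cost_calculator import cost_per_token
    from litellm.types.utils import Usage

    # 'dbrx-instruct*' style names are normalized to 'databricks-dbrx-instruct'
    usage = Usage(prompt_tokens=1000, completion_tokens=200, total_tokens=1200)
    prompt_cost, completion_cost = cost_per_token(model="dbrx-instruct", usage=usage)
    print(prompt_cost, completion_cost)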
@@ -273,7 +273,7 @@ class SagemakerLLM(BaseAWSLLM):
 model_id = optional_params.get("model_id", None)

 if use_messages_api is True:
-    from litellm.llms.databricks import DatabricksChatCompletion
+    from litellm.llms.databricks.chat import DatabricksChatCompletion

     openai_like_chat_completions = DatabricksChatCompletion()
     inference_params["stream"] = True if stream is True else False
@@ -80,7 +80,7 @@ class VertexAIPartnerModels(BaseLLM):
 import vertexai
 from google.cloud import aiplatform

-from litellm.llms.databricks import DatabricksChatCompletion
+from litellm.llms.databricks.chat import DatabricksChatCompletion
 from litellm.llms.OpenAI.openai import OpenAIChatCompletion
 from litellm.llms.text_completion_codestral import CodestralTextCompletion
 from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
@@ -92,7 +92,7 @@ from .llms.cohere import chat as cohere_chat
 from .llms.cohere import completion as cohere_completion  # type: ignore
 from .llms.cohere import embed as cohere_embed
 from .llms.custom_llm import CustomLLM, custom_chat_llm_router
-from .llms.databricks import DatabricksChatCompletion
+from .llms.databricks.chat import DatabricksChatCompletion
 from .llms.huggingface_restapi import Huggingface
 from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
 from .llms.OpenAI.openai import OpenAIChatCompletion, OpenAITextCompletion
@@ -1013,7 +1013,10 @@ def completion(
 api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE")

 api_version = (
-    api_version or litellm.api_version or get_secret("AZURE_API_VERSION")
+    api_version
+    or litellm.api_version
+    or get_secret("AZURE_API_VERSION")
+    or litellm.AZURE_DEFAULT_API_VERSION
 )

 api_key = (
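Note: with this fallback chain, an Azure completion call that supplies no api_version (and has no litellm.api_version or AZURE_API_VERSION env var set) now resolves to litellm.AZURE_DEFAULT_API_VERSION instead of failing. A hedged sketch, assuming Azure credentials are in the environment and using a hypothetical deployment name:

    import litellm

    # resolution order: argument -> litellm.api_version -> AZURE_API_VERSION env var
    # -> litellm.AZURE_DEFAULT_API_VERSION ("2024-08-01-preview")
    response = litellm.completion(
        model="azure/my-gpt-4o-deployment",  # hypothetical deployment name
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(response.choices[0].message.content)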
@@ -2512,16 +2512,16 @@
 "max_audio_length_hours": 8.4,
 "max_audio_per_prompt": 1,
 "max_pdf_size_mb": 30,
-"input_cost_per_token": 0.00000035,
-"input_cost_per_token_above_128k_tokens": 0.0000007,
-"output_cost_per_token": 0.00000105,
-"output_cost_per_token_above_128k_tokens": 0.0000021,
+"input_cost_per_token": 0.000000075,
+"input_cost_per_token_above_128k_tokens": 0.00000015,
+"output_cost_per_token": 0.0000003,
+"output_cost_per_token_above_128k_tokens": 0.0000006,
 "litellm_provider": "gemini",
 "mode": "chat",
 "supports_system_messages": true,
 "supports_function_calling": true,
 "supports_vision": true,
-"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+"source": "https://ai.google.dev/pricing"
 },
 "gemini/gemini-1.5-flash-latest": {
 "max_tokens": 8192,
@@ -2533,16 +2533,16 @@
 "max_audio_length_hours": 8.4,
 "max_audio_per_prompt": 1,
 "max_pdf_size_mb": 30,
-"input_cost_per_token": 0.00000035,
-"input_cost_per_token_above_128k_tokens": 0.0000007,
-"output_cost_per_token": 0.00000105,
-"output_cost_per_token_above_128k_tokens": 0.0000021,
+"input_cost_per_token": 0.000000075,
+"input_cost_per_token_above_128k_tokens": 0.00000015,
+"output_cost_per_token": 0.0000003,
+"output_cost_per_token_above_128k_tokens": 0.0000006,
 "litellm_provider": "gemini",
 "mode": "chat",
 "supports_system_messages": true,
 "supports_function_calling": true,
 "supports_vision": true,
-"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+"source": "https://ai.google.dev/pricing"
 },
 "gemini/gemini-pro": {
 "max_tokens": 8192,
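Note: the corrected gemini-1.5-flash rates work out as follows for a request under the 128k-token threshold (worked example, token counts are illustrative):

    # corrected gemini/gemini-1.5-flash rates (USD per token, <=128k context)
    input_cost_per_token = 0.000000075   # $0.075 per 1M input tokens
    output_cost_per_token = 0.0000003    # $0.30 per 1M output tokens

    prompt_tokens, completion_tokens = 10_000, 1_000
    total = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
    print(round(total, 6))  # 0.00105 -> roughly a tenth of a cent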
@@ -242,6 +242,9 @@ class LiteLLMRoutes(enum.Enum):
 "/v1/models",
 # token counter
 "/utils/token_counter",
+# rerank
+"/rerank",
+"/v1/rerank",
 ]

 mapped_pass_through_routes: List = [
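Note: with "/rerank" and "/v1/rerank" added to this route list, non-admin (dev) virtual keys can reach the rerank endpoint on the proxy. A hedged sketch of such a call; the proxy URL, key, and model name are hypothetical and the body follows a Cohere-style rerank schema:

    import requests

    resp = requests.post(
        "http://localhost:4000/v1/rerank",
        headers={"Authorization": "Bearer sk-my-dev-key"},  # non-admin virtual key
        json={
            "model": "cohere/rerank-english-v3.0",
            "query": "What is the capital of France?",
            "documents": ["Paris is the capital of France.", "Berlin is in Germany."],
        },
    )
    print(resp.json())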
@@ -891,18 +891,29 @@ def encode_image(image_path):
 return base64.b64encode(image_file.read()).decode("utf-8")


-@pytest.mark.skip(
-    reason="we already test claude-3, this is just another way to pass images"
-)
-def test_completion_claude_3_base64():
+@pytest.mark.parametrize(
+    "model",
+    [
+        "gpt-4o",
+        "azure/gpt-4o",
+        "anthropic/claude-3-opus-20240229",
+    ],
+) #
+def test_completion_base64(model):
     try:
+        import base64
+
+        import requests
+
         litellm.set_verbose = True
-        litellm.num_retries = 3
-        image_path = "../proxy/cached_logo.jpg"
-        # Getting the base64 string
-        base64_image = encode_image(image_path)
+        url = "https://dummyimage.com/100/100/fff&text=Test+image"
+        response = requests.get(url)
+        file_data = response.content
+
+        encoded_file = base64.b64encode(file_data).decode("utf-8")
+        base64_image = f"data:image/png;base64,{encoded_file}"
         resp = litellm.completion(
-            model="anthropic/claude-3-opus-20240229",
+            model=model,
             messages=[
                 {
                     "role": "user",
@@ -910,9 +921,7 @@ def test_completion_claude_3_base64():
 {"type": "text", "text": "Whats in this image?"},
 {
     "type": "image_url",
-    "image_url": {
-        "url": "data:image/jpeg;base64," + base64_image
-    },
+    "image_url": {"url": base64_image},
 },
 ],
 }
@@ -921,7 +930,6 @@ def test_completion_claude_3_base64():
 print(f"\nResponse: {resp}")

 prompt_tokens = resp.usage.prompt_tokens
-raise Exception("it worked!")
 except Exception as e:
     if "500 Internal error encountered.'" in str(e):
         pass
@@ -2176,15 +2184,16 @@ def test_completion_openai():


 @pytest.mark.parametrize(
-    "model",
+    "model, api_version",
     [
-        "gpt-4o-2024-08-06",
-        "azure/chatgpt-v-2",
-        "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+        ("gpt-4o-2024-08-06", None),
+        ("azure/chatgpt-v-2", None),
+        ("bedrock/anthropic.claude-3-sonnet-20240229-v1:0", None),
+        ("azure/gpt-4o", "2024-08-01-preview"),
     ],
 )
 @pytest.mark.flaky(retries=3, delay=1)
-def test_completion_openai_pydantic(model):
+def test_completion_openai_pydantic(model, api_version):
     try:
         litellm.set_verbose = True
         from pydantic import BaseModel
@@ -2209,6 +2218,7 @@ def test_completion_openai_pydantic(model):
 messages=messages,
 metadata={"hi": "bye"},
 response_format=EventsList,
+api_version=api_version,
 )
 break
 except litellm.JSONSchemaValidationError:
@@ -3471,14 +3481,14 @@ def response_format_tests(response: litellm.ModelResponse):
 @pytest.mark.parametrize(
     "model",
     [
-        # "bedrock/cohere.command-r-plus-v1:0",
+        "bedrock/mistral.mistral-large-2407-v1:0",
+        "bedrock/cohere.command-r-plus-v1:0",
         "anthropic.claude-3-sonnet-20240229-v1:0",
-        # "anthropic.claude-instant-v1",
-        # "bedrock/ai21.j2-mid",
-        # "mistral.mistral-7b-instruct-v0:2",
+        "anthropic.claude-instant-v1",
+        "mistral.mistral-7b-instruct-v0:2",
         # "bedrock/amazon.titan-tg1-large",
-        # "meta.llama3-8b-instruct-v1:0",
-        # "cohere.command-text-v14",
+        "meta.llama3-8b-instruct-v1:0",
+        "cohere.command-text-v14",
     ],
 )
 @pytest.mark.parametrize("sync_mode", [True, False])
@@ -3493,6 +3503,7 @@ async def test_completion_bedrock_httpx_models(sync_mode, model):
 messages=[{"role": "user", "content": "Hey! how's it going?"}],
 temperature=0.2,
 max_tokens=200,
+stop=["stop sequence"],
 )

 assert isinstance(response, litellm.ModelResponse)
@@ -3504,6 +3515,7 @@ async def test_completion_bedrock_httpx_models(sync_mode, model):
 messages=[{"role": "user", "content": "Hey! how's it going?"}],
 temperature=0.2,
 max_tokens=100,
+stop=["stop sequence"],
 )

 assert isinstance(response, litellm.ModelResponse)
@@ -1219,3 +1219,13 @@ def test_completion_cost_anthropic_prompt_caching():
 cost_2 = completion_cost(model=model, completion_response=response_2)

 assert cost_1 > cost_2
+
+
+def test_completion_cost_databricks():
+    model, messages = "databricks/databricks-dbrx-instruct", [
+        {"role": "user", "content": "What is 2+2?"}
+    ]
+
+    resp = litellm.completion(model=model, messages=messages)  # works fine
+
+    cost = completion_cost(completion_response=resp)
@@ -2512,16 +2512,16 @@
 "max_audio_length_hours": 8.4,
 "max_audio_per_prompt": 1,
 "max_pdf_size_mb": 30,
-"input_cost_per_token": 0.00000035,
-"input_cost_per_token_above_128k_tokens": 0.0000007,
-"output_cost_per_token": 0.00000105,
-"output_cost_per_token_above_128k_tokens": 0.0000021,
+"input_cost_per_token": 0.000000075,
+"input_cost_per_token_above_128k_tokens": 0.00000015,
+"output_cost_per_token": 0.0000003,
+"output_cost_per_token_above_128k_tokens": 0.0000006,
 "litellm_provider": "gemini",
 "mode": "chat",
 "supports_system_messages": true,
 "supports_function_calling": true,
 "supports_vision": true,
-"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+"source": "https://ai.google.dev/pricing"
 },
 "gemini/gemini-1.5-flash-latest": {
 "max_tokens": 8192,
@@ -2533,16 +2533,16 @@
 "max_audio_length_hours": 8.4,
 "max_audio_per_prompt": 1,
 "max_pdf_size_mb": 30,
-"input_cost_per_token": 0.00000035,
-"input_cost_per_token_above_128k_tokens": 0.0000007,
-"output_cost_per_token": 0.00000105,
-"output_cost_per_token_above_128k_tokens": 0.0000021,
+"input_cost_per_token": 0.000000075,
+"input_cost_per_token_above_128k_tokens": 0.00000015,
+"output_cost_per_token": 0.0000003,
+"output_cost_per_token_above_128k_tokens": 0.0000006,
 "litellm_provider": "gemini",
 "mode": "chat",
 "supports_system_messages": true,
 "supports_function_calling": true,
 "supports_vision": true,
-"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+"source": "https://ai.google.dev/pricing"
 },
 "gemini/gemini-pro": {
 "max_tokens": 8192,