forked from phoenix/litellm-mirror
feat(cost_calculator.py): add cost calculation for dynamic context window (vertex ai / google ai studio)
parent 577b90aad8 · commit f597aa432b
5 changed files with 247 additions and 51 deletions
cost_calculator.py

@@ -1,20 +1,24 @@
 # What is this?
 ## File for 'response_cost' calculation in Logging
-from typing import Optional, Union, Literal, List, Tuple
+from typing import List, Literal, Optional, Tuple, Union
+
+import litellm
 import litellm._logging
+from litellm import verbose_logger
+from litellm.litellm_core_utils.llm_cost_calc.google import (
+    cost_per_token as google_cost_per_token,
+)
 from litellm.utils import (
-    ModelResponse,
+    CallTypes,
+    CostPerToken,
     EmbeddingResponse,
     ImageResponse,
-    TranscriptionResponse,
+    ModelResponse,
     TextCompletionResponse,
-    CallTypes,
+    TranscriptionResponse,
     print_verbose,
-    CostPerToken,
     token_counter,
 )
-import litellm
-from litellm import verbose_logger


 def _cost_per_token_custom_pricing_helper(

@@ -42,10 +46,10 @@ def _cost_per_token_custom_pricing_helper(

 def cost_per_token(
     model: str = "",
-    prompt_tokens=0,
-    completion_tokens=0,
+    prompt_tokens: float = 0,
+    completion_tokens: float = 0,
     response_time_ms=None,
-    custom_llm_provider=None,
+    custom_llm_provider: Optional[str] = None,
     region_name=None,
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,

@@ -66,6 +70,7 @@ def cost_per_token(
     Returns:
         tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
     """
+    args = locals()
     if model is None:
         raise Exception("Invalid arg. Model cannot be none.")
     ## CUSTOM PRICING ##

@@ -94,7 +99,8 @@ def cost_per_token(
         model_with_provider_and_region in model_cost_ref
     ):  # use region based pricing, if it's available
         model_with_provider = model_with_provider_and_region
+    else:
+        _, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
     model_without_prefix = model
     model_parts = model.split("/")
     if len(model_parts) > 1:

@@ -120,7 +126,14 @@ def cost_per_token(

     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
-    if model in model_cost_ref:
+    if custom_llm_provider == "vertex_ai" or custom_llm_provider == "gemini":
+        return google_cost_per_token(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+    elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
             f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}"
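A minimal usage sketch of the routing added above, assuming the loaded model-cost map contains the gemini-1.5 entries (the new test below loads the local map via LITELLM_LOCAL_MODEL_COST_MAP); the model name and token counts here are arbitrary examples, not values from this diff:

    import litellm

    # With a gemini/vertex_ai provider, cost_per_token should now delegate to
    # the Google-specific calculator instead of the flat model_cost lookup.
    prompt_cost, completion_cost = litellm.cost_per_token(
        model="gemini-1.5-flash-latest",
        custom_llm_provider="gemini",
        prompt_tokens=130_000,   # above 128k -> billed at the higher tier
        completion_tokens=1_000,
    )
    print(prompt_cost, completion_cost)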
litellm/litellm_core_utils/llm_cost_calc/google.py (new file, +82 lines)

@@ -0,0 +1,82 @@
+# What is this?
+## Cost calculation for Google AI Studio / Vertex AI models
+from typing import Literal, Tuple
+
+import litellm
+
+"""
+Gemini pricing covers:
+- token
+- image
+- audio
+- video
+"""
+
+models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro"]
+
+
+def _is_above_128k(tokens: float) -> bool:
+    if tokens > 128000:
+        return True
+    return False
+
+
+def cost_per_token(
+    model: str,
+    custom_llm_provider: str,
+    prompt_tokens: float,
+    completion_tokens: float,
+) -> Tuple[float, float]:
+    """
+    Calculates the cost per token for a given model, prompt tokens, and completion tokens.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, either "vertex_ai-*" or "gemini"
+        - prompt_tokens: float, the number of input tokens
+        - completion_tokens: float, the number of output tokens
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+
+    Raises:
+        Exception if model requires >128k pricing, but model cost not mapped
+    """
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    if (
+        _is_above_128k(tokens=prompt_tokens)
+        and model not in models_without_dynamic_pricing
+    ):
+        assert (
+            model_info["input_cost_per_token_above_128k_tokens"] is not None
+        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
+            model, model_info
+        )
+        prompt_cost = (
+            prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
+        )
+    else:
+        prompt_cost = prompt_tokens * model_info["input_cost_per_token"]
+
+    ## CALCULATE OUTPUT COST
+    if (
+        _is_above_128k(tokens=completion_tokens)
+        and model not in models_without_dynamic_pricing
+    ):
+        assert (
+            model_info["output_cost_per_token_above_128k_tokens"] is not None
+        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
+            model, model_info
+        )
+        completion_cost = (
+            completion_tokens * model_info["output_cost_per_token_above_128k_tokens"]
+        )
+    else:
+        completion_cost = completion_tokens * model_info["output_cost_per_token"]
+
+    return prompt_cost, completion_cost
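The new helper's behaviour in isolation: once either token count crosses 128,000 (and the model is not in models_without_dynamic_pricing), the entire count is billed at the above-128k rate; there is no blended tier. A toy illustration with made-up prices, not the real Gemini price sheet:

    # Hypothetical per-token prices for illustration only.
    INPUT_PRICE = 0.35 / 1_000_000
    INPUT_PRICE_ABOVE_128K = 0.70 / 1_000_000

    def toy_prompt_cost(prompt_tokens: float) -> float:
        # Mirrors _is_above_128k plus the input-cost branch above: the higher
        # rate applies to every prompt token once the threshold is crossed.
        if prompt_tokens > 128_000:
            return prompt_tokens * INPUT_PRICE_ABOVE_128K
        return prompt_tokens * INPUT_PRICE

    print(toy_prompt_cost(100_000))  # 0.035 (base rate)
    print(toy_prompt_cost(200_000))  # 0.14  (entire prompt at the >128k rate)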
(next file: the completion-cost test suite)

@@ -1,20 +1,28 @@
-import sys, os
+import os
+import sys
 import traceback

+import litellm.cost_calculator
+
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
+import asyncio
 import time
 from typing import Optional

+import pytest
+
 import litellm
 from litellm import (
+    TranscriptionResponse,
+    completion_cost,
+    cost_per_token,
     get_max_tokens,
     model_cost,
     open_ai_chat_completion_models,
-    TranscriptionResponse,
 )
 from litellm.litellm_core_utils.litellm_logging import CustomLogger
-import pytest, asyncio


 class CustomLoggingHandler(CustomLogger):

@@ -66,7 +74,7 @@ async def test_custom_pricing(sync_mode):


 def test_custom_pricing_as_completion_cost_param():
-    from litellm import ModelResponse, Choices, Message
+    from litellm import Choices, Message, ModelResponse
     from litellm.utils import Usage

     resp = ModelResponse(

@@ -134,7 +142,7 @@ def test_cost_ft_gpt_35():
     try:
         # this tests if litellm.completion_cost can calculate cost for ft:gpt-3.5-turbo:my-org:custom_suffix:id
         # it needs to lookup ft:gpt-3.5-turbo in the litellm model_cost map to get the correct cost
-        from litellm import ModelResponse, Choices, Message
+        from litellm import Choices, Message, ModelResponse
         from litellm.utils import Usage

         resp = ModelResponse(

@@ -179,7 +187,7 @@ def test_cost_azure_gpt_35():
     try:
         # this tests if litellm.completion_cost can calculate cost for azure/chatgpt-deployment-2 which maps to azure/gpt-3.5-turbo
         # for this test we check if passing `model` to completion_cost overrides the completion cost
-        from litellm import ModelResponse, Choices, Message
+        from litellm import Choices, Message, ModelResponse
         from litellm.utils import Usage

         resp = ModelResponse(

@@ -266,7 +274,7 @@ def test_cost_bedrock_pricing():
     """
     - get pricing specific to region for a model
     """
-    from litellm import ModelResponse, Choices, Message
+    from litellm import Choices, Message, ModelResponse
     from litellm.utils import Usage

     litellm.set_verbose = True

@@ -475,13 +483,13 @@ def test_replicate_llama3_cost_tracking():
 @pytest.mark.parametrize("is_streaming", [True, False])  #
 def test_groq_response_cost_tracking(is_streaming):
     from litellm.utils import (
-        ModelResponse,
-        Choices,
-        Message,
-        Usage,
         CallTypes,
-        StreamingChoices,
+        Choices,
         Delta,
+        Message,
+        ModelResponse,
+        StreamingChoices,
+        Usage,
     )

     response = ModelResponse(

@@ -565,3 +573,58 @@ def test_together_ai_qwen_completion_cost():
     )

     assert response == "together-ai-41.1b-80b"
+
+
+@pytest.mark.parametrize("above_128k", [False, True])
+@pytest.mark.parametrize("provider", ["vertex_ai", "gemini"])
+def test_gemini_completion_cost(above_128k, provider):
+    """
+    Check if cost correctly calculated for gemini models based on context window
+    """
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    if provider == "gemini":
+        model_name = "gemini-1.5-flash-latest"
+    else:
+        model_name = "gemini-1.5-flash-preview-0514"
+    if above_128k:
+        prompt_tokens = 128001.0
+        output_tokens = 228001.0
+    else:
+        prompt_tokens = 128.0
+        output_tokens = 228.0
+    ## GET MODEL FROM LITELLM.MODEL_INFO
+    model_info = litellm.get_model_info(model=model_name, custom_llm_provider=provider)
+
+    ## EXPECTED COST
+    if above_128k:
+        assert (
+            model_info["input_cost_per_token_above_128k_tokens"] is not None
+        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
+            model_name, model_info
+        )
+        assert (
+            model_info["output_cost_per_token_above_128k_tokens"] is not None
+        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
+            model_name, model_info
+        )
+        input_cost = (
+            prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
+        )
+        output_cost = (
+            output_tokens * model_info["output_cost_per_token_above_128k_tokens"]
+        )
+    else:
+        input_cost = prompt_tokens * model_info["input_cost_per_token"]
+        output_cost = output_tokens * model_info["output_cost_per_token"]
+
+    ## CALCULATED COST
+    calculated_input_cost, calculated_output_cost = cost_per_token(
+        model=model_name,
+        prompt_tokens=prompt_tokens,
+        completion_tokens=output_tokens,
+        custom_llm_provider=provider,
+    )
+
+    assert calculated_input_cost == input_cost
+    assert calculated_output_cost == output_cost
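For reference, stacking two @pytest.mark.parametrize decorators, as test_gemini_completion_cost does above, runs the test body once per combination (2 providers x 2 context-window regimes = 4 cases). A minimal standalone sketch of that pattern:

    import pytest

    @pytest.mark.parametrize("above_128k", [False, True])
    @pytest.mark.parametrize("provider", ["vertex_ai", "gemini"])
    def test_matrix(above_128k, provider):
        # Collected as 4 cases: (vertex_ai, False), (vertex_ai, True),
        # (gemini, False), (gemini, True).
        assert provider in ("vertex_ai", "gemini")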
(next file: litellm type definitions, ModelInfo TypedDict)

@@ -1,14 +1,15 @@
-from typing import List, Optional, Union, Dict, Tuple, Literal
-from typing_extensions import TypedDict
-from enum import Enum
-from typing_extensions import override, Required, Dict
-from .llms.openai import ChatCompletionUsageBlock, ChatCompletionToolCallChunk
-from ..litellm_core_utils.core_helpers import map_finish_reason
-from openai._models import BaseModel as OpenAIObject
-from pydantic import ConfigDict
-import uuid
 import json
 import time
+import uuid
+from enum import Enum
+from typing import Dict, List, Literal, Optional, Tuple, Union
+
+from openai._models import BaseModel as OpenAIObject
+from pydantic import ConfigDict
+from typing_extensions import Dict, Required, TypedDict, override
+
+from ..litellm_core_utils.core_helpers import map_finish_reason
+from .llms.openai import ChatCompletionToolCallChunk, ChatCompletionUsageBlock


 def _generate_id():  # private helper function

@@ -34,21 +35,31 @@ class ProviderField(TypedDict):
     field_value: str


-class ModelInfo(TypedDict):
+class ModelInfo(TypedDict, total=False):
     """
     Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
     """

-    max_tokens: Optional[int]
-    max_input_tokens: Optional[int]
-    max_output_tokens: Optional[int]
-    input_cost_per_token: float
-    output_cost_per_token: float
-    litellm_provider: str
-    mode: Literal[
-        "completion", "embedding", "image_generation", "chat", "audio_transcription"
+    max_tokens: Required[Optional[int]]
+    max_input_tokens: Required[Optional[int]]
+    max_output_tokens: Required[Optional[int]]
+    input_cost_per_token: Required[float]
+    input_cost_per_token_above_128k_tokens: Optional[float]
+    input_cost_per_image: Optional[float]
+    input_cost_per_audio_per_second: Optional[float]
+    input_cost_per_video_per_second: Optional[float]
+    output_cost_per_token: Required[float]
+    output_cost_per_token_above_128k_tokens: Optional[float]
+    output_cost_per_image: Optional[float]
+    output_cost_per_video_per_second: Optional[float]
+    output_cost_per_audio_per_second: Optional[float]
+    litellm_provider: Required[str]
+    mode: Required[
+        Literal[
+            "completion", "embedding", "image_generation", "chat", "audio_transcription"
+        ]
     ]
-    supported_openai_params: Optional[List[str]]
+    supported_openai_params: Required[Optional[List[str]]]


 class GenericStreamingChunk(TypedDict):
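The ModelInfo change switches the TypedDict to total=False and marks only the long-standing keys as Required, so the new per-modality and above-128k price keys may simply be absent for models that do not define them. A minimal sketch of that pattern (illustrative keys, not the full litellm definition):

    from typing import Optional
    from typing_extensions import Required, TypedDict

    class PricingInfo(TypedDict, total=False):
        input_cost_per_token: Required[float]                    # must always be present
        input_cost_per_token_above_128k_tokens: Optional[float]  # may be omitted entirely

    ok: PricingInfo = {"input_cost_per_token": 0.35 / 1_000_000}  # type-checks: optional key left out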
(next file: get_model_info in litellm's utils module)

@@ -4286,8 +4286,10 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
             split_model, custom_llm_provider, _, _ = get_llm_provider(model=model)
         except:
             pass
+        combined_model_name = model
     else:
         split_model = model
+        combined_model_name = "{}/{}".format(custom_llm_provider, model)
     #########################

     supported_openai_params = litellm.get_supported_openai_params(

@@ -4305,33 +4307,58 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
             }
         else:
             """
-            Check if:
-            1. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost
-            2. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost
+            Check if: (in order of specificity)
+            1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq"
+            2. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192" and custom_llm_provider=None
+            3. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192"
             """
-            if model in litellm.model_cost:
+            if combined_model_name in litellm.model_cost:
+                _model_info = litellm.model_cost[combined_model_name]
+                _model_info["supported_openai_params"] = supported_openai_params
+                if (
+                    "litellm_provider" in _model_info
+                    and _model_info["litellm_provider"] != custom_llm_provider
+                ):
+                    if custom_llm_provider == "vertex_ai" and _model_info[
+                        "litellm_provider"
+                    ].startswith("vertex_ai"):
+                        pass
+                    else:
+                        raise Exception
+                return _model_info
+            elif model in litellm.model_cost:
                 _model_info = litellm.model_cost[model]
                 _model_info["supported_openai_params"] = supported_openai_params
                 if (
                     "litellm_provider" in _model_info
                     and _model_info["litellm_provider"] != custom_llm_provider
                 ):
-                    raise Exception
+                    if custom_llm_provider == "vertex_ai" and _model_info[
+                        "litellm_provider"
+                    ].startswith("vertex_ai"):
+                        pass
+                    else:
+                        raise Exception
                 return _model_info
-            if split_model in litellm.model_cost:
+            elif split_model in litellm.model_cost:
                 _model_info = litellm.model_cost[split_model]
                 _model_info["supported_openai_params"] = supported_openai_params
                 if (
                     "litellm_provider" in _model_info
                     and _model_info["litellm_provider"] != custom_llm_provider
                 ):
-                    raise Exception
+                    if custom_llm_provider == "vertex_ai" and _model_info[
+                        "litellm_provider"
+                    ].startswith("vertex_ai"):
+                        pass
+                    else:
+                        raise Exception
                 return _model_info
             else:
                 raise ValueError(
                     "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
                 )
-    except:
+    except Exception:
         raise Exception(
             "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
         )
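A sketch of the lookup order the updated docstring describes, using a toy cost map and simplified key construction rather than litellm's real get_llm_provider logic:

    from typing import Optional

    toy_model_cost = {
        "gemini/gemini-1.5-flash-latest": {"litellm_provider": "gemini"},
        "llama3-8b-8192": {"litellm_provider": "groq"},
    }

    def toy_get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> dict:
        split_model = model.split("/", 1)[-1]
        combined = "{}/{}".format(custom_llm_provider, model) if custom_llm_provider else model
        # Most specific key first: provider/model, then the raw model string,
        # then the model name with any provider prefix stripped.
        for key in (combined, model, split_model):
            if key in toy_model_cost:
                return toy_model_cost[key]
        raise ValueError("This model isn't mapped yet.")

    print(toy_get_model_info("gemini-1.5-flash-latest", "gemini"))  # hit via provider/model
    print(toy_get_model_info("groq/llama3-8b-8192"))                # hit via split_model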