Mirror of https://github.com/BerriAI/litellm.git
Synced 2025-04-26 03:04:13 +00:00
Merge branch 'main' into litellm_region_based_routing

Commit 8ad979cdfe
85 changed files with 793 additions and 448 deletions
@@ -14,7 +14,7 @@ import subprocess, os
 from os.path import abspath, join, dirname
 import litellm, openai
 import itertools
-import random, uuid, requests
+import random, uuid, requests  # type: ignore
 from functools import wraps
 import datetime, time
 import tiktoken
@@ -36,7 +36,7 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
 try:
     # this works in python 3.8
-    import pkg_resources
+    import pkg_resources  # type: ignore

     filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
     # try:
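The pkg_resources call in this hunk resolves the on-disk path of the tokenizer data bundled with the package. A minimal sketch of that pattern, with an importlib.resources fallback for environments without setuptools (the helper name, the "litellm" package argument, and the fallback are illustrative additions, not part of this diff):

def bundled_tokenizer_dir() -> str:
    """Locate the package's llms/tokenizers data directory on disk (illustrative)."""
    try:
        import pkg_resources  # type: ignore  # provided by setuptools
        return pkg_resources.resource_filename("litellm", "llms/tokenizers")
    except ImportError:
        # Fallback using the stdlib (Python 3.9+).
        from importlib import resources
        return str(resources.files("litellm") / "llms" / "tokenizers")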
@@ -4161,8 +4161,30 @@ def cost_per_token(
         model_with_provider_and_region in model_cost_ref
     ):  # use region based pricing, if it's available
         model_with_provider = model_with_provider_and_region
-    if model_with_provider in model_cost_ref:
+
+    model_without_prefix = model
+    model_parts = model.split("/")
+    if len(model_parts) > 1:
+        model_without_prefix = model_parts[1]
+    else:
+        model_without_prefix = model
+    """
+    Code block that formats model to lookup in litellm.model_cost
+    Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
+    Option2. model = "openai/gpt-4" - model = provider/model
+    Option3. model = "anthropic.claude-3" - model = model
+    """
+    if (
+        model_with_provider in model_cost_ref
+    ):  # Option 2. use model with provider, model = "openai/gpt-4"
+        model = model_with_provider
+    elif model in model_cost_ref:  # Option 1. use model passed, model="gpt-4"
+        model = model
+    elif (
+        model_without_prefix in model_cost_ref
+    ):  # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
+        model = model_without_prefix

     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
     if model in model_cost_ref:
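The docstring in this hunk spells out a lookup precedence for keys in litellm.model_cost: a region-qualified key is preferred, then provider/model, then the model name exactly as passed, then the name with any provider prefix stripped. A small self-contained sketch of that precedence (the key set and helper below are illustrative assumptions, not litellm's actual table or API):

from typing import Optional

# Illustrative stand-in for the keys of litellm.model_cost.
KNOWN_COST_KEYS = {
    "bedrock/ap-northeast-1/anthropic.claude-instant-v1",
    "openai/gpt-4",
    "anthropic.claude-instant-v1",
}

def resolve_cost_key(model: str, provider: str, region: Optional[str] = None) -> Optional[str]:
    """Pick the best-matching cost-table key using the Option 1/2/3 precedence."""
    without_prefix = model.split("/", 1)[1] if "/" in model else model
    candidates = []
    if region:
        # Option 1: region-qualified key, e.g. "bedrock/ap-northeast-1/anthropic.claude-instant-v1"
        candidates.append(f"{provider}/{region}/{without_prefix}")
    # Option 2: provider/model, e.g. "openai/gpt-4"
    candidates.append(f"{provider}/{without_prefix}")
    # The model name exactly as the caller passed it, e.g. "gpt-4"
    candidates.append(model)
    # Option 3: provider prefix stripped, e.g. "anthropic.claude-instant-v1"
    candidates.append(without_prefix)
    return next((c for c in candidates if c in KNOWN_COST_KEYS), None)

# resolve_cost_key("bedrock/anthropic.claude-instant-v1", "bedrock", "ap-northeast-1")
#   -> "bedrock/ap-northeast-1/anthropic.claude-instant-v1"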
@@ -7766,11 +7788,11 @@ def _calculate_retry_after(
         try:
             retry_after = int(retry_header)
         except Exception:
-            retry_date_tuple = email.utils.parsedate_tz(retry_header)
+            retry_date_tuple = email.utils.parsedate_tz(retry_header)  # type: ignore
             if retry_date_tuple is None:
                 retry_after = -1
             else:
-                retry_date = email.utils.mktime_tz(retry_date_tuple)
+                retry_date = email.utils.mktime_tz(retry_date_tuple)  # type: ignore
                 retry_after = int(retry_date - time.time())
     else:
         retry_after = -1
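Both type-ignored calls above are stdlib email.utils functions, which handle the HTTP-date form of Retry-After. The header has two valid shapes: an integer number of seconds, or an HTTP-date that must be converted into a delay relative to now. A standalone sketch of the same parsing (the function name and -1 sentinel mirror the snippet; nothing else about litellm's surrounding code is implied):

import email.utils
import time

def parse_retry_after(retry_header: str) -> int:
    """Parse a Retry-After header value into seconds; return -1 if it cannot be parsed."""
    try:
        # Form 1: delay in seconds, e.g. "120"
        return int(retry_header)
    except ValueError:
        pass
    # Form 2: an HTTP-date, e.g. "Fri, 31 Dec 1999 23:59:59 GMT"
    retry_date_tuple = email.utils.parsedate_tz(retry_header)
    if retry_date_tuple is None:
        return -1
    retry_date = email.utils.mktime_tz(retry_date_tuple)
    return int(retry_date - time.time())

# parse_retry_after("120")        -> 120
# parse_retry_after("not-a-date") -> -1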
@@ -10545,6 +10567,18 @@ class CustomStreamWrapper:
             elif self.custom_llm_provider == "watsonx":
                 response_obj = self.handle_watsonx_stream(chunk)
                 completion_obj["content"] = response_obj["text"]
+                print_verbose(f"completion obj content: {completion_obj['content']}")
+                if getattr(model_response, "usage", None) is None:
+                    model_response.usage = Usage()
+                if response_obj.get("prompt_tokens") is not None:
+                    prompt_token_count = getattr(model_response.usage, "prompt_tokens", 0)
+                    model_response.usage.prompt_tokens = (prompt_token_count + response_obj["prompt_tokens"])
+                if response_obj.get("completion_tokens") is not None:
+                    model_response.usage.completion_tokens = response_obj["completion_tokens"]
+                model_response.usage.total_tokens = (
+                    getattr(model_response.usage, "prompt_tokens", 0)
+                    + getattr(model_response.usage, "completion_tokens", 0)
+                )
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "text-completion-openai":
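The added lines accumulate streaming token usage for watsonx: prompt_tokens may arrive on more than one chunk and is summed, completion_tokens is overwritten with the most recent chunk's value, and total_tokens is recomputed from the two. A minimal sketch of that folding logic, independent of litellm's classes (the Usage dataclass and chunk dictionaries are assumptions for illustration):

from dataclasses import dataclass
from typing import Iterable

@dataclass
class Usage:
    # Illustrative stand-in for the usage object attached to the streamed response.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

def accumulate_usage(chunks: Iterable[dict]) -> Usage:
    """Fold per-chunk token counts into one Usage, mirroring the logic in the hunk."""
    usage = Usage()
    for chunk in chunks:
        if chunk.get("prompt_tokens") is not None:
            usage.prompt_tokens += chunk["prompt_tokens"]         # summed across chunks
        if chunk.get("completion_tokens") is not None:
            usage.completion_tokens = chunk["completion_tokens"]  # latest chunk wins
        usage.total_tokens = usage.prompt_tokens + usage.completion_tokens
    return usage

# accumulate_usage([{"prompt_tokens": 12}, {"completion_tokens": 7},
#                   {"completion_tokens": 19}]).total_tokens  ->  31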
@@ -10949,6 +10983,7 @@ class CustomStreamWrapper:
                 or self.custom_llm_provider == "sagemaker"
                 or self.custom_llm_provider == "gemini"
                 or self.custom_llm_provider == "cached_response"
+                or self.custom_llm_provider == "watsonx"
                 or self.custom_llm_provider in litellm.openai_compatible_endpoints
             ):
                 async for chunk in self.completion_stream:
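The one added line routes watsonx through the wrapper's async path, where chunks are pulled with async for rather than a blocking loop. A generic sketch of that dispatch pattern (the provider set and function are illustrative; litellm's actual check is the chain of == comparisons shown above plus litellm.openai_compatible_endpoints):

from typing import AsyncIterator

# Illustrative: providers whose completion stream is consumed as an async iterator.
ASYNC_STREAM_PROVIDERS = {"sagemaker", "gemini", "cached_response", "watsonx"}

async def consume_stream(provider: str, completion_stream) -> AsyncIterator[str]:
    """Yield chunks from a provider stream, choosing async or sync iteration."""
    if provider in ASYNC_STREAM_PROVIDERS:
        async for chunk in completion_stream:  # stream is natively async
            yield chunk
    else:
        for chunk in completion_stream:        # plain iterator, consumed synchronously
            yield chunk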