* build(model_prices_and_context_window.json): add bedrock llama3.2 pricing
* build(model_prices_and_context_window.json): add bedrock cross region inference pricing
* Revert "(perf) move s3 logging to Batch logging + async [94% faster perf under 100 RPS on 1 litellm instance] (#6165)"
This reverts commit 2a5624af47.
* add azure/gpt-4o-2024-05-13 (#6174)
* LiteLLM Minor Fixes & Improvements (10/10/2024) (#6158)
* refactor(vertex_ai_partner_models/anthropic): refactor anthropic to use partner model logic
* fix(vertex_ai/): support passing custom api base to partner models
Fixes https://github.com/BerriAI/litellm/issues/4317
* fix(proxy_server.py): Fix prometheus premium user check logic
* docs(prometheus.md): update quick start docs
* fix(custom_llm.py): support passing dynamic api key + api base
* fix(realtime_api/main.py): Add request/response logging for realtime api endpoints
Closes https://github.com/BerriAI/litellm/issues/6081
* feat(openai/realtime): add openai realtime api logging
Closes https://github.com/BerriAI/litellm/issues/6081
* fix(realtime_streaming.py): fix linting errors
* fix(realtime_streaming.py): fix linting errors
* fix: fix linting errors
* fix pattern match router
* Add literalai in the sidebar observability category (#6163)
* fix: add literalai in the sidebar
* fix: typo
* update (#6160)
* Feat: Add Langtrace integration (#5341)
* Feat: Add Langtrace integration
* add langtrace service name
* fix timestamps for traces
* add tests
* Discard Callback + use existing otel logger
* cleanup
* remove print statements
* remove callback
* add docs
* docs
* add logging docs
* format logging
* remove emoji and add litellm proxy example
* format logging
* format `logging.md`
* add langtrace docs to logging.md
* sync conflict
* docs fix
* (perf) move s3 logging to Batch logging + async [94% faster perf under 100 RPS on 1 litellm instance] (#6165)
* fix move s3 to use customLogger
* add basic s3 logging test
* add s3 to custom logger compatible
* use batch logger for s3
* s3 set flush interval and batch size
* fix s3 logging
* add notes on s3 logging
* fix s3 logging
* add basic s3 logging test
* fix s3 type errors
* add test for sync logging on s3
* fix: fix to debug log
---------
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Willy Douhard <willy.douhard@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
Co-authored-by: Ali Waleed <ali@scale3labs.com>
* docs(custom_llm_server.md): update doc on passing custom params
* fix(pass_through_endpoints.py): don't require headers
Fixes https://github.com/BerriAI/litellm/issues/6128
* feat(utils.py): add support for caching rerank endpoints
Closes https://github.com/BerriAI/litellm/issues/6144
* feat(litellm_logging.py): add response headers for failed requests
Closes https://github.com/BerriAI/litellm/issues/6159
---------
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Willy Douhard <willy.douhard@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
Co-authored-by: Ali Waleed <ali@scale3labs.com>
import json
import time
import uuid
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

from openai._models import BaseModel as OpenAIObject
from openai.types.audio.transcription_create_params import FileTypes  # type: ignore
from openai.types.completion_usage import (
    CompletionTokensDetails,
    CompletionUsage,
    PromptTokensDetails,
)
from pydantic import BaseModel, ConfigDict, PrivateAttr
from typing_extensions import Callable, Dict, Required, TypedDict, override

from ..litellm_core_utils.core_helpers import map_finish_reason
from .llms.openai import (
    ChatCompletionToolCallChunk,
    ChatCompletionUsageBlock,
    OpenAIChatCompletionChunk,
)


def _generate_id():  # private helper function
    return "chatcmpl-" + str(uuid.uuid4())


class LiteLLMCommonStrings(Enum):
    redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"


SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]


class CostPerToken(TypedDict):
    input_cost_per_token: float
    output_cost_per_token: float

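
# Example (illustrative): a CostPerToken mapping as it might be passed for
# custom per-token pricing. The prices below are made up.
_EXAMPLE_COST_PER_TOKEN: CostPerToken = {
    "input_cost_per_token": 0.0000004,
    "output_cost_per_token": 0.0000016,
}
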
class ProviderField(TypedDict):
    field_name: str
    field_type: Literal["string"]
    field_description: str
    field_value: str


class ModelInfo(TypedDict, total=False):
    """
    Model info for a given model, as found in litellm.model_prices_and_context_window.json
    """

    key: Required[str]  # the key in litellm.model_cost which is returned

    max_tokens: Required[Optional[int]]
    max_input_tokens: Required[Optional[int]]
    max_output_tokens: Required[Optional[int]]
    input_cost_per_token: Required[float]
    cache_creation_input_token_cost: Optional[float]
    cache_read_input_token_cost: Optional[float]
    input_cost_per_character: Optional[float]  # only for vertex ai models
    input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
    input_cost_per_character_above_128k_tokens: Optional[
        float
    ]  # only for vertex ai models
    input_cost_per_query: Optional[float]  # only for rerank models
    input_cost_per_image: Optional[float]  # only for vertex ai models
    input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
    input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
    input_cost_per_second: Optional[float]  # for OpenAI Speech models
    output_cost_per_token: Required[float]
    output_cost_per_character: Optional[float]  # only for vertex ai models
    output_cost_per_token_above_128k_tokens: Optional[
        float
    ]  # only for vertex ai models
    output_cost_per_character_above_128k_tokens: Optional[
        float
    ]  # only for vertex ai models
    output_cost_per_image: Optional[float]
    output_vector_size: Optional[int]
    output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
    output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
    output_cost_per_second: Optional[float]  # for OpenAI Speech models

    litellm_provider: Required[str]
    mode: Required[
        Literal[
            "completion", "embedding", "image_generation", "chat", "audio_transcription"
        ]
    ]
    supported_openai_params: Required[Optional[List[str]]]
    supports_system_messages: Optional[bool]
    supports_response_schema: Optional[bool]
    supports_vision: Optional[bool]
    supports_function_calling: Optional[bool]
    supports_assistant_prefill: Optional[bool]
    supports_prompt_caching: Optional[bool]

class GenericStreamingChunk(TypedDict, total=False):
    text: Required[str]
    tool_use: Optional[ChatCompletionToolCallChunk]
    is_finished: Required[bool]
    finish_reason: Required[str]
    usage: Required[Optional[ChatCompletionUsageBlock]]
    index: int

    # use this dict if you want to return any provider-specific fields in the response
    provider_specific_fields: Optional[Dict[str, Any]]

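
# Example (illustrative): a minimal GenericStreamingChunk, as a custom provider
# integration might yield it mid-stream. All field values below are made up.
_EXAMPLE_GENERIC_CHUNK: GenericStreamingChunk = {
    "text": "Hello",
    "is_finished": False,
    "finish_reason": "",
    "usage": None,
    "index": 0,
}
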
class CallTypes(Enum):
    embedding = "embedding"
    aembedding = "aembedding"
    completion = "completion"
    acompletion = "acompletion"
    atext_completion = "atext_completion"
    text_completion = "text_completion"
    image_generation = "image_generation"
    aimage_generation = "aimage_generation"
    moderation = "moderation"
    amoderation = "amoderation"
    atranscription = "atranscription"
    transcription = "transcription"
    aspeech = "aspeech"
    speech = "speech"
    rerank = "rerank"
    arerank = "arerank"
    arealtime = "_arealtime"


class PassthroughCallTypes(Enum):
    passthrough_image_generation = "passthrough-image-generation"

class TopLogprob(OpenAIObject):
    token: str
    """The token."""

    bytes: Optional[List[int]] = None
    """A list of integers representing the UTF-8 bytes representation of the token.

    Useful in instances where characters are represented by multiple tokens and
    their byte representations must be combined to generate the correct text
    representation. Can be `null` if there is no bytes representation for the token.
    """

    logprob: float
    """The log probability of this token, if it is within the top 20 most likely
    tokens.

    Otherwise, the value `-9999.0` is used to signify that the token is very
    unlikely.
    """


class ChatCompletionTokenLogprob(OpenAIObject):
    token: str
    """The token."""

    bytes: Optional[List[int]] = None
    """A list of integers representing the UTF-8 bytes representation of the token.

    Useful in instances where characters are represented by multiple tokens and
    their byte representations must be combined to generate the correct text
    representation. Can be `null` if there is no bytes representation for the token.
    """

    logprob: float
    """The log probability of this token, if it is within the top 20 most likely
    tokens.

    Otherwise, the value `-9999.0` is used to signify that the token is very
    unlikely.
    """

    top_logprobs: List[TopLogprob]
    """List of the most likely tokens and their log probability, at this token
    position.

    In rare cases, there may be fewer than the number of requested `top_logprobs`
    returned.
    """


class ChoiceLogprobs(OpenAIObject):
    content: Optional[List[ChatCompletionTokenLogprob]] = None
    """A list of message content tokens with log probability information."""


class FunctionCall(OpenAIObject):
    arguments: str
    name: Optional[str] = None

class Function(OpenAIObject):
    arguments: str
    name: Optional[
        str
    ]  # can be None - openai e.g.: ChoiceDeltaToolCallFunction(arguments='{"', name=None)

    def __init__(
        self,
        arguments: Optional[Union[Dict, str]],
        name: Optional[str] = None,
        **params,
    ):
        if arguments is None:
            arguments = ""
        elif isinstance(arguments, Dict):
            arguments = json.dumps(arguments)

        # Build a dictionary with the structure your BaseModel expects
        data = {"arguments": arguments, "name": name, **params}

        super(Function, self).__init__(**data)

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

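
# Example (illustrative): Function normalizes dict arguments to a JSON string
# and supports dict-style access. The tool name and arguments below are made up.
def _example_function_usage() -> None:
    fn = Function(arguments={"location": "Boston"}, name="get_weather")
    assert fn["name"] == "get_weather"  # __getitem__
    assert "arguments" in fn  # __contains__
    assert fn.get("missing", "n/a") == "n/a"  # .get() with a default
    assert isinstance(fn.arguments, str)  # dict input was JSON-encoded
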
class ChatCompletionDeltaToolCall(OpenAIObject):
    id: Optional[str] = None
    function: Function
    type: Optional[str] = None
    index: int


class HiddenParams(OpenAIObject):
    original_response: Optional[Union[str, Any]] = None
    model_id: Optional[str] = None  # used in Router for individual deployments
    api_base: Optional[str] = None  # returns api base used for making completion call

    model_config = ConfigDict(extra="allow", protected_namespaces=())

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()

class ChatCompletionMessageToolCall(OpenAIObject):
    def __init__(
        self,
        function: Union[Dict, Function],
        id: Optional[str] = None,
        type: Optional[str] = None,
        **params,
    ):
        super(ChatCompletionMessageToolCall, self).__init__(**params)
        if isinstance(function, Dict):
            self.function = Function(**function)
        else:
            self.function = function

        if id is not None:
            self.id = id
        else:
            self.id = f"{uuid.uuid4()}"

        if type is not None:
            self.type = type
        else:
            self.type = "function"

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)


"""
Reference:
ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None)
"""

class Message(OpenAIObject):

    content: Optional[str]
    role: Literal["assistant", "user", "system", "tool", "function"]
    tool_calls: Optional[List[ChatCompletionMessageToolCall]]
    function_call: Optional[FunctionCall]

    def __init__(
        self,
        content: Optional[str] = None,
        role: Literal["assistant"] = "assistant",
        function_call=None,
        tool_calls: Optional[list] = None,
        **params,
    ):
        init_values = {
            "content": content,
            "role": role or "assistant",  # handle null input
            "function_call": (
                FunctionCall(**function_call) if function_call is not None else None
            ),
            "tool_calls": (
                [
                    (
                        ChatCompletionMessageToolCall(**tool_call)
                        if isinstance(tool_call, dict)
                        else tool_call
                    )
                    for tool_call in tool_calls
                ]
                if tool_calls is not None and len(tool_calls) > 0
                else None
            ),
        }
        super(Message, self).__init__(
            **init_values,  # type: ignore
            **params,
        )

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()

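
# Example (illustrative): Message coerces dict tool calls into
# ChatCompletionMessageToolCall objects. All values below are made up.
def _example_message_usage() -> None:
    msg = Message(
        content=None,
        tool_calls=[
            {
                "id": "call_abc123",
                "type": "function",
                "function": {"name": "get_weather", "arguments": '{"city": "SF"}'},
            }
        ],
    )
    assert msg.role == "assistant"
    assert msg.tool_calls is not None
    assert isinstance(msg.tool_calls[0], ChatCompletionMessageToolCall)
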
class Delta(OpenAIObject):
    def __init__(
        self,
        content=None,
        role=None,
        function_call=None,
        tool_calls=None,
        **params,
    ):
        super(Delta, self).__init__(**params)
        self.content = content
        self.role = role

        if function_call is not None and isinstance(function_call, dict):
            self.function_call = FunctionCall(**function_call)
        else:
            self.function_call = function_call
        if tool_calls is not None and isinstance(tool_calls, list):
            self.tool_calls = []
            for tool_call in tool_calls:
                if isinstance(tool_call, dict):
                    if tool_call.get("index", None) is None:
                        tool_call["index"] = 0
                    self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
                elif isinstance(tool_call, ChatCompletionDeltaToolCall):
                    self.tool_calls.append(tool_call)
        else:
            self.tool_calls = tool_calls

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

class Choices(OpenAIObject):
    def __init__(
        self,
        finish_reason=None,
        index=0,
        message: Optional[Union[Message, dict]] = None,
        logprobs=None,
        enhancements=None,
        **params,
    ):
        super(Choices, self).__init__(**params)
        if finish_reason is not None:
            self.finish_reason = map_finish_reason(
                finish_reason
            )  # set finish_reason for all responses
        else:
            self.finish_reason = "stop"
        self.index = index
        if message is None:
            self.message = Message()
        else:
            if isinstance(message, Message):
                self.message = message
            elif isinstance(message, dict):
                self.message = Message(**message)
        if logprobs is not None:
            self.logprobs = logprobs
        if enhancements is not None:
            self.enhancements = enhancements

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

class Usage(CompletionUsage):
    _cache_creation_input_tokens: int = PrivateAttr(
        0
    )  # hidden param for prompt caching. Might change, once openai introduces their equivalent.
    _cache_read_input_tokens: int = PrivateAttr(
        0
    )  # hidden param for prompt caching. Might change, once openai introduces their equivalent.

    def __init__(
        self,
        prompt_tokens: Optional[int] = None,
        completion_tokens: Optional[int] = None,
        total_tokens: Optional[int] = None,
        reasoning_tokens: Optional[int] = None,
        prompt_tokens_details: Optional[Union[PromptTokensDetails, dict]] = None,
        completion_tokens_details: Optional[
            Union[CompletionTokensDetails, dict]
        ] = None,
        **params,
    ):
        # handle reasoning_tokens
        _completion_tokens_details: Optional[CompletionTokensDetails] = None
        if reasoning_tokens:
            completion_tokens_details = CompletionTokensDetails(
                reasoning_tokens=reasoning_tokens
            )

        # Ensure completion_tokens_details is properly handled
        if completion_tokens_details:
            if isinstance(completion_tokens_details, dict):
                _completion_tokens_details = CompletionTokensDetails(
                    **completion_tokens_details
                )
            elif isinstance(completion_tokens_details, CompletionTokensDetails):
                _completion_tokens_details = completion_tokens_details

        ## DEEPSEEK MAPPING ##
        if "prompt_cache_hit_tokens" in params and isinstance(
            params["prompt_cache_hit_tokens"], int
        ):
            if prompt_tokens_details is None:
                prompt_tokens_details = PromptTokensDetails(
                    cached_tokens=params["prompt_cache_hit_tokens"]
                )

        ## ANTHROPIC MAPPING ##
        if "cache_read_input_tokens" in params and isinstance(
            params["cache_read_input_tokens"], int
        ):
            if prompt_tokens_details is None:
                prompt_tokens_details = PromptTokensDetails(
                    cached_tokens=params["cache_read_input_tokens"]
                )

        # handle prompt_tokens_details
        _prompt_tokens_details: Optional[PromptTokensDetails] = None
        if prompt_tokens_details:
            if isinstance(prompt_tokens_details, dict):
                _prompt_tokens_details = PromptTokensDetails(**prompt_tokens_details)
            elif isinstance(prompt_tokens_details, PromptTokensDetails):
                _prompt_tokens_details = prompt_tokens_details

        super().__init__(
            prompt_tokens=prompt_tokens or 0,
            completion_tokens=completion_tokens or 0,
            total_tokens=total_tokens or 0,
            completion_tokens_details=_completion_tokens_details or None,
            prompt_tokens_details=_prompt_tokens_details or None,
        )

        ## ANTHROPIC MAPPING ##
        if "cache_creation_input_tokens" in params and isinstance(
            params["cache_creation_input_tokens"], int
        ):
            self._cache_creation_input_tokens = params["cache_creation_input_tokens"]

        if "cache_read_input_tokens" in params and isinstance(
            params["cache_read_input_tokens"], int
        ):
            self._cache_read_input_tokens = params["cache_read_input_tokens"]

        ## DEEPSEEK MAPPING ##
        if "prompt_cache_hit_tokens" in params and isinstance(
            params["prompt_cache_hit_tokens"], int
        ):
            self._cache_read_input_tokens = params["prompt_cache_hit_tokens"]

        for k, v in params.items():
            setattr(self, k, v)

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

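
# Example (illustrative): provider-specific cache fields are mapped onto the
# OpenAI-style prompt_tokens_details. The token counts below are made up.
def _example_usage_mapping() -> None:
    usage = Usage(
        prompt_tokens=100,
        completion_tokens=20,
        total_tokens=120,
        cache_read_input_tokens=80,  # Anthropic-style cache param
    )
    assert usage.prompt_tokens_details is not None
    assert usage.prompt_tokens_details.cached_tokens == 80
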
class StreamingChoices(OpenAIObject):
    def __init__(
        self,
        finish_reason=None,
        index=0,
        delta: Optional[Delta] = None,
        logprobs=None,
        enhancements=None,
        **params,
    ):
        super(StreamingChoices, self).__init__(**params)
        if finish_reason:
            self.finish_reason = map_finish_reason(finish_reason)
        else:
            self.finish_reason = None
        self.index = index
        if delta is not None:
            if isinstance(delta, Delta):
                self.delta = delta
            elif isinstance(delta, dict):
                self.delta = Delta(**delta)
        else:
            self.delta = Delta()
        if enhancements is not None:
            self.enhancements = enhancements

        if logprobs is not None and isinstance(logprobs, dict):
            self.logprobs = ChoiceLogprobs(**logprobs)
        else:
            self.logprobs = logprobs  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

class StreamingChatCompletionChunk(OpenAIChatCompletionChunk):
    def __init__(self, **kwargs):
        new_choices = []
        for choice in kwargs["choices"]:
            new_choice = StreamingChoices(**choice).model_dump()
            new_choices.append(new_choice)
        kwargs["choices"] = new_choices
        super().__init__(**kwargs)

class ModelResponse(OpenAIObject):
    id: str
    """A unique identifier for the completion."""

    choices: List[Union[Choices, StreamingChoices]]
    """The list of completion choices the model generated for the input prompt."""

    created: int
    """The Unix timestamp (in seconds) of when the completion was created."""

    model: Optional[str] = None
    """The model used for completion."""

    object: str
    """The object type, which is always "text_completion" """

    system_fingerprint: Optional[str] = None
    """This fingerprint represents the backend configuration that the model runs with.

    Can be used in conjunction with the `seed` request parameter to understand when
    backend changes have been made that might impact determinism.
    """

    _hidden_params: dict = {}

    _response_headers: Optional[dict] = None

    def __init__(
        self,
        id=None,
        choices=None,
        created=None,
        model=None,
        object=None,
        system_fingerprint=None,
        usage=None,
        stream=None,
        stream_options=None,
        response_ms=None,
        hidden_params=None,
        _response_headers=None,
        **params,
    ) -> None:
        if stream is not None and stream is True:
            object = "chat.completion.chunk"
            if choices is not None and isinstance(choices, list):
                new_choices = []
                for choice in choices:
                    _new_choice = None
                    if isinstance(choice, StreamingChoices):
                        _new_choice = choice
                    elif isinstance(choice, dict):
                        _new_choice = StreamingChoices(**choice)
                    elif isinstance(choice, BaseModel):
                        _new_choice = StreamingChoices(**choice.model_dump())
                    new_choices.append(_new_choice)
                choices = new_choices
            else:
                choices = [StreamingChoices()]
        else:
            object = "chat.completion"
            if choices is not None and isinstance(choices, list):
                new_choices = []
                for choice in choices:
                    if isinstance(choice, Choices):
                        _new_choice = choice  # type: ignore
                    elif isinstance(choice, dict):
                        _new_choice = Choices(**choice)  # type: ignore
                    else:
                        _new_choice = choice
                    new_choices.append(_new_choice)
                choices = new_choices
            else:
                choices = [Choices()]
        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())
        if usage is not None:
            if isinstance(usage, dict):
                usage = Usage(**usage)
        elif stream is None or stream is False:
            usage = Usage()
        if hidden_params:
            self._hidden_params = hidden_params

        if _response_headers:
            self._response_headers = _response_headers

        init_values = {
            "id": id,
            "choices": choices,
            "created": created,
            "model": model,
            "object": object,
            "system_fingerprint": system_fingerprint,
        }

        if usage is not None:
            init_values["usage"] = usage

        super().__init__(
            **init_values,
            **params,
        )

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()

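
# Example (illustrative): building a ModelResponse from plain dicts. Defaults
# (id, created, object, usage) are filled in by __init__. Values are made up.
def _example_model_response() -> None:
    resp = ModelResponse(
        model="gpt-3.5-turbo",
        choices=[
            {
                "finish_reason": "stop",
                "index": 0,
                "message": {"role": "assistant", "content": "Hi there!"},
            }
        ],
    )
    assert resp.object == "chat.completion"
    assert resp.id.startswith("chatcmpl-")
    assert isinstance(resp.choices[0], Choices)
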
class Embedding(OpenAIObject):
    embedding: Union[list, str] = []
    index: int
    object: Literal["embedding"]

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

class EmbeddingResponse(OpenAIObject):
    model: Optional[str] = None
    """The model used for embedding."""

    data: List
    """The actual embedding value"""

    object: Literal["list"]
    """The object type, which is always "list" """

    usage: Optional[Usage] = None
    """Usage statistics for the embedding request."""

    _hidden_params: dict = {}
    _response_headers: Optional[Dict] = None
    _response_ms: Optional[float] = None

    def __init__(
        self,
        model: Optional[str] = None,
        usage: Optional[Usage] = None,
        response_ms=None,
        data: Optional[List] = None,
        hidden_params=None,
        _response_headers=None,
        **params,
    ):
        object = "list"
        if response_ms:
            _response_ms = response_ms
        else:
            _response_ms = None
        if not data:
            data = []

        if not usage:
            usage = Usage()

        if _response_headers:
            self._response_headers = _response_headers

        super().__init__(model=model, object=object, data=data, usage=usage)  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()

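
# Example (illustrative): an EmbeddingResponse assembled from Embedding
# objects. The model name and vector values below are made up.
def _example_embedding_response() -> None:
    resp = EmbeddingResponse(
        model="text-embedding-ada-002",
        data=[Embedding(embedding=[0.1, 0.2, 0.3], index=0, object="embedding")],
    )
    assert resp.object == "list"
    assert resp.data[0].index == 0
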
class Logprobs(OpenAIObject):
    text_offset: List[int]
    token_logprobs: List[float]
    tokens: List[str]
    top_logprobs: List[Dict[str, float]]


class TextChoices(OpenAIObject):
    def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
        super(TextChoices, self).__init__(**params)
        if finish_reason:
            self.finish_reason = map_finish_reason(finish_reason)
        else:
            self.finish_reason = None
        self.index = index
        if text is not None:
            self.text = text
        else:
            self.text = None
        if logprobs is None:
            self.logprobs = None
        else:
            if isinstance(logprobs, dict):
                self.logprobs = Logprobs(**logprobs)
            else:
                self.logprobs = logprobs

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()

class TextCompletionResponse(OpenAIObject):
    """
    {
        "id": response["id"],
        "object": "text_completion",
        "created": response["created"],
        "model": response["model"],
        "choices": [
            {
                "text": response["choices"][0]["message"]["content"],
                "index": response["choices"][0]["index"],
                "logprobs": transformed_logprobs,
                "finish_reason": response["choices"][0]["finish_reason"]
            }
        ],
        "usage": response["usage"]
    }
    """

    id: str
    object: str
    created: int
    model: Optional[str]
    choices: List[TextChoices]
    usage: Optional[Usage]
    _response_ms: Optional[int] = None
    _hidden_params: HiddenParams

    def __init__(
        self,
        id=None,
        choices=None,
        created=None,
        model=None,
        usage=None,
        stream=False,
        response_ms=None,
        object=None,
        **params,
    ):
        if stream:
            object = "text_completion.chunk"
            choices = [TextChoices()]
        else:
            object = "text_completion"
            if choices is not None and isinstance(choices, list):
                new_choices = []
                for choice in choices:
                    _new_choice = None
                    if isinstance(choice, TextChoices):
                        _new_choice = choice
                    elif isinstance(choice, dict):
                        _new_choice = TextChoices(**choice)
                    new_choices.append(_new_choice)
                choices = new_choices
            else:
                choices = [TextChoices()]
        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())

        if not usage:
            usage = Usage()

        super(TextCompletionResponse, self).__init__(
            id=id,  # type: ignore
            object=object,  # type: ignore
            created=created,  # type: ignore
            model=model,  # type: ignore
            choices=choices,  # type: ignore
            usage=usage,  # type: ignore
            **params,
        )

        if response_ms:
            self._response_ms = response_ms
        else:
            self._response_ms = None
        self._hidden_params = HiddenParams()

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

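
# Example (illustrative): constructing a TextCompletionResponse with dict
# choices, mirroring the mapping shown in the class docstring. Values are made up.
def _example_text_completion_response() -> None:
    resp = TextCompletionResponse(
        model="gpt-3.5-turbo-instruct",
        choices=[
            {"text": "Hello!", "index": 0, "logprobs": None, "finish_reason": "stop"}
        ],
    )
    assert resp.object == "text_completion"
    assert resp.choices[0].text == "Hello!"
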
from openai.types.images_response import Image as OpenAIImage


class ImageObject(OpenAIImage):
    """
    Represents the url or the content of an image generated by the OpenAI API.

    Attributes:
        b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json.
        url: The URL of the generated image, if response_format is url (default).
        revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt.

    https://platform.openai.com/docs/api-reference/images/object
    """

    b64_json: Optional[str] = None
    url: Optional[str] = None
    revised_prompt: Optional[str] = None

    def __init__(self, b64_json=None, url=None, revised_prompt=None):
        super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()

from openai.types.images_response import ImagesResponse as OpenAIImageResponse


class ImageResponse(OpenAIImageResponse):
    _hidden_params: dict = {}

    def __init__(
        self,
        created: Optional[int] = None,
        data: Optional[List[ImageObject]] = None,
        response_ms=None,
    ):
        if response_ms:
            _response_ms = response_ms
        else:
            _response_ms = None
        if not data:
            data = []

        if not created:
            created = int(time.time())

        _data: List[OpenAIImage] = []
        for d in data:
            if isinstance(d, dict):
                _data.append(ImageObject(**d))
            elif isinstance(d, BaseModel):
                _data.append(ImageObject(**d.model_dump()))
        super().__init__(created=created, data=_data)
        self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()

class TranscriptionResponse(OpenAIObject):
    text: Optional[str] = None

    _hidden_params: dict = {}
    _response_headers: Optional[dict] = None

    def __init__(self, text=None):
        super().__init__(text=text)  # type: ignore

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator
        return hasattr(self, key)

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
        return getattr(self, key, default)

    def __getitem__(self, key):
        # Allow dictionary-style access to attributes
        return getattr(self, key)

    def __setitem__(self, key, value):
        # Allow dictionary-style assignment of attributes
        setattr(self, key, value)

    def json(self, **kwargs):  # type: ignore
        try:
            return self.model_dump()  # noqa
        except Exception:
            # if using pydantic v1
            return self.dict()

class GenericImageParsingChunk(TypedDict):
    # e.g.:
    # {
    #     "type": "base64",
    #     "media_type": f"image/{image_format}",
    #     "data": base64_data,
    # }
    type: str
    media_type: str
    data: str


class ResponseFormatChunk(TypedDict, total=False):
    type: Required[Literal["json_object", "text"]]
    response_schema: dict

all_litellm_params = [
    "metadata",
    "tags",
    "acompletion",
    "atext_completion",
    "text_completion",
    "caching",
    "mock_response",
    "api_key",
    "api_version",
    "api_base",
    "force_timeout",
    "logger_fn",
    "verbose",
    "custom_llm_provider",
    "litellm_logging_obj",
    "litellm_call_id",
    "use_client",
    "id",
    "fallbacks",
    "azure",
    "headers",
    "model_list",
    "num_retries",
    "context_window_fallback_dict",
    "retry_policy",
    "roles",
    "final_prompt_value",
    "bos_token",
    "eos_token",
    "request_timeout",
    "complete_response",
    "self",
    "client",
    "rpm",
    "tpm",
    "max_parallel_requests",
    "input_cost_per_token",
    "output_cost_per_token",
    "input_cost_per_second",
    "output_cost_per_second",
    "hf_model_name",
    "model_info",
    "proxy_server_request",
    "preset_cache_key",
    "caching_groups",
    "ttl",
    "cache",
    "no-log",
    "base_model",
    "stream_timeout",
    "supports_system_message",
    "region_name",
    "allowed_model_region",
    "model_config",
    "fastest_response",
    "cooldown_time",
    "cache_key",
    "max_retries",
    "azure_ad_token_provider",
    "tenant_id",
    "client_id",
    "client_secret",
    "user_continue_message",
    "configurable_clientside_auth_params",
    "weight",
]

class LoggedLiteLLMParams(TypedDict, total=False):
    force_timeout: Optional[float]
    custom_llm_provider: Optional[str]
    api_base: Optional[str]
    litellm_call_id: Optional[str]
    model_alias_map: Optional[dict]
    metadata: Optional[dict]
    model_info: Optional[dict]
    proxy_server_request: Optional[dict]
    acompletion: Optional[bool]
    preset_cache_key: Optional[str]
    no_log: Optional[bool]
    input_cost_per_second: Optional[float]
    input_cost_per_token: Optional[float]
    output_cost_per_token: Optional[float]
    output_cost_per_second: Optional[float]
    cooldown_time: Optional[float]

class AdapterCompletionStreamWrapper:
    def __init__(self, completion_stream):
        self.completion_stream = completion_stream

    def __iter__(self):
        return self

    def __aiter__(self):
        return self

    def __next__(self):
        try:
            for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                return chunk
            raise StopIteration
        except StopIteration:
            raise StopIteration
        except Exception as e:
            print(f"AdapterCompletionStreamWrapper - {e}")  # noqa

    async def __anext__(self):
        try:
            async for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                return chunk
            raise StopIteration
        except StopIteration:
            raise StopAsyncIteration

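
# Example (illustrative): wrapping a plain generator so it can be consumed
# through the adapter's iterator interface. The chunk values below are made up.
def _example_adapter_wrapper() -> None:
    def chunk_source():
        yield "chunk-1"
        yield "chunk-2"

    wrapper = AdapterCompletionStreamWrapper(chunk_source())
    assert [chunk for chunk in wrapper] == ["chunk-1", "chunk-2"]
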
class StandardLoggingMetadata(TypedDict):
    """
    Specific metadata k,v pairs logged to integration for easier cost tracking
    """

    user_api_key_hash: Optional[str]  # hash of the litellm virtual key used
    user_api_key_alias: Optional[str]
    user_api_key_team_id: Optional[str]
    user_api_key_user_id: Optional[str]
    user_api_key_team_alias: Optional[str]
    spend_logs_metadata: Optional[
        dict
    ]  # special param to log k,v pairs to spendlogs for a call
    requester_ip_address: Optional[str]
    requester_metadata: Optional[dict]


class StandardLoggingHiddenParams(TypedDict):
    model_id: Optional[str]
    cache_key: Optional[str]
    api_base: Optional[str]
    response_cost: Optional[str]
    additional_headers: Optional[dict]


class StandardLoggingModelInformation(TypedDict):
    model_map_key: str
    model_map_value: Optional[ModelInfo]


class StandardLoggingModelCostFailureDebugInformation(TypedDict, total=False):
    """
    Debug information, if cost tracking fails.

    Avoid logging sensitive information like response or optional params
    """

    error_str: Required[str]
    traceback_str: Required[str]
    model: str
    cache_hit: Optional[bool]
    custom_llm_provider: Optional[str]
    base_model: Optional[str]
    call_type: str
    custom_pricing: Optional[bool]


StandardLoggingPayloadStatus = Literal["success", "failure"]


class StandardLoggingPayload(TypedDict):
    id: str
    call_type: str
    response_cost: float
    response_cost_failure_debug_info: Optional[
        StandardLoggingModelCostFailureDebugInformation
    ]
    status: StandardLoggingPayloadStatus
    total_tokens: int
    prompt_tokens: int
    completion_tokens: int
    startTime: float
    endTime: float
    completionStartTime: float
    model_map_information: StandardLoggingModelInformation
    model: str
    model_id: Optional[str]
    model_group: Optional[str]
    api_base: str
    metadata: StandardLoggingMetadata
    cache_hit: Optional[bool]
    cache_key: Optional[str]
    saved_cache_cost: float
    request_tags: list
    end_user: Optional[str]
    requester_ip_address: Optional[str]
    messages: Optional[Union[str, list, dict]]
    response: Optional[Union[str, list, dict]]
    error_str: Optional[str]
    model_parameters: dict
    hidden_params: StandardLoggingHiddenParams

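
# Example (illustrative): a StandardLoggingMetadata record as a logging
# integration might receive it. All keys are required; every value is made up.
_EXAMPLE_LOGGING_METADATA: StandardLoggingMetadata = {
    "user_api_key_hash": "hashed-litellm-key",
    "user_api_key_alias": "my-key-alias",
    "user_api_key_team_id": "team-1234",
    "user_api_key_user_id": "user-5678",
    "user_api_key_team_alias": "ml-platform",
    "spend_logs_metadata": {"request_source": "docs-demo"},
    "requester_ip_address": "127.0.0.1",
    "requester_metadata": None,
}
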
from typing import AsyncIterator, Iterator


class CustomStreamingDecoder:
    async def aiter_bytes(
        self, iterator: AsyncIterator[bytes]
    ) -> AsyncIterator[
        Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]
    ]:
        raise NotImplementedError

    def iter_bytes(
        self, iterator: Iterator[bytes]
    ) -> Iterator[Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]]:
        raise NotImplementedError

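
# Example (illustrative): a minimal sketch of a CustomStreamingDecoder subclass
# that decodes newline-delimited UTF-8 text into GenericStreamingChunk values.
# The wire format here is an assumption for demonstration, not a real provider's.
class _ExampleLineDecoder(CustomStreamingDecoder):
    def iter_bytes(
        self, iterator: Iterator[bytes]
    ) -> Iterator[Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]]:
        for raw in iterator:
            text = raw.decode("utf-8").strip()
            if not text:
                yield None  # nothing decodable in this chunk
                continue
            yield GenericStreamingChunk(
                text=text,
                is_finished=False,
                finish_reason="",
                usage=None,
                index=0,
            )
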
class StandardPassThroughResponseObject(TypedDict):
    response: str

OPENAI_RESPONSE_HEADERS = [
    "x-ratelimit-remaining-requests",
    "x-ratelimit-remaining-tokens",
    "x-ratelimit-limit-requests",
    "x-ratelimit-limit-tokens",
    "x-ratelimit-reset-requests",
    "x-ratelimit-reset-tokens",
]


class StandardCallbackDynamicParams(TypedDict, total=False):
    # Langfuse dynamic params
    langfuse_public_key: Optional[str]
    langfuse_secret: Optional[str]
    langfuse_secret_key: Optional[str]
    langfuse_host: Optional[str]

    # GCS dynamic params
    gcs_bucket_name: Optional[str]
    gcs_path_service_account: Optional[str]


CachingSupportedCallTypes = Literal[
    "completion",
    "acompletion",
    "embedding",
    "aembedding",
    "atranscription",
    "transcription",
    "atext_completion",
    "text_completion",
    "arerank",
    "rerank",
]