Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
Litellm Minor Fixes & Improvements (10/12/2024) (#6179)
* build(model_prices_and_context_window.json): add bedrock llama3.2 pricing
* build(model_prices_and_context_window.json): add bedrock cross region inference pricing
* Revert "(perf) move s3 logging to Batch logging + async [94% faster perf under 100 RPS on 1 litellm instance] (#6165)"
This reverts commit 2a5624af47.
* add azure/gpt-4o-2024-05-13 (#6174)
* LiteLLM Minor Fixes & Improvements (10/10/2024) (#6158)
* refactor(vertex_ai_partner_models/anthropic): refactor anthropic to use partner model logic
* fix(vertex_ai/): support passing custom api base to partner models
Fixes https://github.com/BerriAI/litellm/issues/4317
* fix(proxy_server.py): Fix prometheus premium user check logic
* docs(prometheus.md): update quick start docs
* fix(custom_llm.py): support passing dynamic api key + api base
* fix(realtime_api/main.py): Add request/response logging for realtime api endpoints
Closes https://github.com/BerriAI/litellm/issues/6081
* feat(openai/realtime): add openai realtime api logging
Closes https://github.com/BerriAI/litellm/issues/6081
* fix(realtime_streaming.py): fix linting errors
* fix(realtime_streaming.py): fix linting errors
* fix: fix linting errors
* fix pattern match router
* Add literalai in the sidebar observability category (#6163)
* fix: add literalai in the sidebar
* fix: typo
* update (#6160)
* Feat: Add Langtrace integration (#5341)
* Feat: Add Langtrace integration
* add langtrace service name
* fix timestamps for traces
* add tests
* Discard Callback + use existing otel logger
* cleanup
* remove print statements
* remove callback
* add docs
* docs
* add logging docs
* format logging
* remove emoji and add litellm proxy example
* format logging
* format `logging.md`
* add langtrace docs to logging.md
* sync conflict
* docs fix
* (perf) move s3 logging to Batch logging + async [94% faster perf under 100 RPS on 1 litellm instance] (#6165)
* fix move s3 to use customLogger
* add basic s3 logging test
* add s3 to custom logger compatible
* use batch logger for s3
* s3 set flush interval and batch size
* fix s3 logging
* add notes on s3 logging
* fix s3 logging
* add basic s3 logging test
* fix s3 type errors
* add test for sync logging on s3
* fix: fix to debug log
---------
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Willy Douhard <willy.douhard@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
Co-authored-by: Ali Waleed <ali@scale3labs.com>
* docs(custom_llm_server.md): update doc on passing custom params
* fix(pass_through_endpoints.py): don't require headers
Fixes https://github.com/BerriAI/litellm/issues/6128
* feat(utils.py): add support for caching rerank endpoints
Closes https://github.com/BerriAI/litellm/issues/6144
* feat(litellm_logging.py): add response headers for failed requests
Closes https://github.com/BerriAI/litellm/issues/6159
---------
Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Willy Douhard <willy.douhard@gmail.com>
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
Co-authored-by: Ali Waleed <ali@scale3labs.com>
parent: 2cb65b450d
commit: 2acb0c0675
18 changed files with 533 additions and 82 deletions
@@ -20,13 +20,13 @@ from datetime import timedelta
 from enum import Enum
 from typing import Any, List, Literal, Optional, Tuple, Union

 from openai._models import BaseModel as OpenAIObject
 from pydantic import BaseModel

 import litellm
 from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
-from litellm.types.utils import all_litellm_params
+from litellm.types.utils import CachingSupportedCallTypes, all_litellm_params


 def print_verbose(print_statement):
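This hunk swaps the repeated inline Literal[...] unions (seen in the hunks below) for a single shared alias imported from litellm.types.utils. Based on the default values shown throughout this diff, the alias plausibly looks like the following; this is a reconstruction for illustration, not copied from the source:

    from typing import Literal

    # Plausible shape of the alias imported above; the real definition
    # lives in litellm/types/utils.py and may differ in formatting.
    CachingSupportedCallTypes = Literal[
        "completion", "acompletion",
        "embedding", "aembedding",
        "atranscription", "transcription",
        "atext_completion", "text_completion",
        "arerank", "rerank",
    ]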
@@ -2139,20 +2139,7 @@ class Cache:
         default_in_memory_ttl: Optional[float] = None,
         default_in_redis_ttl: Optional[float] = None,
         similarity_threshold: Optional[float] = None,
-        supported_call_types: Optional[
-            List[
-                Literal[
-                    "completion",
-                    "acompletion",
-                    "embedding",
-                    "aembedding",
-                    "atranscription",
-                    "transcription",
-                    "atext_completion",
-                    "text_completion",
-                ]
-            ]
-        ] = [
+        supported_call_types: Optional[List[CachingSupportedCallTypes]] = [
             "completion",
             "acompletion",
             "embedding",
@@ -2161,6 +2148,8 @@ class Cache:
             "transcription",
             "atext_completion",
             "text_completion",
+            "arerank",
+            "rerank",
         ],
         # s3 Bucket, boto3 configuration
         s3_bucket_name: Optional[str] = None,
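With "arerank"/"rerank" now in the defaults, rerank calls are cacheable out of the box, and they can also be listed explicitly. A minimal usage sketch, assuming the litellm.cache = Cache(...) pattern from LiteLLM's caching docs and the module path of this era:

    import litellm
    from litellm.caching import Cache  # module path at the time of this commit

    # Sketch: opt in to caching for rerank (and async rerank) calls explicitly.
    # type="local" uses the in-memory cache; Redis/S3 parameters are optional.
    litellm.cache = Cache(
        type="local",
        supported_call_types=["completion", "acompletion", "rerank", "arerank"],
    )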
@@ -2353,9 +2342,20 @@ class Cache:
             "file",
             "language",
         ]
+        rerank_only_kwargs = [
+            "top_n",
+            "rank_fields",
+            "return_documents",
+            "max_chunks_per_doc",
+            "documents",
+            "query",
+        ]
         # combined_kwargs - NEEDS to be ordered across get_cache_key(). Do not use a set()
         combined_kwargs = (
-            completion_kwargs + embedding_only_kwargs + transcription_only_kwargs
+            completion_kwargs
+            + embedding_only_kwargs
+            + transcription_only_kwargs
+            + rerank_only_kwargs
         )
         litellm_param_kwargs = all_litellm_params
         for param in kwargs:
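The combined_kwargs comment is the key constraint here: cache keys are built by iterating kwargs against an ordered allow-list, so the concatenation must stay deterministic (a set() would randomize iteration order and fracture the cache). A toy illustration of the principle, using a hypothetical toy_cache_key helper that is not LiteLLM's actual implementation:

    import hashlib

    # Hypothetical helper illustrating why the allow-list must be ordered:
    # hashing fields in a stable order yields the same key for the same call,
    # regardless of the order the caller passed the kwargs in.
    def toy_cache_key(kwargs: dict, ordered_fields: list) -> str:
        parts = [f"{f}={kwargs[f]}" for f in ordered_fields if f in kwargs]
        return hashlib.sha256(",".join(parts).encode()).hexdigest()

    key_a = toy_cache_key({"query": "q", "top_n": 3}, ["query", "top_n"])
    key_b = toy_cache_key({"top_n": 3, "query": "q"}, ["query", "top_n"])
    assert key_a == key_b  # stable field order -> stable cache key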
@@ -2557,7 +2557,7 @@ class Cache:
         else:
             cache_key = self.get_cache_key(*args, **kwargs)
         if cache_key is not None:
-            if isinstance(result, OpenAIObject):
+            if isinstance(result, BaseModel):
                 result = result.model_dump_json()

             ## DEFAULT TTL ##
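Widening the check from OpenAIObject to pydantic's BaseModel lets any pydantic response model, including rerank responses, be serialized before it is written to the cache; since the OpenAIObject alias imported above subclasses pydantic's BaseModel, existing behavior is preserved. A small sketch of the serialization step, using a hypothetical response model:

    from pydantic import BaseModel

    class FakeRerankResponse(BaseModel):  # hypothetical model for illustration
        index: int
        relevance_score: float

    result = FakeRerankResponse(index=0, relevance_score=0.97)
    if isinstance(result, BaseModel):
        # Same call the diff relies on: pydantic v2 JSON serialization.
        cached_value = result.model_dump_json()
    print(cached_value)  # {"index":0,"relevance_score":0.97}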
@@ -2778,20 +2778,7 @@ def enable_cache(
     host: Optional[str] = None,
     port: Optional[str] = None,
     password: Optional[str] = None,
-    supported_call_types: Optional[
-        List[
-            Literal[
-                "completion",
-                "acompletion",
-                "embedding",
-                "aembedding",
-                "atranscription",
-                "transcription",
-                "atext_completion",
-                "text_completion",
-            ]
-        ]
-    ] = [
+    supported_call_types: Optional[List[CachingSupportedCallTypes]] = [
         "completion",
         "acompletion",
         "embedding",
@@ -2800,6 +2787,8 @@ def enable_cache(
         "transcription",
         "atext_completion",
         "text_completion",
+        "arerank",
+        "rerank",
     ],
     **kwargs,
 ):
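The same default-widening applies to enable_cache(). A hedged usage sketch, assuming the signature shown in this hunk:

    import litellm

    # Sketch: turn on in-memory caching for rerank traffic only.
    litellm.enable_cache(
        type="local",
        supported_call_types=["rerank", "arerank"],
    )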
@@ -2847,20 +2836,7 @@ def update_cache(
     host: Optional[str] = None,
     port: Optional[str] = None,
     password: Optional[str] = None,
-    supported_call_types: Optional[
-        List[
-            Literal[
-                "completion",
-                "acompletion",
-                "embedding",
-                "aembedding",
-                "atranscription",
-                "transcription",
-                "atext_completion",
-                "text_completion",
-            ]
-        ]
-    ] = [
+    supported_call_types: Optional[List[CachingSupportedCallTypes]] = [
        "completion",
        "acompletion",
        "embedding",
@@ -2869,6 +2845,8 @@ def update_cache(
         "transcription",
         "atext_completion",
         "text_completion",
+        "arerank",
+        "rerank",
     ],
     **kwargs,
 ):
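update_cache() gets the identical treatment, so a running configuration can be extended to the rerank call types without recreating the cache. A hedged sketch, assuming the signature shown above:

    import litellm

    # Sketch: extend an already-configured cache to cover rerank calls too.
    litellm.update_cache(
        type="local",
        supported_call_types=["completion", "acompletion", "rerank", "arerank"],
    )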