Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
Support post-call guards for stream and non-stream responses
commit b01cf5577c (parent 44184c4113)
8 changed files with 297 additions and 33 deletions
@@ -23,6 +23,11 @@ from typing import (
     get_origin,
     get_type_hints,
 )
+from litellm.types.utils import (
+    ModelResponse,
+    ModelResponseStream,
+    TextCompletionResponse,
+)
 
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
@@ -1374,6 +1379,10 @@ async def _run_background_health_check():
             await asyncio.sleep(health_check_interval)
 
 
+class StreamingCallbackError(Exception):
+    pass
+
+
 class ProxyConfig:
     """
     Abstraction class on top of config loading/updating logic. Gives us one place to control all config updating logic.
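The new `StreamingCallbackError` gives the proxy a typed signal for guard failures that happen mid-stream: later in this diff the generator catches it and returns only the message, rather than a full traceback, to the client. Below is a minimal sketch of how such an error might be produced, assuming a wrapper that runs per-chunk guard callbacks; the `run_stream_guards` helper and the `check_chunk` callback interface are illustrative, not part of this commit:

```python
# Illustrative sketch only -- assumes hypothetical guard objects exposing a
# `check_chunk` coroutine; the real wiring lives in ProxyLogging.
from typing import Any, AsyncGenerator, List


class StreamingCallbackError(Exception):
    pass


async def run_stream_guards(
    response: AsyncGenerator[Any, None], guards: List[Any]
) -> AsyncGenerator[Any, None]:
    async for chunk in response:
        for guard in guards:
            try:
                await guard.check_chunk(chunk)  # hypothetical per-chunk guard call
            except Exception as e:
                # normalize guard failures so callers can show a clean message
                raise StreamingCallbackError(str(e)) from e
        yield chunk
```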
@@ -3035,8 +3044,7 @@ async def async_data_generator(
 ):
     verbose_proxy_logger.debug("inside generator")
     try:
-        time.time()
-        async for chunk in response:
+        async for chunk in proxy_logging_obj.async_post_call_streaming_iterator_hook(user_api_key_dict=user_api_key_dict, response=response, request_data=request_data):
             verbose_proxy_logger.debug(
                 "async_data_generator: received streaming chunk - {}".format(chunk)
             )
@@ -3073,6 +3081,8 @@ async def async_data_generator(
 
         if isinstance(e, HTTPException):
             raise e
+        elif isinstance(e, StreamingCallbackError):
+            error_msg = str(e)
         else:
             error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}\n\n{error_traceback}"
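Taken together, the two hunks above route every streamed chunk through `proxy_logging_obj.async_post_call_streaming_iterator_hook(...)` and turn a `StreamingCallbackError` raised along the way into a plain error message for the client. Here is a hedged sketch of what a per-chunk guard could look like when written as an async iterator hook; the `BlockedTermGuard` class, its banned-term rule, and the exact chunk attributes accessed are assumptions for illustration, not code from this commit:

```python
from typing import Any, AsyncGenerator


class BlockedTermGuard:
    """Illustrative post-call streaming guard: abort the stream on a banned term."""

    def __init__(self, banned_terms: list):
        self.banned_terms = [t.lower() for t in banned_terms]

    async def async_post_call_streaming_iterator_hook(
        self,
        user_api_key_dict: Any,
        response: AsyncGenerator[Any, None],
        request_data: dict,
    ) -> AsyncGenerator[Any, None]:
        async for chunk in response:
            # assume ModelResponseStream-like chunks with choices[].delta.content
            text = ""
            for choice in getattr(chunk, "choices", None) or []:
                delta = getattr(choice, "delta", None)
                text += getattr(delta, "content", None) or ""
            if any(term in text.lower() for term in self.banned_terms):
                # raising here aborts the stream; the generator above surfaces the message
                raise Exception("blocked term detected in streamed response")
            yield chunk
```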
@@ -5403,11 +5413,11 @@ async def token_counter(request: TokenCountRequest):
 )
 async def supported_openai_params(model: str):
     """
     Returns supported openai params for a given litellm model name
 
     e.g. `gpt-4` vs `gpt-3.5-turbo`
 
     Example curl:
     ```
     curl -X GET --location 'http://localhost:4000/utils/supported_openai_params?model=gpt-3.5-turbo-16k' \
     --header 'Authorization: Bearer sk-1234'
@@ -6405,7 +6415,7 @@ async def model_group_info(
     - /model_group/info returns all model groups. End users of proxy should use /model_group/info since those models will be used for /chat/completions, /embeddings, etc.
     - /model_group/info?model_group=rerank-english-v3.0 returns all model groups for a specific model group (`model_name` in config.yaml)
 
 
     Example Request (All Models):
     ```shell
@@ -6423,10 +6433,10 @@ async def model_group_info(
     -H 'Authorization: Bearer sk-1234'
     ```
 
     Example Request (Specific Wildcard Model Group): (e.g. `model_name: openai/*` on config.yaml)
     ```shell
     curl -X 'GET' \
     'http://localhost:4000/model_group/info?model_group=openai/tts-1' \
     -H 'accept: application/json' \
     -H 'Authorization: Bearer sk-1234'
     ```
@@ -7531,7 +7541,7 @@ async def invitation_update(
 ):
     """
     Update when invitation is accepted
 
     ```
     curl -X POST 'http://localhost:4000/invitation/update' \
     -H 'Content-Type: application/json' \
@@ -7592,7 +7602,7 @@ async def invitation_delete(
 ):
     """
     Delete invitation link
 
     ```
     curl -X POST 'http://localhost:4000/invitation/delete' \
     -H 'Content-Type: application/json' \