forked from phoenix/litellm-mirror
fix(proxy_server.py): support langfuse logging for rejected requests on /v1/chat/completions
This commit is contained in:
parent 017af34866
commit 7618ec43b3
7 changed files with 74 additions and 33 deletions
@@ -447,13 +447,24 @@ class OpenTelemetry(CustomLogger):
             # cast sr -> dict
             import json

-            _raw_response = json.loads(_raw_response)
-            for param, val in _raw_response.items():
-                if not isinstance(val, str):
-                    val = str(val)
+            try:
+                _raw_response = json.loads(_raw_response)
+                for param, val in _raw_response.items():
+                    if not isinstance(val, str):
+                        val = str(val)
+                    span.set_attribute(
+                        f"llm.{custom_llm_provider}.{param}",
+                        val,
+                    )
+            except json.JSONDecodeError:
+                verbose_logger.debug(
+                    "litellm.integrations.opentelemetry.py::set_raw_request_attributes() - raw_response not json string - {}".format(
+                        _raw_response
+                    )
+                )
                 span.set_attribute(
-                    f"llm.{custom_llm_provider}.{param}",
-                    val,
+                    f"llm.{custom_llm_provider}.stringified_raw_response",
+                    _raw_response,
                 )

         pass
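The guarded parse above is easy to exercise on its own. A minimal sketch, assuming the OpenTelemetry API package is installed, using an invented helper name and payloads (this is not the litellm method itself, and it assumes the raw response is a JSON object whenever it parses at all):

    import json

    from opentelemetry import trace

    tracer = trace.get_tracer(__name__)


    def set_raw_response_attributes(span, custom_llm_provider: str, raw_response: str) -> None:
        # Try to parse the provider's raw response; if it is not valid JSON,
        # fall back to one stringified attribute instead of per-field attributes.
        try:
            parsed = json.loads(raw_response)
            for param, val in parsed.items():
                if not isinstance(val, str):
                    val = str(val)
                span.set_attribute(f"llm.{custom_llm_provider}.{param}", val)
        except json.JSONDecodeError:
            span.set_attribute(
                f"llm.{custom_llm_provider}.stringified_raw_response", raw_response
            )


    with tracer.start_as_current_span("llm_request") as span:
        set_raw_response_attributes(span, "bedrock", '{"model": "claude-3", "latency_ms": 840}')
        set_raw_response_attributes(span, "bedrock", "plain text, not JSON")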
@@ -1394,7 +1394,7 @@ class BedrockConverseLLM(BaseLLM):
         content_str = ""
         tools: List[ChatCompletionToolCallChunk] = []
         if message is not None:
-            for content in message["content"]:
+            for idx, content in enumerate(message["content"]):
                 """
                 - Content is either a tool response or text
                 """
@@ -1409,6 +1409,7 @@ class BedrockConverseLLM(BaseLLM):
                         id=content["toolUse"]["toolUseId"],
                         type="function",
                         function=_function_chunk,
+                        index=idx,
                     )
                     tools.append(_tool_response_chunk)
         chat_completion_message["content"] = content_str
@@ -2001,6 +2002,7 @@ class AWSEventStreamDecoder:
                             "name": start_obj["toolUse"]["name"],
                             "arguments": "",
                         },
+                        "index": index,
                     }
             elif "delta" in chunk_data:
                 delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
@@ -2014,6 +2016,7 @@ class AWSEventStreamDecoder:
                             "name": None,
                             "arguments": delta_obj["toolUse"]["input"],
                         },
+                        "index": index,
                     }
             elif "stopReason" in chunk_data:
                 finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
@@ -687,6 +687,7 @@ class VertexLLM(BaseLLM):
                         id=f"call_{str(uuid.uuid4())}",
                         type="function",
                         function=_function_chunk,
+                        index=candidate.get("index", idx),
                     )
                     tools.append(_tool_response_chunk)

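Both the Bedrock and Vertex hunks attach an index to each reconstructed tool call, so the OpenAI-style tool_calls list keeps its positions; that index is what clients use to stitch streamed deltas back together. A rough sketch of that bookkeeping with plain dicts (the provider content below is invented; only the field names mirror the chunks in the diff):

    import json
    from typing import Any, Dict, List

    # Invented provider output: message content mixing a text block and a tool-use block.
    message_content: List[Dict[str, Any]] = [
        {"text": "Let me check the weather."},
        {"toolUse": {"toolUseId": "tool_1", "name": "get_weather", "input": {"city": "Paris"}}},
    ]

    content_str = ""
    tools: List[Dict[str, Any]] = []

    # Enumerate the blocks so every tool call records where it appeared.
    for idx, content in enumerate(message_content):
        if "text" in content:
            content_str += content["text"]
        elif "toolUse" in content:
            tools.append(
                {
                    "id": content["toolUse"]["toolUseId"],
                    "type": "function",
                    "function": {
                        "name": content["toolUse"]["name"],
                        "arguments": json.dumps(content["toolUse"]["input"]),
                    },
                    "index": idx,
                }
            )

    print(content_str, tools)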
@@ -1,12 +1,12 @@
 model_list:
-  - model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment
+  - model_name: "*"
     litellm_params:
       model: "openai/*"
       mock_response: "Hello world!"

 litellm_settings:
-  callbacks: ["otel"]
-  cache: True
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]

 general_settings:
   alerting: ["slack"]
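With this config every model name is accepted (the "*" entry routes to openai/* and is mocked here), and both successes and failures are shipped to Langfuse. A quick way to poke at it, assuming the proxy is running locally on port 4000 with a placeholder key (address and key are assumptions, not part of the commit):

    from openai import OpenAI

    # Point the regular OpenAI client at the LiteLLM proxy (assumed local address and key).
    client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

    response = client.chat.completions.create(
        model="claude-3-5-sonnet",  # any name matches the "*" entry
        messages=[{"role": "user", "content": "ping"}],
    )
    print(response.choices[0].message.content)  # "Hello world!" from mock_response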
@@ -2753,12 +2753,8 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]

-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
+        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
         data["litellm_call_id"] = str(uuid.uuid4())
         logging_obj, data = litellm.utils.function_setup(
             original_function="acompletion",
@@ -2769,6 +2765,11 @@ async def chat_completion(

         data["litellm_logging_obj"] = logging_obj

+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
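The point of the reordering is that the logging object now exists before pre_call_hook runs, so a request the hook rejects still has something to record the failure against. A stripped-down, self-contained sketch of that control flow (every name here is a stand-in, not proxy code):

    import asyncio


    class MiniLogger:
        """Hypothetical stand-in for the proxy's logging object."""

        def __init__(self, call_id: str) -> None:
            self.call_id = call_id

        async def log_failure(self, err: Exception) -> None:
            print(f"[{self.call_id}] rejected: {err}")


    async def pre_call_check(data: dict) -> dict:
        # Stand-in for the proxy's pre-call hook: reject disallowed content.
        if "banned" in data["messages"][0]["content"]:
            raise ValueError("request rejected by guardrail")
        return data


    async def handle_request(data: dict) -> None:
        logger = MiniLogger(call_id="demo-1")   # 1. logger is created first
        try:
            data = await pre_call_check(data)   # 2. the hook may reject the request
        except ValueError as err:
            await logger.log_failure(err)       # 3. the rejection still gets logged
            return
        print("would call the model with", data)


    asyncio.run(handle_request({"messages": [{"role": "user", "content": "banned words"}]}))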
@@ -31,6 +31,7 @@ from litellm.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.slack_alerting import SlackAlerting
+from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy._types import (
     AlertType,
@@ -595,6 +596,23 @@ class ProxyLogging:
                 )
             )

+        ### LOGGING ###
+        litellm_logging_obj: Optional[Logging] = request_data.get(
+            "litellm_logging_obj", None
+        )
+
+        if (
+            isinstance(original_exception, HTTPException)
+            and litellm_logging_obj is not None
+        ):
+            # log the custom exception
+            await litellm_logging_obj.async_failure_handler(
+                exception=original_exception,
+                traceback_exception=traceback.format_exc(),
+                start_time=time.time(),
+                end_time=time.time(),
+            )
+
         for callback in litellm.callbacks:
             try:
                 _callback: Optional[CustomLogger] = None
@@ -611,6 +629,7 @@ class ProxyLogging:
                 )
             except Exception as e:
                 raise e

+        return

     async def post_call_success_hook(
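post_call_failure_hook now pulls the logging object back out of the request data and, when the exception is an HTTPException (the usual shape of a rejected request), drives the same async_failure_handler a provider error would hit. A condensed sketch of just that branch, with a stubbed logging object so it runs standalone; only the async_failure_handler call mirrors the diff, fastapi's HTTPException is used merely to satisfy the type check, and the rest is scaffolding:

    import asyncio
    import time
    import traceback

    from fastapi import HTTPException


    class StubLogging:
        """Minimal stand-in for litellm's Logging object."""

        async def async_failure_handler(self, exception, traceback_exception, start_time, end_time):
            print("logging rejected request:", exception.detail)


    async def post_call_failure_hook(original_exception: Exception, request_data: dict) -> None:
        litellm_logging_obj = request_data.get("litellm_logging_obj", None)

        if isinstance(original_exception, HTTPException) and litellm_logging_obj is not None:
            # Route the rejection through the normal failure-logging path.
            await litellm_logging_obj.async_failure_handler(
                exception=original_exception,
                traceback_exception=traceback.format_exc(),
                start_time=time.time(),
                end_time=time.time(),
            )


    asyncio.run(
        post_call_failure_hook(
            HTTPException(status_code=400, detail="Violated content policy"),
            {"litellm_logging_obj": StubLogging()},
        )
    )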
@@ -1,32 +1,37 @@
+from os import PathLike
 from typing import (
-    Optional,
-    Union,
+    IO,
     Any,
     BinaryIO,
-    Literal,
     Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Tuple,
+    TypedDict,
+    Union,
 )
-from typing_extensions import override, Required, Dict
-from pydantic import BaseModel
-from openai.types.beta.threads.message_content import MessageContent
-from openai.types.beta.threads.message import Message as OpenAIMessage
-from openai.types.beta.thread_create_params import (
-    Message as OpenAICreateThreadParamsMessage,
-)

+from openai._legacy_response import HttpxBinaryResponseContent
 from openai.lib.streaming._assistants import (
     AssistantEventHandler,
     AssistantStreamManager,
-    AsyncAssistantStreamManager,
     AsyncAssistantEventHandler,
+    AsyncAssistantStreamManager,
 )
-from openai.types.beta.assistant_tool_param import AssistantToolParam
-from openai.types.beta.threads.run import Run
+from openai.pagination import AsyncCursorPage, SyncCursorPage
+from openai.types import Batch, FileObject
 from openai.types.beta.assistant import Assistant
-from openai.pagination import SyncCursorPage, AsyncCursorPage
-from os import PathLike
-from openai.types import FileObject, Batch
-from openai._legacy_response import HttpxBinaryResponseContent
-from typing import TypedDict, List, Optional, Tuple, Mapping, IO
+from openai.types.beta.assistant_tool_param import AssistantToolParam
+from openai.types.beta.thread_create_params import (
+    Message as OpenAICreateThreadParamsMessage,
+)
+from openai.types.beta.threads.message import Message as OpenAIMessage
+from openai.types.beta.threads.message_content import MessageContent
+from openai.types.beta.threads.run import Run
+from pydantic import BaseModel
+from typing_extensions import Dict, Required, override

 FileContent = Union[IO[bytes], bytes, PathLike]
@@ -304,6 +309,7 @@ class ChatCompletionToolCallChunk(TypedDict):
     id: Optional[str]
     type: Literal["function"]
     function: ChatCompletionToolCallFunctionChunk
+    index: int


 class ChatCompletionDeltaToolCallChunk(TypedDict):
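After this change a ChatCompletionToolCallChunk always carries its position. A small sketch of the resulting shape, redeclaring the two TypedDicts locally instead of importing them (the real definitions live in litellm's OpenAI types module; the values below are made up):

    from typing import Literal, Optional, TypedDict


    class ChatCompletionToolCallFunctionChunk(TypedDict):
        name: Optional[str]
        arguments: str


    class ChatCompletionToolCallChunk(TypedDict):
        id: Optional[str]
        type: Literal["function"]
        function: ChatCompletionToolCallFunctionChunk
        index: int


    chunk: ChatCompletionToolCallChunk = {
        "id": "call_abc123",
        "type": "function",
        "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'},
        "index": 0,
    }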