forked from phoenix/litellm-mirror
fix(proxy_server.py): support langfuse logging for rejected requests on /v1/chat/completions
parent 017af34866
commit 7618ec43b3

7 changed files with 74 additions and 33 deletions
@@ -447,6 +447,7 @@ class OpenTelemetry(CustomLogger):
             # cast sr -> dict
             import json

+            try:
                 _raw_response = json.loads(_raw_response)
                 for param, val in _raw_response.items():
                     if not isinstance(val, str):
@@ -455,6 +456,16 @@ class OpenTelemetry(CustomLogger):
                         f"llm.{custom_llm_provider}.{param}",
                         val,
                     )
+            except json.JSONDecodeError:
+                verbose_logger.debug(
+                    "litellm.integrations.opentelemetry.py::set_raw_request_attributes() - raw_response not json string - {}".format(
+                        _raw_response
+                    )
+                )
+                span.set_attribute(
+                    f"llm.{custom_llm_provider}.stringified_raw_response",
+                    _raw_response,
+                )
         pass
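The hunk above guards `json.loads` so that a provider raw response which is not valid JSON no longer raises out of the span-attribute code; it is instead logged at debug level and attached as a single stringified attribute. Below is a minimal sketch of the same guard pattern, using a generic `set_attr` callable as a stand-in for the real span API (the function and parameter names here are illustrative, not the repo's):

```python
import json
from typing import Callable


def set_raw_response_attributes(
    set_attr: Callable[[str, str], None], provider: str, raw_response: str
) -> None:
    # Sketch of the guard added in the diff above, not the repo code.
    try:
        parsed = json.loads(raw_response)
        for param, val in parsed.items():
            if not isinstance(val, str):
                val = str(val)
            set_attr(f"llm.{provider}.{param}", val)
    except json.JSONDecodeError:
        # Raw response was not a JSON string; keep it as one stringified attribute.
        set_attr(f"llm.{provider}.stringified_raw_response", raw_response)


if __name__ == "__main__":
    attrs = {}
    set_raw_response_attributes(attrs.__setitem__, "bedrock", "<html>rate limited</html>")
    print(attrs)  # {'llm.bedrock.stringified_raw_response': '<html>rate limited</html>'}
```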
@@ -1394,7 +1394,7 @@ class BedrockConverseLLM(BaseLLM):
         content_str = ""
         tools: List[ChatCompletionToolCallChunk] = []
         if message is not None:
-            for content in message["content"]:
+            for idx, content in enumerate(message["content"]):
                 """
                 - Content is either a tool response or text
                 """
@@ -1409,6 +1409,7 @@ class BedrockConverseLLM(BaseLLM):
                         id=content["toolUse"]["toolUseId"],
                         type="function",
                         function=_function_chunk,
+                        index=idx,
                     )
                     tools.append(_tool_response_chunk)
         chat_completion_message["content"] = content_str
@@ -2001,6 +2002,7 @@ class AWSEventStreamDecoder:
                     "name": start_obj["toolUse"]["name"],
                     "arguments": "",
                 },
+                "index": index,
             }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
@@ -2014,6 +2016,7 @@ class AWSEventStreamDecoder:
                     "name": None,
                     "arguments": delta_obj["toolUse"]["input"],
                 },
+                "index": index,
            }
        elif "stopReason" in chunk_data:
            finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
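These hunks thread a positional `index` into every tool-call chunk that Bedrock emits. The index is what lets a client reassemble parallel tool calls from a stream: argument fragments carrying the same index belong to the same call. A small sketch of that reassembly, with assumed chunk shapes rather than the repo's exact TypedDicts:

```python
from typing import Dict, List, Optional, TypedDict


class ToolCallChunk(TypedDict):
    # Assumed shape for illustration; mirrors the idea behind ChatCompletionToolCallChunk.
    id: Optional[str]
    name: Optional[str]
    arguments: str
    index: int


def merge_tool_call_chunks(chunks: List[ToolCallChunk]) -> Dict[int, str]:
    """Concatenate streamed argument fragments per tool-call index."""
    merged: Dict[int, str] = {}
    for chunk in chunks:
        merged[chunk["index"]] = merged.get(chunk["index"], "") + chunk["arguments"]
    return merged


print(merge_tool_call_chunks([
    {"id": "call_0", "name": "get_weather", "arguments": '{"city":', "index": 0},
    {"id": None, "name": None, "arguments": ' "Paris"}', "index": 0},
]))
# -> {0: '{"city": "Paris"}'}
```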
@@ -687,6 +687,7 @@ class VertexLLM(BaseLLM):
                         id=f"call_{str(uuid.uuid4())}",
                         type="function",
                         function=_function_chunk,
+                        index=candidate.get("index", idx),
                     )
                     tools.append(_tool_response_chunk)
@@ -1,12 +1,12 @@
 model_list:
-  - model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment
+  - model_name: "*"
     litellm_params:
       model: "openai/*"
       mock_response: "Hello world!"

 litellm_settings:
-  callbacks: ["otel"]
-  cache: True
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]

 general_settings:
   alerting: ["slack"]
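The example config swaps the `otel` callback and cache flag for Langfuse callbacks on both the success and failure paths, which is what makes rejected requests visible in Langfuse. The same callbacks can be set from Python when using litellm as a library (a short sketch; Langfuse credentials are read from the usual `LANGFUSE_*` environment variables):

```python
import litellm

# Mirror of the proxy config above when calling litellm directly:
litellm.success_callback = ["langfuse"]  # log successful completions to Langfuse
litellm.failure_callback = ["langfuse"]  # log failed/rejected completions to Langfuse
```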
@@ -2753,12 +2753,8 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]

-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook( # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
+        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
         data["litellm_call_id"] = str(uuid.uuid4())
         logging_obj, data = litellm.utils.function_setup(
             original_function="acompletion",
@@ -2769,6 +2765,11 @@ async def chat_completion(

         data["litellm_logging_obj"] = logging_obj

+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook( # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
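This reordering is the core of the fix: `function_setup` now runs before `pre_call_hook`, so when the hook rejects a request there is already a logging object to record the failure against. A simplified sketch of that ordering, where `pre_call_hook` and `failure_handler` are hypothetical stand-ins rather than the proxy's exact signatures:

```python
import uuid


async def handle_chat_completion(data: dict, pre_call_hook, failure_handler) -> dict:
    # 1. Initialize the logging object first, so a rejection can still be logged.
    data["litellm_call_id"] = str(uuid.uuid4())
    logging_obj = {"call_id": data["litellm_call_id"]}  # stand-in for litellm's Logging object
    data["litellm_logging_obj"] = logging_obj

    # 2. Only then run the pre-call hook, which may reject the request.
    try:
        data = await pre_call_hook(data)
    except Exception as exc:
        # The rejected request is logged (e.g. to Langfuse) before re-raising.
        await failure_handler(logging_obj, exc)
        raise
    return data
```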
@@ -31,6 +31,7 @@ from litellm.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.slack_alerting import SlackAlerting
+from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy._types import (
     AlertType,
@@ -595,6 +596,23 @@ class ProxyLogging:
                 )
             )

+        ### LOGGING ###
+        litellm_logging_obj: Optional[Logging] = request_data.get(
+            "litellm_logging_obj", None
+        )
+
+        if (
+            isinstance(original_exception, HTTPException)
+            and litellm_logging_obj is not None
+        ):
+            # log the custom exception
+            await litellm_logging_obj.async_failure_handler(
+                exception=original_exception,
+                traceback_exception=traceback.format_exc(),
+                start_time=time.time(),
+                end_time=time.time(),
+            )
+
         for callback in litellm.callbacks:
             try:
                 _callback: Optional[CustomLogger] = None
@@ -611,6 +629,7 @@ class ProxyLogging:
                     )
             except Exception as e:
                 raise e
+
         return

     async def post_call_success_hook(
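On the failure path, `ProxyLogging` now pulls the logging object back out of `request_data` and, for HTTP-level rejections, invokes its async failure handler so the configured `failure_callback` (Langfuse here) receives the event. A compact sketch of that dispatch, assuming a `Logging`-like object exposing an `async_failure_handler` coroutine (the helper name below is illustrative):

```python
import time
import traceback
from typing import Any, Optional


async def log_rejected_request(request_data: dict, original_exception: Exception) -> None:
    # "litellm_logging_obj" is the key set in chat_completion before the pre-call hook runs.
    litellm_logging_obj: Optional[Any] = request_data.get("litellm_logging_obj", None)
    if litellm_logging_obj is None:
        return  # nothing to log against (the request never reached function_setup)
    now = time.time()
    await litellm_logging_obj.async_failure_handler(
        exception=original_exception,
        traceback_exception=traceback.format_exc(),
        start_time=now,
        end_time=now,
    )
```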
@@ -1,32 +1,37 @@
+from os import PathLike
 from typing import (
-    Optional,
-    Union,
+    IO,
     Any,
     BinaryIO,
-    Literal,
     Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Tuple,
+    TypedDict,
+    Union,
 )
-from typing_extensions import override, Required, Dict
-from pydantic import BaseModel
-from openai.types.beta.threads.message_content import MessageContent
-from openai.types.beta.threads.message import Message as OpenAIMessage
-from openai.types.beta.thread_create_params import (
-    Message as OpenAICreateThreadParamsMessage,
-)
+
+from openai._legacy_response import HttpxBinaryResponseContent
 from openai.lib.streaming._assistants import (
     AssistantEventHandler,
     AssistantStreamManager,
-    AsyncAssistantStreamManager,
     AsyncAssistantEventHandler,
+    AsyncAssistantStreamManager,
 )
-from openai.types.beta.assistant_tool_param import AssistantToolParam
-from openai.types.beta.threads.run import Run
+from openai.pagination import AsyncCursorPage, SyncCursorPage
+from openai.types import Batch, FileObject
 from openai.types.beta.assistant import Assistant
-from openai.pagination import SyncCursorPage, AsyncCursorPage
-from os import PathLike
-from openai.types import FileObject, Batch
-from openai._legacy_response import HttpxBinaryResponseContent
-from typing import TypedDict, List, Optional, Tuple, Mapping, IO
+from openai.types.beta.assistant_tool_param import AssistantToolParam
+from openai.types.beta.thread_create_params import (
+    Message as OpenAICreateThreadParamsMessage,
+)
+from openai.types.beta.threads.message import Message as OpenAIMessage
+from openai.types.beta.threads.message_content import MessageContent
+from openai.types.beta.threads.run import Run
+from pydantic import BaseModel
+from typing_extensions import Dict, Required, override

 FileContent = Union[IO[bytes], bytes, PathLike]
@@ -304,6 +309,7 @@ class ChatCompletionToolCallChunk(TypedDict):
     id: Optional[str]
     type: Literal["function"]
     function: ChatCompletionToolCallFunctionChunk
+    index: int


 class ChatCompletionDeltaToolCallChunk(TypedDict):