Merge pull request #4827 from BerriAI/litellm_use_anthropic_sdk_proxy

[Fix-Proxy] Allow non admin keys to access /v1/messages Anthropic Routes

Commit 55f0728a7e · 14 changed files with 642 additions and 31 deletions
docs/my-website/docs/observability/arize_integration.md (new file, +72)
@@ -0,0 +1,72 @@
import Image from '@theme/IdealImage';

# 🔥 Arize AI - Logging LLM Input/Output

AI Observability and Evaluation Platform

:::tip

This integration is community maintained. Please open an issue if you run into a bug:
https://github.com/BerriAI/litellm

:::

## Pre-Requisites

Make an account on [Arize AI](https://app.arize.com/auth/login)

## Quick Start

Use just 2 lines of code to instantly log your responses **across all providers** with Arize:

```python
litellm.callbacks = ["arize"]
```

```python
import litellm
import os

os.environ["ARIZE_SPACE_KEY"] = ""
os.environ["ARIZE_API_KEY"] = ""

# LLM API Keys
os.environ['OPENAI_API_KEY'] = ""

# set arize as a callback, litellm will send the data to arize
litellm.callbacks = ["arize"]

# openai call
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Hi 👋 - i'm openai"}
    ]
)
```
### Using with LiteLLM Proxy

```yaml
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["arize"]

environment_variables:
  ARIZE_SPACE_KEY: "d0*****"
  ARIZE_API_KEY: "141a****"
```
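Once the proxy is running with this config, any request routed through it gets logged to Arize. A minimal sketch of a client call, assuming the proxy is served on `http://localhost:4000` and `sk-1234` is your proxy key (both illustrative values, not part of this PR):

```python
# Hypothetical client call through the LiteLLM proxy; base_url and api_key
# below are illustrative.
import openai

client = openai.OpenAI(
    api_key="sk-1234",                 # your LiteLLM proxy key
    base_url="http://localhost:4000",  # where the proxy is served
)

response = client.chat.completions.create(
    model="gpt-4",  # the model_name from the config above
    messages=[{"role": "user", "content": "Hi 👋"}],
)
print(response.choices[0].message.content)
```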
## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
@@ -38,7 +38,13 @@ success_callback: List[Union[str, Callable]] = []
 failure_callback: List[Union[str, Callable]] = []
 service_callback: List[Union[str, Callable]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
-    "lago", "openmeter", "logfire", "dynamic_rate_limiter", "langsmith", "galileo"
+    "lago",
+    "openmeter",
+    "logfire",
+    "dynamic_rate_limiter",
+    "langsmith",
+    "galileo",
+    "arize",
 ]
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 _langfuse_default_tags: Optional[
litellm/integrations/_types/open_inference.py (new file, +286)
@@ -0,0 +1,286 @@
from enum import Enum


class SpanAttributes:
    OUTPUT_VALUE = "output.value"
    OUTPUT_MIME_TYPE = "output.mime_type"
    """
    The type of output.value. If unspecified, the type is plain text by default.
    If type is JSON, the value is a string representing a JSON object.
    """
    INPUT_VALUE = "input.value"
    INPUT_MIME_TYPE = "input.mime_type"
    """
    The type of input.value. If unspecified, the type is plain text by default.
    If type is JSON, the value is a string representing a JSON object.
    """

    EMBEDDING_EMBEDDINGS = "embedding.embeddings"
    """
    A list of objects containing embedding data, including the vector and represented piece of text.
    """
    EMBEDDING_MODEL_NAME = "embedding.model_name"
    """
    The name of the embedding model.
    """

    LLM_FUNCTION_CALL = "llm.function_call"
    """
    For models and APIs that support function calling. Records attributes such as the function
    name and arguments to the called function.
    """
    LLM_INVOCATION_PARAMETERS = "llm.invocation_parameters"
    """
    Invocation parameters passed to the LLM or API, such as the model name, temperature, etc.
    """
    LLM_INPUT_MESSAGES = "llm.input_messages"
    """
    Messages provided to a chat API.
    """
    LLM_OUTPUT_MESSAGES = "llm.output_messages"
    """
    Messages received from a chat API.
    """
    LLM_MODEL_NAME = "llm.model_name"
    """
    The name of the model being used.
    """
    LLM_PROMPTS = "llm.prompts"
    """
    Prompts provided to a completions API.
    """
    LLM_PROMPT_TEMPLATE = "llm.prompt_template.template"
    """
    The prompt template as a Python f-string.
    """
    LLM_PROMPT_TEMPLATE_VARIABLES = "llm.prompt_template.variables"
    """
    A list of input variables to the prompt template.
    """
    LLM_PROMPT_TEMPLATE_VERSION = "llm.prompt_template.version"
    """
    The version of the prompt template being used.
    """
    LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt"
    """
    Number of tokens in the prompt.
    """
    LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
    """
    Number of tokens in the completion.
    """
    LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
    """
    Total number of tokens, including both prompt and completion.
    """

    TOOL_NAME = "tool.name"
    """
    Name of the tool being used.
    """
    TOOL_DESCRIPTION = "tool.description"
    """
    Description of the tool's purpose, typically used to select the tool.
    """
    TOOL_PARAMETERS = "tool.parameters"
    """
    Parameters of the tool represented as a dictionary JSON string, e.g.
    see https://platform.openai.com/docs/guides/gpt/function-calling
    """

    RETRIEVAL_DOCUMENTS = "retrieval.documents"

    METADATA = "metadata"
    """
    Metadata attributes are used to store user-defined key-value pairs.
    For example, LangChain uses metadata to store user-defined attributes for a chain.
    """

    TAG_TAGS = "tag.tags"
    """
    Custom categorical tags for the span.
    """

    OPENINFERENCE_SPAN_KIND = "openinference.span.kind"

    SESSION_ID = "session.id"
    """
    The id of the session
    """
    USER_ID = "user.id"
    """
    The id of the user
    """


class MessageAttributes:
    """
    Attributes for a message sent to or from an LLM
    """

    MESSAGE_ROLE = "message.role"
    """
    The role of the message, such as "user", "agent", "function".
    """
    MESSAGE_CONTENT = "message.content"
    """
    The content of the message to or from the llm, must be a string.
    """
    MESSAGE_CONTENTS = "message.contents"
    """
    The message contents to the llm, it is an array of
    `message_content` prefixed attributes.
    """
    MESSAGE_NAME = "message.name"
    """
    The name of the message, often used to identify the function
    that was used to generate the message.
    """
    MESSAGE_TOOL_CALLS = "message.tool_calls"
    """
    The tool calls generated by the model, such as function calls.
    """
    MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name"
    """
    The function name that is a part of the message list.
    This is populated for role 'function' or 'agent' as a mechanism to identify
    the function that was called during the execution of a tool.
    """
    MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json"
    """
    The JSON string representing the arguments passed to the function
    during a function call.
    """


class MessageContentAttributes:
    """
    Attributes for the contents of user messages sent to an LLM.
    """

    MESSAGE_CONTENT_TYPE = "message_content.type"
    """
    The type of the content, such as "text" or "image".
    """
    MESSAGE_CONTENT_TEXT = "message_content.text"
    """
    The text content of the message, if the type is "text".
    """
    MESSAGE_CONTENT_IMAGE = "message_content.image"
    """
    The image content of the message, if the type is "image".
    An image can be made available to the model by passing a link to
    the image or by passing the base64 encoded image directly in the
    request.
    """


class ImageAttributes:
    """
    Attributes for images
    """

    IMAGE_URL = "image.url"
    """
    An http or base64 image url
    """


class DocumentAttributes:
    """
    Attributes for a document.
    """

    DOCUMENT_ID = "document.id"
    """
    The id of the document.
    """
    DOCUMENT_SCORE = "document.score"
    """
    The score of the document
    """
    DOCUMENT_CONTENT = "document.content"
    """
    The content of the document.
    """
    DOCUMENT_METADATA = "document.metadata"
    """
    The metadata of the document represented as a dictionary
    JSON string, e.g. `"{ 'title': 'foo' }"`
    """


class RerankerAttributes:
    """
    Attributes for a reranker
    """

    RERANKER_INPUT_DOCUMENTS = "reranker.input_documents"
    """
    List of documents as input to the reranker
    """
    RERANKER_OUTPUT_DOCUMENTS = "reranker.output_documents"
    """
    List of documents as output from the reranker
    """
    RERANKER_QUERY = "reranker.query"
    """
    Query string for the reranker
    """
    RERANKER_MODEL_NAME = "reranker.model_name"
    """
    Model name of the reranker
    """
    RERANKER_TOP_K = "reranker.top_k"
    """
    Top K parameter of the reranker
    """


class EmbeddingAttributes:
    """
    Attributes for an embedding
    """

    EMBEDDING_TEXT = "embedding.text"
    """
    The text represented by the embedding.
    """
    EMBEDDING_VECTOR = "embedding.vector"
    """
    The embedding vector.
    """


class ToolCallAttributes:
    """
    Attributes for a tool call
    """

    TOOL_CALL_FUNCTION_NAME = "tool_call.function.name"
    """
    The name of function that is being called during a tool call.
    """
    TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments"
    """
    The JSON string representing the arguments passed to the function
    during a tool call.
    """


class OpenInferenceSpanKindValues(Enum):
    TOOL = "TOOL"
    CHAIN = "CHAIN"
    LLM = "LLM"
    RETRIEVER = "RETRIEVER"
    EMBEDDING = "EMBEDDING"
    AGENT = "AGENT"
    RERANKER = "RERANKER"
    UNKNOWN = "UNKNOWN"
    GUARDRAIL = "GUARDRAIL"
    EVALUATOR = "EVALUATOR"


class OpenInferenceMimeTypeValues(Enum):
    TEXT = "text/plain"
    JSON = "application/json"
litellm/integrations/arize_ai.py (new file, +114)
@@ -0,0 +1,114 @@
"""
Arize AI is OTEL compatible.

This file has Arize AI specific helper functions.
"""

from typing import TYPE_CHECKING, Any, Optional, Union

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span

    Span = _Span
else:
    Span = Any


def set_arize_ai_attributes(span: Span, kwargs, response_obj):
    from litellm.integrations._types.open_inference import (
        MessageAttributes,
        MessageContentAttributes,
        OpenInferenceSpanKindValues,
        SpanAttributes,
    )

    optional_params = kwargs.get("optional_params", {})
    litellm_params = kwargs.get("litellm_params", {}) or {}

    #############################################
    ############ LLM CALL METADATA ##############
    #############################################
    # commented out for now - looks like Arize AI could not log this
    # metadata = litellm_params.get("metadata", {}) or {}
    # span.set_attribute(SpanAttributes.METADATA, str(metadata))

    #############################################
    ########## LLM Request Attributes ###########
    #############################################

    # The name of the LLM a request is being made to
    if kwargs.get("model"):
        span.set_attribute(SpanAttributes.LLM_MODEL_NAME, kwargs.get("model"))

    span.set_attribute(
        SpanAttributes.OPENINFERENCE_SPAN_KIND, OpenInferenceSpanKindValues.LLM.value
    )
    messages = kwargs.get("messages")

    # for /chat/completions
    # https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
    if messages:
        span.set_attribute(
            SpanAttributes.INPUT_VALUE,
            messages[-1].get("content", ""),  # get the last message for input
        )

        # LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page
        for idx, msg in enumerate(messages):
            # Set the role per message
            span.set_attribute(
                f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_ROLE}",
                msg["role"],
            )
            # Set the content per message
            span.set_attribute(
                f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_CONTENT}",
                msg.get("content", ""),
            )

    # The Generative AI Provider: Azure, OpenAI, etc.
    span.set_attribute(SpanAttributes.LLM_INVOCATION_PARAMETERS, str(optional_params))

    if optional_params.get("user"):
        span.set_attribute(SpanAttributes.USER_ID, optional_params.get("user"))

    #############################################
    ########## LLM Response Attributes ##########
    # https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
    #############################################
    for choice in response_obj.get("choices"):
        response_message = choice.get("message", {})
        span.set_attribute(
            SpanAttributes.OUTPUT_VALUE, response_message.get("content", "")
        )

        # This shows up under `output_messages` tab on the span page
        # This code assumes a single response
        span.set_attribute(
            f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_ROLE}",
            response_message["role"],
        )
        span.set_attribute(
            f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_CONTENT}",
            response_message.get("content", ""),
        )

    usage = response_obj.get("usage")
    if usage:
        span.set_attribute(
            SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
            usage.get("total_tokens"),
        )

        # The number of tokens used in the LLM response (completion).
        span.set_attribute(
            SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
            usage.get("completion_tokens"),
        )

        # The number of tokens used in the LLM prompt.
        span.set_attribute(
            SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
            usage.get("prompt_tokens"),
        )
    pass
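For orientation, here is a sketch of the input shapes this helper expects, inferred from the lookups above — `kwargs` carries LiteLLM's logging payload and `response_obj` is a chat-completion-style dict. The values are illustrative:

```python
# Illustrative inputs for set_arize_ai_attributes (not taken from this PR).
kwargs = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hi"}],
    "optional_params": {"temperature": 0.1, "user": "OTEL_USER"},
    "litellm_params": {"metadata": {}},
}
response_obj = {
    "choices": [{"message": {"role": "assistant", "content": "hello"}}],
    "usage": {"total_tokens": 12, "completion_tokens": 2, "prompt_tokens": 10},
}
# set_arize_ai_attributes(span, kwargs, response_obj) then sets the
# input/output message, invocation-parameter, and token-count attributes.
```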
@@ -2,7 +2,7 @@ import os
 from dataclasses import dataclass
 from datetime import datetime
 from functools import wraps
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union

 import litellm
 from litellm._logging import verbose_logger

@@ -27,9 +27,10 @@ else:


 LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
-LITELLM_RESOURCE = {
+LITELLM_RESOURCE: Dict[Any, Any] = {
     "service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
     "deployment.environment": os.getenv("OTEL_ENVIRONMENT_NAME", "production"),
+    "model_id": os.getenv("OTEL_SERVICE_NAME", "litellm"),
 }
 RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
 LITELLM_REQUEST_SPAN_NAME = "litellm_request"

@@ -68,7 +69,9 @@ class OpenTelemetryConfig:


 class OpenTelemetry(CustomLogger):
-    def __init__(self, config=OpenTelemetryConfig.from_env()):
+    def __init__(
+        self, config=OpenTelemetryConfig.from_env(), callback_name: Optional[str] = None
+    ):
         from opentelemetry import trace
         from opentelemetry.sdk.resources import Resource
         from opentelemetry.sdk.trace import TracerProvider

@@ -79,6 +82,7 @@ class OpenTelemetry(CustomLogger):
         self.OTEL_HEADERS = self.config.headers
         provider = TracerProvider(resource=Resource(attributes=LITELLM_RESOURCE))
         provider.add_span_processor(self._get_span_processor())
+        self.callback_name = callback_name

         trace.set_tracer_provider(provider)
         self.tracer = trace.get_tracer(LITELLM_TRACER_NAME)

@@ -120,8 +124,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

-        _start_time_ns = start_time
-        _end_time_ns = end_time
+        _start_time_ns = 0
+        _end_time_ns = 0

         if isinstance(start_time, float):
             _start_time_ns = int(int(start_time) * 1e9)

@@ -159,8 +163,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

-        _start_time_ns = start_time
-        _end_time_ns = end_time
+        _start_time_ns = 0
+        _end_time_ns = 0

         if isinstance(start_time, float):
             _start_time_ns = int(int(start_time) * 1e9)

@@ -294,6 +298,11 @@ class OpenTelemetry(CustomLogger):
         return isinstance(value, (str, bool, int, float))

     def set_attributes(self, span: Span, kwargs, response_obj):
+        if self.callback_name == "arize":
+            from litellm.integrations.arize_ai import set_arize_ai_attributes
+
+            set_arize_ai_attributes(span, kwargs, response_obj)
+            return
         from litellm.proxy._types import SpanAttributes

         optional_params = kwargs.get("optional_params", {})

@@ -612,8 +621,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

-        _start_time_ns = logging_payload.start_time
-        _end_time_ns = logging_payload.end_time
+        _start_time_ns = 0
+        _end_time_ns = 0

         start_time = logging_payload.start_time
         end_time = logging_payload.end_time

@@ -658,8 +667,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode

-        _start_time_ns = logging_payload.start_time
-        _end_time_ns = logging_payload.end_time
+        _start_time_ns = 0
+        _end_time_ns = 0

         start_time = logging_payload.start_time
         end_time = logging_payload.end_time
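Putting these pieces together, a sketch of how an Arize-flavored logger instance is built and why `set_attributes` then delegates to the Arize helper. The constructor arguments match this diff; building the instance directly like this is illustrative (the init branch below normally does it):

```python
# Illustrative construction, mirroring the "arize" init branch further below.
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig

otel_config = OpenTelemetryConfig(
    exporter="otlp_grpc",
    endpoint="https://otlp.arize.com/v1",
)
otel_logger = OpenTelemetry(config=otel_config, callback_name="arize")
# Because callback_name == "arize", set_attributes() routes every span
# through set_arize_ai_attributes() instead of the generic OTEL attribute path.
```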
@@ -1954,6 +1954,43 @@ def _init_custom_logger_compatible_class(
         _langsmith_logger = LangsmithLogger()
         _in_memory_loggers.append(_langsmith_logger)
         return _langsmith_logger  # type: ignore
+    elif logging_integration == "arize":
+        if "ARIZE_SPACE_KEY" not in os.environ:
+            raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
+        if "ARIZE_API_KEY" not in os.environ:
+            raise ValueError("ARIZE_API_KEY not found in environment variables")
+        from litellm.integrations.opentelemetry import (
+            OpenTelemetry,
+            OpenTelemetryConfig,
+        )
+
+        otel_config = OpenTelemetryConfig(
+            exporter="otlp_grpc",
+            endpoint="https://otlp.arize.com/v1",
+        )
+        os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
+            f"space_key={os.getenv('ARIZE_SPACE_KEY')},api_key={os.getenv('ARIZE_API_KEY')}"
+        )
+        for callback in _in_memory_loggers:
+            if (
+                isinstance(callback, OpenTelemetry)
+                and callback.callback_name == "arize"
+            ):
+                return callback  # type: ignore
+        _otel_logger = OpenTelemetry(config=otel_config, callback_name="arize")
+        _in_memory_loggers.append(_otel_logger)
+        return _otel_logger  # type: ignore
+
+    elif logging_integration == "otel":
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback  # type: ignore
+
+        otel_logger = OpenTelemetry()
+        _in_memory_loggers.append(otel_logger)
+        return otel_logger  # type: ignore
+
     elif logging_integration == "galileo":
         for callback in _in_memory_loggers:
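End to end, the user-facing trigger for this branch is just setting the callback. A short sketch, assuming valid Arize keys in the environment (the key values are illustrative, and `mock_response` avoids a real provider call):

```python
import os

import litellm

os.environ["ARIZE_SPACE_KEY"] = "d0*****"  # illustrative
os.environ["ARIZE_API_KEY"] = "141a****"   # illustrative

# Triggers _init_custom_logger_compatible_class(..., "arize") shown above
litellm.callbacks = ["arize"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hello",  # skip the real provider call while testing
)
```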
@@ -2027,6 +2064,25 @@ def get_custom_logger_compatible_class(
         for callback in _in_memory_loggers:
             if isinstance(callback, LangsmithLogger):
                 return callback
+    elif logging_integration == "otel":
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback
+    elif logging_integration == "arize":
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        if "ARIZE_SPACE_KEY" not in os.environ:
+            raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
+        if "ARIZE_API_KEY" not in os.environ:
+            raise ValueError("ARIZE_API_KEY not found in environment variables")
+        for callback in _in_memory_loggers:
+            if (
+                isinstance(callback, OpenTelemetry)
+                and callback.callback_name == "arize"
+            ):
+                return callback
     elif logging_integration == "logfire":
         if "LOGFIRE_TOKEN" not in os.environ:
             raise ValueError("LOGFIRE_TOKEN not found in environment variables")
@@ -228,6 +228,10 @@ class LiteLLMRoutes(enum.Enum):
         "/utils/token_counter",
     ]

+    anthropic_routes: List = [
+        "/v1/messages",
+    ]
+
     info_routes: List = [
         "/key/info",
         "/team/info",
@@ -24,7 +24,7 @@ from litellm.proxy._types import (
     LitellmUserRoles,
     UserAPIKeyAuth,
 )
-from litellm.proxy.auth.auth_utils import is_openai_route
+from litellm.proxy.auth.auth_utils import is_llm_api_route
 from litellm.proxy.utils import PrismaClient, ProxyLogging, log_to_opentelemetry
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes

@@ -106,7 +106,7 @@ def common_checks(
         general_settings.get("enforce_user_param", None) is not None
         and general_settings["enforce_user_param"] == True
     ):
-        if is_openai_route(route=route) and "user" not in request_body:
+        if is_llm_api_route(route=route) and "user" not in request_body:
             raise Exception(
                 f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
             )

@@ -122,7 +122,7 @@ def common_checks(
             + CommonProxyErrors.not_premium_user.value
         )

-    if is_openai_route(route=route):
+    if is_llm_api_route(route=route):
         # loop through each enforced param
         # example enforced_params ['user', 'metadata', 'metadata.generation_name']
         for enforced_param in general_settings["enforced_params"]:

@@ -150,7 +150,7 @@ def common_checks(
         and global_proxy_spend is not None
         # only run global budget checks for OpenAI routes
         # Reason - the Admin UI should continue working if the proxy crosses it's global budget
-        and is_openai_route(route=route)
+        and is_llm_api_route(route=route)
         and route != "/v1/models"
         and route != "/models"
     ):
@@ -46,7 +46,7 @@ def route_in_additonal_public_routes(current_route: str):
     return False


-def is_openai_route(route: str) -> bool:
+def is_llm_api_route(route: str) -> bool:
     """
     Helper to checks if provided route is an OpenAI route


@@ -59,6 +59,9 @@ def is_openai_route(route: str) -> bool:
     if route in LiteLLMRoutes.openai_routes.value:
         return True

+    if route in LiteLLMRoutes.anthropic_routes.value:
+        return True
+
     # fuzzy match routes like "/v1/threads/thread_49EIN5QF32s4mH20M7GFKdlZ"
     # Check for routes with placeholders
     for openai_route in LiteLLMRoutes.openai_routes.value:
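The practical effect of the rename plus the new check, sketched as hedged assertions based on the routes listed in this diff:

```python
# Expected behavior after this PR (route values taken from the diff above).
from litellm.proxy.auth.auth_utils import is_llm_api_route

assert is_llm_api_route(route="/v1/chat/completions") is True  # existing OpenAI route
assert is_llm_api_route(route="/v1/messages") is True          # new: Anthropic route
assert is_llm_api_route(route="/key/info") is False            # info route, still gated
```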
@@ -57,7 +57,7 @@ from litellm.proxy.auth.auth_checks import (
     log_to_opentelemetry,
 )
 from litellm.proxy.auth.auth_utils import (
-    is_openai_route,
+    is_llm_api_route,
     route_in_additonal_public_routes,
 )
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body

@@ -994,9 +994,9 @@ async def user_api_key_auth(
             _user_role = _get_user_role(user_id_information=user_id_information)

             if not _is_user_proxy_admin(user_id_information):  # if non-admin
-                if is_openai_route(route=route):
+                if is_llm_api_route(route=route):
                     pass
-                elif is_openai_route(route=request["route"].name):
+                elif is_llm_api_route(route=request["route"].name):
                     pass
                 elif (
                     route in LiteLLMRoutes.info_routes.value

@@ -1049,7 +1049,7 @@ async def user_api_key_auth(

                 pass
             elif _user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY.value:
-                if is_openai_route(route=route):
+                if is_llm_api_route(route=route):
                     raise HTTPException(
                         status_code=status.HTTP_403_FORBIDDEN,
                         detail=f"user not allowed to access this OpenAI routes, role= {_user_role}",
@@ -1,10 +1,15 @@
 model_list:
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
   - model_name: fireworks-llama-v3-70b-instruct
     litellm_params:
       model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
-      api_key: "os.environ/FIREWORKS_AI_API_KEY"
+      api_key: "os.environ/FIREWORKS"

-router_settings:
-  enable_tag_filtering: True # 👈 Key Change
 general_settings:
   master_key: sk-1234
+
+litellm_settings:
+  callbacks: ["arize"]
litellm/proxy/tests/test_anthropic_sdk.py (new file, +22)
@@ -0,0 +1,22 @@
import os

from anthropic import Anthropic

client = Anthropic(
    # This is the default and can be omitted
    base_url="http://localhost:4000",
    # this is a litellm proxy key :) - not a real anthropic key
    api_key="sk-s4xN1IiLTCytwtZFJaYQrA",
)

message = client.messages.create(
    max_tokens=1024,
    messages=[
        {
            "role": "user",
            "content": "Hello, Claude",
        }
    ],
    model="claude-3-opus-20240229",
)
print(message.content)
litellm/tests/test_arize_ai.py (new file, +29)
@@ -0,0 +1,29 @@
import asyncio
import logging
import os
import time

import pytest
from dotenv import load_dotenv
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

import litellm
from litellm._logging import verbose_logger
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig

load_dotenv()
import logging


@pytest.mark.asyncio()
async def test_async_otel_callback():
    litellm.set_verbose = True
    litellm.callbacks = ["arize"]

    await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi test from local arize"}],
        mock_response="hello",
        temperature=0.1,
        user="OTEL_USER",
    )
@@ -19,7 +19,7 @@ import pytest

 import litellm
 from litellm.proxy._types import LiteLLMRoutes
-from litellm.proxy.auth.auth_utils import is_openai_route
+from litellm.proxy.auth.auth_utils import is_llm_api_route
 from litellm.proxy.proxy_server import app

 # Configure logging

@@ -77,8 +77,8 @@ def test_routes_on_litellm_proxy():
         ("/v1/non_existent_endpoint", False),
     ],
 )
-def test_is_openai_route(route: str, expected: bool):
-    assert is_openai_route(route) == expected
+def test_is_llm_api_route(route: str, expected: bool):
+    assert is_llm_api_route(route) == expected


 # Test case for routes that are similar but should return False

@@ -91,5 +91,10 @@ def test_is_openai_route(route: str, expected: bool):
         "/engines/model/invalid/completions",
     ],
 )
-def test_is_openai_route_similar_but_false(route: str):
-    assert is_openai_route(route) == False
+def test_is_llm_api_route_similar_but_false(route: str):
+    assert is_llm_api_route(route) == False
+
+
+def test_anthropic_api_routes():
+    # allow non proxy admins to call anthropic api routes
+    assert is_llm_api_route(route="/v1/messages") is True