diff --git a/docs/my-website/docs/observability/arize_integration.md b/docs/my-website/docs/observability/arize_integration.md
new file mode 100644
index 000000000..d2592da6a
--- /dev/null
+++ b/docs/my-website/docs/observability/arize_integration.md
@@ -0,0 +1,72 @@
+import Image from '@theme/IdealImage';
+
+# 🔥 Arize AI - Logging LLM Input/Output
+
+AI Observability and Evaluation Platform
+
+:::tip
+
+This is community maintained. Please make an issue if you run into a bug:
+https://github.com/BerriAI/litellm
+
+:::
+
+
+
+## Pre-Requisites
+Make an account on [Arize AI](https://app.arize.com/auth/login)
+
+## Quick Start
+Use just 2 lines of code to instantly log your responses **across all providers** with Arize
+
+
+```python
+litellm.callbacks = ["arize"]
+```
+```python
+import litellm
+import os
+
+os.environ["ARIZE_SPACE_KEY"] = ""
+os.environ["ARIZE_API_KEY"] = ""
+
+# LLM API Keys
+os.environ['OPENAI_API_KEY']=""
+
+# set arize as a callback, litellm will send the data to arize
+litellm.callbacks = ["arize"]
+
+# openai call
+response = litellm.completion(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "user", "content": "Hi 👋 - i'm openai"}
+    ]
+)
+```
+
+### Using with LiteLLM Proxy
+
+
+```yaml
+model_list:
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+
+litellm_settings:
+  callbacks: ["arize"]
+
+environment_variables:
+    ARIZE_SPACE_KEY: "d0*****"
+    ARIZE_API_KEY: "141a****"
+```
+
+## Support & Talk to Founders
+
+- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
+- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
+- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
+- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 7dcc934a6..4283f4586 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -38,7 +38,13 @@ success_callback: List[Union[str, Callable]] = []
 failure_callback: List[Union[str, Callable]] = []
 service_callback: List[Union[str, Callable]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
-    "lago", "openmeter", "logfire", "dynamic_rate_limiter", "langsmith", "galileo"
+    "lago",
+    "openmeter",
+    "logfire",
+    "dynamic_rate_limiter",
+    "langsmith",
+    "galileo",
+    "arize",
 ]
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 _langfuse_default_tags: Optional[
diff --git a/litellm/integrations/_types/open_inference.py b/litellm/integrations/_types/open_inference.py
new file mode 100644
index 000000000..bcfabe9b7
--- /dev/null
+++ b/litellm/integrations/_types/open_inference.py
@@ -0,0 +1,286 @@
+from enum import Enum
+
+
+class SpanAttributes:
+    OUTPUT_VALUE = "output.value"
+    OUTPUT_MIME_TYPE = "output.mime_type"
+    """
+    The type of output.value. If unspecified, the type is plain text by default.
+    If type is JSON, the value is a string representing a JSON object.
+    """
+    INPUT_VALUE = "input.value"
+    INPUT_MIME_TYPE = "input.mime_type"
+    """
+    The type of input.value. If unspecified, the type is plain text by default.
+    If type is JSON, the value is a string representing a JSON object.
+    """
+
+    EMBEDDING_EMBEDDINGS = "embedding.embeddings"
+    """
+    A list of objects containing embedding data, including the vector and represented piece of text.
+    """
+    EMBEDDING_MODEL_NAME = "embedding.model_name"
+    """
+    The name of the embedding model.
+ """ + + LLM_FUNCTION_CALL = "llm.function_call" + """ + For models and APIs that support function calling. Records attributes such as the function + name and arguments to the called function. + """ + LLM_INVOCATION_PARAMETERS = "llm.invocation_parameters" + """ + Invocation parameters passed to the LLM or API, such as the model name, temperature, etc. + """ + LLM_INPUT_MESSAGES = "llm.input_messages" + """ + Messages provided to a chat API. + """ + LLM_OUTPUT_MESSAGES = "llm.output_messages" + """ + Messages received from a chat API. + """ + LLM_MODEL_NAME = "llm.model_name" + """ + The name of the model being used. + """ + LLM_PROMPTS = "llm.prompts" + """ + Prompts provided to a completions API. + """ + LLM_PROMPT_TEMPLATE = "llm.prompt_template.template" + """ + The prompt template as a Python f-string. + """ + LLM_PROMPT_TEMPLATE_VARIABLES = "llm.prompt_template.variables" + """ + A list of input variables to the prompt template. + """ + LLM_PROMPT_TEMPLATE_VERSION = "llm.prompt_template.version" + """ + The version of the prompt template being used. + """ + LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt" + """ + Number of tokens in the prompt. + """ + LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion" + """ + Number of tokens in the completion. + """ + LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total" + """ + Total number of tokens, including both prompt and completion. + """ + + TOOL_NAME = "tool.name" + """ + Name of the tool being used. + """ + TOOL_DESCRIPTION = "tool.description" + """ + Description of the tool's purpose, typically used to select the tool. + """ + TOOL_PARAMETERS = "tool.parameters" + """ + Parameters of the tool represented a dictionary JSON string, e.g. + see https://platform.openai.com/docs/guides/gpt/function-calling + """ + + RETRIEVAL_DOCUMENTS = "retrieval.documents" + + METADATA = "metadata" + """ + Metadata attributes are used to store user-defined key-value pairs. + For example, LangChain uses metadata to store user-defined attributes for a chain. + """ + + TAG_TAGS = "tag.tags" + """ + Custom categorical tags for the span. + """ + + OPENINFERENCE_SPAN_KIND = "openinference.span.kind" + + SESSION_ID = "session.id" + """ + The id of the session + """ + USER_ID = "user.id" + """ + The id of the user + """ + + +class MessageAttributes: + """ + Attributes for a message sent to or from an LLM + """ + + MESSAGE_ROLE = "message.role" + """ + The role of the message, such as "user", "agent", "function". + """ + MESSAGE_CONTENT = "message.content" + """ + The content of the message to or from the llm, must be a string. + """ + MESSAGE_CONTENTS = "message.contents" + """ + The message contents to the llm, it is an array of + `message_content` prefixed attributes. + """ + MESSAGE_NAME = "message.name" + """ + The name of the message, often used to identify the function + that was used to generate the message. + """ + MESSAGE_TOOL_CALLS = "message.tool_calls" + """ + The tool calls generated by the model, such as function calls. + """ + MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name" + """ + The function name that is a part of the message list. + This is populated for role 'function' or 'agent' as a mechanism to identify + the function that was called during the execution of a tool. + """ + MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json" + """ + The JSON string representing the arguments passed to the function + during a function call. 
+ """ + + +class MessageContentAttributes: + """ + Attributes for the contents of user messages sent to an LLM. + """ + + MESSAGE_CONTENT_TYPE = "message_content.type" + """ + The type of the content, such as "text" or "image". + """ + MESSAGE_CONTENT_TEXT = "message_content.text" + """ + The text content of the message, if the type is "text". + """ + MESSAGE_CONTENT_IMAGE = "message_content.image" + """ + The image content of the message, if the type is "image". + An image can be made available to the model by passing a link to + the image or by passing the base64 encoded image directly in the + request. + """ + + +class ImageAttributes: + """ + Attributes for images + """ + + IMAGE_URL = "image.url" + """ + An http or base64 image url + """ + + +class DocumentAttributes: + """ + Attributes for a document. + """ + + DOCUMENT_ID = "document.id" + """ + The id of the document. + """ + DOCUMENT_SCORE = "document.score" + """ + The score of the document + """ + DOCUMENT_CONTENT = "document.content" + """ + The content of the document. + """ + DOCUMENT_METADATA = "document.metadata" + """ + The metadata of the document represented as a dictionary + JSON string, e.g. `"{ 'title': 'foo' }"` + """ + + +class RerankerAttributes: + """ + Attributes for a reranker + """ + + RERANKER_INPUT_DOCUMENTS = "reranker.input_documents" + """ + List of documents as input to the reranker + """ + RERANKER_OUTPUT_DOCUMENTS = "reranker.output_documents" + """ + List of documents as output from the reranker + """ + RERANKER_QUERY = "reranker.query" + """ + Query string for the reranker + """ + RERANKER_MODEL_NAME = "reranker.model_name" + """ + Model name of the reranker + """ + RERANKER_TOP_K = "reranker.top_k" + """ + Top K parameter of the reranker + """ + + +class EmbeddingAttributes: + """ + Attributes for an embedding + """ + + EMBEDDING_TEXT = "embedding.text" + """ + The text represented by the embedding. + """ + EMBEDDING_VECTOR = "embedding.vector" + """ + The embedding vector. + """ + + +class ToolCallAttributes: + """ + Attributes for a tool call + """ + + TOOL_CALL_FUNCTION_NAME = "tool_call.function.name" + """ + The name of function that is being called during a tool call. + """ + TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments" + """ + The JSON string representing the arguments passed to the function + during a tool call. 
+ """ + + +class OpenInferenceSpanKindValues(Enum): + TOOL = "TOOL" + CHAIN = "CHAIN" + LLM = "LLM" + RETRIEVER = "RETRIEVER" + EMBEDDING = "EMBEDDING" + AGENT = "AGENT" + RERANKER = "RERANKER" + UNKNOWN = "UNKNOWN" + GUARDRAIL = "GUARDRAIL" + EVALUATOR = "EVALUATOR" + + +class OpenInferenceMimeTypeValues(Enum): + TEXT = "text/plain" + JSON = "application/json" diff --git a/litellm/integrations/arize_ai.py b/litellm/integrations/arize_ai.py new file mode 100644 index 000000000..45c6c1604 --- /dev/null +++ b/litellm/integrations/arize_ai.py @@ -0,0 +1,114 @@ +""" +arize AI is OTEL compatible + +this file has Arize ai specific helper functions +""" + +from typing import TYPE_CHECKING, Any, Optional, Union + +if TYPE_CHECKING: + from opentelemetry.trace import Span as _Span + + Span = _Span +else: + Span = Any + + +def set_arize_ai_attributes(span: Span, kwargs, response_obj): + from litellm.integrations._types.open_inference import ( + MessageAttributes, + MessageContentAttributes, + OpenInferenceSpanKindValues, + SpanAttributes, + ) + + optional_params = kwargs.get("optional_params", {}) + litellm_params = kwargs.get("litellm_params", {}) or {} + + ############################################# + ############ LLM CALL METADATA ############## + ############################################# + # commented out for now - looks like Arize AI could not log this + # metadata = litellm_params.get("metadata", {}) or {} + # span.set_attribute(SpanAttributes.METADATA, str(metadata)) + + ############################################# + ########## LLM Request Attributes ########### + ############################################# + + # The name of the LLM a request is being made to + if kwargs.get("model"): + span.set_attribute(SpanAttributes.LLM_MODEL_NAME, kwargs.get("model")) + + span.set_attribute( + SpanAttributes.OPENINFERENCE_SPAN_KIND, OpenInferenceSpanKindValues.LLM.value + ) + messages = kwargs.get("messages") + + # for /chat/completions + # https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions + if messages: + span.set_attribute( + SpanAttributes.INPUT_VALUE, + messages[-1].get("content", ""), # get the last message for input + ) + + # LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page + for idx, msg in enumerate(messages): + # Set the role per message + span.set_attribute( + f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_ROLE}", + msg["role"], + ) + # Set the content per message + span.set_attribute( + f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_CONTENT}", + msg.get("content", ""), + ) + + # The Generative AI Provider: Azure, OpenAI, etc. 
+    span.set_attribute(SpanAttributes.LLM_INVOCATION_PARAMETERS, str(optional_params))
+
+    if optional_params.get("user"):
+        span.set_attribute(SpanAttributes.USER_ID, optional_params.get("user"))
+
+    #############################################
+    ########## LLM Response Attributes ##########
+    # https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
+    #############################################
+    for choice in response_obj.get("choices"):
+        response_message = choice.get("message", {})
+        span.set_attribute(
+            SpanAttributes.OUTPUT_VALUE, response_message.get("content", "")
+        )
+
+        # This shows up under `output_messages` tab on the span page
+        # This code assumes a single response
+        span.set_attribute(
+            f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_ROLE}",
+            response_message["role"],
+        )
+        span.set_attribute(
+            f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_CONTENT}",
+            response_message.get("content", ""),
+        )
+
+    usage = response_obj.get("usage")
+    if usage:
+        span.set_attribute(
+            SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
+            usage.get("total_tokens"),
+        )
+
+        # The number of tokens used in the LLM response (completion).
+        span.set_attribute(
+            SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
+            usage.get("completion_tokens"),
+        )
+
+        # The number of tokens used in the LLM prompt.
+        span.set_attribute(
+            SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
+            usage.get("prompt_tokens"),
+        )
+    pass
diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py
index 215a4f09f..bc58efad3 100644
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@@ -2,7 +2,7 @@ import os
 from dataclasses import dataclass
 from datetime import datetime
 from functools import wraps
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
 
 import litellm
 from litellm._logging import verbose_logger
@@ -27,9 +27,10 @@ else:
 
 
 LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
-LITELLM_RESOURCE = {
+LITELLM_RESOURCE: Dict[Any, Any] = {
     "service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
     "deployment.environment": os.getenv("OTEL_ENVIRONMENT_NAME", "production"),
+    "model_id": os.getenv("OTEL_SERVICE_NAME", "litellm"),
 }
 RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
 LITELLM_REQUEST_SPAN_NAME = "litellm_request"
@@ -68,7 +69,9 @@ class OpenTelemetryConfig:
 
 
 class OpenTelemetry(CustomLogger):
-    def __init__(self, config=OpenTelemetryConfig.from_env()):
+    def __init__(
+        self, config=OpenTelemetryConfig.from_env(), callback_name: Optional[str] = None
+    ):
         from opentelemetry import trace
         from opentelemetry.sdk.resources import Resource
         from opentelemetry.sdk.trace import TracerProvider
@@ -79,6 +82,7 @@ class OpenTelemetry(CustomLogger):
         self.OTEL_HEADERS = self.config.headers
         provider = TracerProvider(resource=Resource(attributes=LITELLM_RESOURCE))
         provider.add_span_processor(self._get_span_processor())
+        self.callback_name = callback_name
 
         trace.set_tracer_provider(provider)
         self.tracer = trace.get_tracer(LITELLM_TRACER_NAME)
@@ -120,8 +124,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
 
-        _start_time_ns = start_time
-        _end_time_ns = end_time
+        _start_time_ns = 0
+        _end_time_ns = 0
 
         if isinstance(start_time, float):
             _start_time_ns = int(int(start_time) * 1e9)
@@ -159,8 +163,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
 
-        _start_time_ns = start_time
-        _end_time_ns = end_time
+        _start_time_ns = 0
+        _end_time_ns = 0
 
         if isinstance(start_time, float):
             _start_time_ns = int(int(start_time) * 1e9)
@@ -294,6 +298,11 @@ class OpenTelemetry(CustomLogger):
         return isinstance(value, (str, bool, int, float))
 
     def set_attributes(self, span: Span, kwargs, response_obj):
+        if self.callback_name == "arize":
+            from litellm.integrations.arize_ai import set_arize_ai_attributes
+
+            set_arize_ai_attributes(span, kwargs, response_obj)
+            return
         from litellm.proxy._types import SpanAttributes
 
         optional_params = kwargs.get("optional_params", {})
@@ -612,8 +621,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
 
-        _start_time_ns = logging_payload.start_time
-        _end_time_ns = logging_payload.end_time
+        _start_time_ns = 0
+        _end_time_ns = 0
 
         start_time = logging_payload.start_time
         end_time = logging_payload.end_time
@@ -658,8 +667,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
 
-        _start_time_ns = logging_payload.start_time
-        _end_time_ns = logging_payload.end_time
+        _start_time_ns = 0
+        _end_time_ns = 0
 
         start_time = logging_payload.start_time
         end_time = logging_payload.end_time
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 32633960f..e78eb5793 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -1954,6 +1954,43 @@ def _init_custom_logger_compatible_class(
         _langsmith_logger = LangsmithLogger()
         _in_memory_loggers.append(_langsmith_logger)
         return _langsmith_logger  # type: ignore
+    elif logging_integration == "arize":
+        if "ARIZE_SPACE_KEY" not in os.environ:
+            raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
+        if "ARIZE_API_KEY" not in os.environ:
+            raise ValueError("ARIZE_API_KEY not found in environment variables")
+        from litellm.integrations.opentelemetry import (
+            OpenTelemetry,
+            OpenTelemetryConfig,
+        )
+
+        otel_config = OpenTelemetryConfig(
+            exporter="otlp_grpc",
+            endpoint="https://otlp.arize.com/v1",
+        )
+        os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
+            f"space_key={os.getenv('ARIZE_SPACE_KEY')},api_key={os.getenv('ARIZE_API_KEY')}"
+        )
+        for callback in _in_memory_loggers:
+            if (
+                isinstance(callback, OpenTelemetry)
+                and callback.callback_name == "arize"
+            ):
+                return callback  # type: ignore
+        _otel_logger = OpenTelemetry(config=otel_config, callback_name="arize")
+        _in_memory_loggers.append(_otel_logger)
+        return _otel_logger  # type: ignore
+
+    elif logging_integration == "otel":
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback  # type: ignore
+
+        otel_logger = OpenTelemetry()
+        _in_memory_loggers.append(otel_logger)
+        return otel_logger  # type: ignore
 
     elif logging_integration == "galileo":
         for callback in _in_memory_loggers:
@@ -2027,6 +2064,25 @@
         for callback in _in_memory_loggers:
             if isinstance(callback, LangsmithLogger):
                 return callback
+    elif logging_integration == "otel":
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback
+    elif logging_integration == "arize":
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        if "ARIZE_SPACE_KEY" not in os.environ:
+            raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
+        if "ARIZE_API_KEY" not in os.environ:
+            raise ValueError("ARIZE_API_KEY not found in environment variables")
+        for callback in _in_memory_loggers:
+            if (
+                isinstance(callback, OpenTelemetry)
+                and callback.callback_name == "arize"
+            ):
+                return callback
     elif logging_integration == "logfire":
         if "LOGFIRE_TOKEN" not in os.environ:
             raise ValueError("LOGFIRE_TOKEN not found in environment variables")
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index e9371c1d8..0724867aa 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -228,6 +228,10 @@ class LiteLLMRoutes(enum.Enum):
         "/utils/token_counter",
     ]
 
+    anthropic_routes: List = [
+        "/v1/messages",
+    ]
+
     info_routes: List = [
         "/key/info",
         "/team/info",
diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py
index 96171f2ef..1650eb8aa 100644
--- a/litellm/proxy/auth/auth_checks.py
+++ b/litellm/proxy/auth/auth_checks.py
@@ -24,7 +24,7 @@ from litellm.proxy._types import (
     LitellmUserRoles,
     UserAPIKeyAuth,
 )
-from litellm.proxy.auth.auth_utils import is_openai_route
+from litellm.proxy.auth.auth_utils import is_llm_api_route
 from litellm.proxy.utils import PrismaClient, ProxyLogging, log_to_opentelemetry
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
 
@@ -106,7 +106,7 @@ def common_checks(
         general_settings.get("enforce_user_param", None) is not None
         and general_settings["enforce_user_param"] == True
     ):
-        if is_openai_route(route=route) and "user" not in request_body:
+        if is_llm_api_route(route=route) and "user" not in request_body:
             raise Exception(
                 f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
             )
@@ -122,7 +122,7 @@ def common_checks(
                 + CommonProxyErrors.not_premium_user.value
             )
 
-        if is_openai_route(route=route):
+        if is_llm_api_route(route=route):
             # loop through each enforced param
             # example enforced_params ['user', 'metadata', 'metadata.generation_name']
             for enforced_param in general_settings["enforced_params"]:
@@ -150,7 +150,7 @@ def common_checks(
         and global_proxy_spend is not None
         # only run global budget checks for OpenAI routes
         # Reason - the Admin UI should continue working if the proxy crosses it's global budget
-        and is_openai_route(route=route)
+        and is_llm_api_route(route=route)
         and route != "/v1/models"
         and route != "/models"
     ):
diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py
index d3e030762..bd1e50ed0 100644
--- a/litellm/proxy/auth/auth_utils.py
+++ b/litellm/proxy/auth/auth_utils.py
@@ -46,7 +46,7 @@ def route_in_additonal_public_routes(current_route: str):
         return False
 
 
-def is_openai_route(route: str) -> bool:
+def is_llm_api_route(route: str) -> bool:
     """
     Helper to checks if provided route is an OpenAI route
 
@@ -59,6 +59,9 @@ def is_openai_route(route: str) -> bool:
     if route in LiteLLMRoutes.openai_routes.value:
         return True
 
+    if route in LiteLLMRoutes.anthropic_routes.value:
+        return True
+
     # fuzzy match routes like "/v1/threads/thread_49EIN5QF32s4mH20M7GFKdlZ"
     # Check for routes with placeholders
     for openai_route in LiteLLMRoutes.openai_routes.value:
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index c5549ffcb..b4c88148e 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -57,7 +57,7 @@ from litellm.proxy.auth.auth_checks import (
     log_to_opentelemetry,
 )
 from litellm.proxy.auth.auth_utils import (
-    is_openai_route,
+    is_llm_api_route,
     route_in_additonal_public_routes,
 )
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
@@ -994,9 +994,9 @@
             _user_role = _get_user_role(user_id_information=user_id_information)
 
             if not _is_user_proxy_admin(user_id_information):  # if non-admin
-                if is_openai_route(route=route):
+                if is_llm_api_route(route=route):
                     pass
-                elif is_openai_route(route=request["route"].name):
+                elif is_llm_api_route(route=request["route"].name):
                     pass
                 elif (
                     route in LiteLLMRoutes.info_routes.value
@@ -1049,7 +1049,7 @@
                     pass
                 elif _user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY.value:
 
-                    if is_openai_route(route=route):
+                    if is_llm_api_route(route=route):
                         raise HTTPException(
                             status_code=status.HTTP_403_FORBIDDEN,
                             detail=f"user not allowed to access this OpenAI routes, role= {_user_role}",
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index c114db25f..2508a48a1 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,10 +1,15 @@
 model_list:
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
   - model_name: fireworks-llama-v3-70b-instruct
     litellm_params:
      model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
-      api_key: "os.environ/FIREWORKS_AI_API_KEY"
-
-router_settings:
-  enable_tag_filtering: True # 👈 Key Change
+      api_key: "os.environ/FIREWORKS"
 general_settings:
-  master_key: sk-1234
\ No newline at end of file
+  master_key: sk-1234
+
+litellm_settings:
+  callbacks: ["arize"]
\ No newline at end of file
diff --git a/litellm/proxy/tests/test_anthropic_sdk.py b/litellm/proxy/tests/test_anthropic_sdk.py
new file mode 100644
index 000000000..073fafb07
--- /dev/null
+++ b/litellm/proxy/tests/test_anthropic_sdk.py
@@ -0,0 +1,22 @@
+import os
+
+from anthropic import Anthropic
+
+client = Anthropic(
+    # This is the default and can be omitted
+    base_url="http://localhost:4000",
+    # this is a litellm proxy key :) - not a real anthropic key
+    api_key="sk-s4xN1IiLTCytwtZFJaYQrA",
+)
+
+message = client.messages.create(
+    max_tokens=1024,
+    messages=[
+        {
+            "role": "user",
+            "content": "Hello, Claude",
+        }
+    ],
+    model="claude-3-opus-20240229",
+)
+print(message.content)
diff --git a/litellm/tests/test_arize_ai.py b/litellm/tests/test_arize_ai.py
new file mode 100644
index 000000000..7c38db4c6
--- /dev/null
+++ b/litellm/tests/test_arize_ai.py
@@ -0,0 +1,29 @@
+import asyncio
+import logging
+import os
+import time
+
+import pytest
+from dotenv import load_dotenv
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
+
+load_dotenv()
+import logging
+
+
+@pytest.mark.asyncio()
+async def test_async_otel_callback():
+    litellm.set_verbose = True
+    litellm.callbacks = ["arize"]
+
+    await litellm.acompletion(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "hi test from local arize"}],
+        mock_response="hello",
+        temperature=0.1,
+        user="OTEL_USER",
+    )
diff --git a/litellm/tests/test_proxy_routes.py b/litellm/tests/test_proxy_routes.py
index 776ad1e78..6f5774d3e 100644
--- a/litellm/tests/test_proxy_routes.py
+++ b/litellm/tests/test_proxy_routes.py
@@ -19,7 +19,7 @@ import pytest
 import litellm
 from litellm.proxy._types import LiteLLMRoutes
-from litellm.proxy.auth.auth_utils import is_openai_route
+from litellm.proxy.auth.auth_utils import is_llm_api_route
 from litellm.proxy.proxy_server import app
 
 # Configure logging
@@ -77,8 +77,8 @@ def test_routes_on_litellm_proxy():
         ("/v1/non_existent_endpoint", False),
     ],
 )
-def test_is_openai_route(route: str, expected: bool):
-    assert is_openai_route(route) == expected
+def test_is_llm_api_route(route: str, expected: bool):
+    assert is_llm_api_route(route) == expected
 
 
 # Test case for routes that are similar but should return False
@@ -91,5 +91,10 @@ def test_is_openai_route(route: str, expected: bool):
         "/engines/model/invalid/completions",
     ],
 )
-def test_is_openai_route_similar_but_false(route: str):
-    assert is_openai_route(route) == False
+def test_is_llm_api_route_similar_but_false(route: str):
+    assert is_llm_api_route(route) == False
+
+
+def test_anthropic_api_routes():
+    # allow non proxy admins to call anthropic api routes
+    assert is_llm_api_route(route="/v1/messages") is True