diff --git a/README.md b/README.md
index f26f382da3..a09025d469 100644
--- a/README.md
+++ b/README.md
@@ -100,7 +100,7 @@ for part in response:
 ```
 
 ## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
-LiteLLM exposes pre defined callbacks to send data to Langfuse, DynamoDB, s3 Buckets, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
+LiteLLM exposes pre-defined callbacks to send data to Langfuse, DynamoDB, s3 Buckets, LLMonitor, Helicone, Promptlayer, Traceloop, Athina, Slack
 ```python
 from litellm import completion
 
@@ -108,11 +108,12 @@ from litellm import completion
 os.environ["LANGFUSE_PUBLIC_KEY"] = ""
 os.environ["LANGFUSE_SECRET_KEY"] = ""
 os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
+os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
 os.environ["OPENAI_API_KEY"]
 
 # set callbacks
-litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
+litellm.success_callback = ["langfuse", "llmonitor", "athina"] # log input/output to langfuse, llmonitor, athina
 
 #openai call
 response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
 
diff --git a/cookbook/proxy-server/readme.md b/cookbook/proxy-server/readme.md
index df74355d46..4b296831bc 100644
--- a/cookbook/proxy-server/readme.md
+++ b/cookbook/proxy-server/readme.md
@@ -33,7 +33,7 @@
 - Call all models using the OpenAI format - `completion(model, messages)`
 - Text responses will always be available at `['choices'][0]['message']['content']`
 - **Error Handling** Using Model Fallbacks (if `GPT-4` fails, try `llama2`)
-- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `LLMonitor,` `Helicone` (Any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/
+- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `LLMonitor`, `Athina`, `Helicone` (any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/)
 
 **Example: Logs sent to Supabase**
 Screenshot 2023-08-11 at 4 02 46 PM
 
diff --git a/docs/my-website/docs/observability/athina_integration.md b/docs/my-website/docs/observability/athina_integration.md
new file mode 100644
index 0000000000..a6328f8413
--- /dev/null
+++ b/docs/my-website/docs/observability/athina_integration.md
@@ -0,0 +1,50 @@
+import Image from '@theme/IdealImage';
+
+# Athina
+
+[Athina](https://athina.ai/) is an evaluation framework and production monitoring platform for your LLM-powered app. Athina is designed to enhance the performance and reliability of AI applications through real-time monitoring, granular analytics, and plug-and-play evaluations.
+
+<Image img={require('../../img/athina_dashboard.png')} />
+
+## Getting Started
+
+Use Athina to log requests across all LLM providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM).
+
+liteLLM provides `callbacks`, making it easy for you to log data depending on the status of your responses.
+
+## Using Callbacks
+
+First, sign up to get an API key on the [Athina dashboard](https://app.athina.ai).
+
+Use just 1 line of code to instantly log your responses **across all providers** with Athina:
+
+```python
+litellm.success_callback = ["athina"]
+```
+
+### Complete code
+
+```python
+from litellm import completion
+
+## set env variables
+os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
+os.environ["OPENAI_API_KEY"] = ""
+
+# set callback
+litellm.success_callback = ["athina"]
+
+#openai call
+response = completion(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
+)
+```
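+
+### Logging additional metadata
+
+The logger also copies a fixed set of optional keys (`environment`, `prompt_slug`, `customer_id`, `customer_user_id`, `session_id`, `external_reference_id`, `context`, `expected_response`) from litellm metadata into the Athina log record; other keys are ignored. A minimal sketch, assuming the `metadata` parameter to `completion` reaches callbacks as `litellm_params["metadata"]` (the field the logger reads); the values below are hypothetical:
+
+```python
+import os
+import litellm
+from litellm import completion
+
+os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
+os.environ["OPENAI_API_KEY"] = ""
+litellm.success_callback = ["athina"]
+
+response = completion(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
+    # hypothetical values - only the supported keys listed above are forwarded
+    metadata={
+        "environment": "production",
+        "prompt_slug": "greeting-prompt",
+        "customer_id": "customer-123",
+    },
+)
+```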
+
+## Support & Talk with us
+
+- [Schedule Demo 👋](https://cal.com/shiv-athina/30min)
+- [Website 💻](https://athina.ai/?utm_source=litellm&utm_medium=website)
+- [Docs 📖](https://docs.athina.ai/?utm_source=litellm&utm_medium=website)
+- [Demo Video 📺](https://www.loom.com/share/d9ef2c62e91b46769a39c42bb6669834?sid=711df413-0adb-4267-9708-5f29cef929e3)
+- Our emails ✉️ shiv@athina.ai, akshat@athina.ai, vivek@athina.ai
diff --git a/docs/my-website/docs/observability/callbacks.md b/docs/my-website/docs/observability/callbacks.md
index 892be93226..3b3d4eef32 100644
--- a/docs/my-website/docs/observability/callbacks.md
+++ b/docs/my-website/docs/observability/callbacks.md
@@ -10,6 +10,7 @@ liteLLM supports:
 - [LLMonitor](https://llmonitor.com/docs)
 - [Helicone](https://docs.helicone.ai/introduction)
 - [Traceloop](https://traceloop.com/docs)
+- [Athina](https://docs.athina.ai/)
 - [Sentry](https://docs.sentry.io/platforms/python/)
 - [PostHog](https://posthog.com/docs/libraries/python)
 - [Slack](https://slack.dev/bolt-python/concepts)
@@ -21,7 +22,7 @@ from litellm import completion
 
 # set callbacks
 litellm.input_callback=["sentry"] # for sentry breadcrumbing - logs the input being sent to the api
-litellm.success_callback=["posthog", "helicone", "llmonitor"]
+litellm.success_callback=["posthog", "helicone", "llmonitor", "athina"]
 litellm.failure_callback=["sentry", "llmonitor"]
 
 ## set env variables
@@ -30,6 +31,7 @@ os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-u
 os.environ["HELICONE_API_KEY"] = ""
 os.environ["TRACELOOP_API_KEY"] = ""
 os.environ["LLMONITOR_APP_ID"] = ""
+os.environ["ATHINA_API_KEY"] = ""
 
 response = completion(model="gpt-3.5-turbo", messages=messages)
-```
+```
\ No newline at end of file
diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md
index 3f5596fc9e..4c3616e425 100644
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@@ -3,7 +3,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# 🔎 Logging - Custom Callbacks, Langfuse, s3 Bucket, Sentry, OpenTelemetry
+# 🔎 Logging - Custom Callbacks, Langfuse, s3 Bucket, Sentry, OpenTelemetry, Athina
 
 Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, LangFuse, DynamoDB, s3 Bucket
 
@@ -13,7 +13,8 @@ Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTeleme
 - [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets)
 - [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb)
 - [Logging to Sentry](#logging-proxy-inputoutput---sentry)
-- [Logging to Traceloop (OpenTelemetry)](#opentelemetry---traceloop)
+- [Logging to Traceloop (OpenTelemetry)](#logging-proxy-inputoutput-traceloop-opentelemetry)
+- [Logging to Athina](#logging-proxy-inputoutput-athina)
 
 ## Custom Callback Class [Async]
 Use this when you want to run custom callbacks in `python`
@@ -830,4 +831,46 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
   }'
 ```
 
+## Logging Proxy Input/Output Athina
+
+[Athina](https://athina.ai/) allows you to log LLM Input/Output for monitoring, analytics, and observability.
+
+We will use the `--config` to set `litellm.success_callback = ["athina"]`; this will log all successful LLM calls to Athina.
+
+**Step 1**: Set the Athina API key
+
+```shell
+export ATHINA_API_KEY="your-athina-api-key"
+```
+
+**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: gpt-3.5-turbo
+litellm_settings:
+  success_callback: ["athina"]
+```
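+
+`success_callback` accepts a list, so Athina can be combined with other logging callbacks in the same proxy config. A sketch, assuming the additional callback (Langfuse here) is configured with its own environment variables:
+
+```yaml
+litellm_settings:
+  success_callback: ["athina", "langfuse"]
+```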
+
+**Step 3**: Start the proxy, make a test request
+
+Start proxy
+
+```shell
+litellm --config config.yaml --debug
+```
+
+Test Request
+
+```shell
+curl --location 'http://0.0.0.0:8000/chat/completions' \
+    --header 'Content-Type: application/json' \
+    --data ' {
+    "model": "gpt-3.5-turbo",
+    "messages": [
+      {
+        "role": "user",
+        "content": "which llm are you"
+      }
+    ]
+    }'
+```
\ No newline at end of file
diff --git a/docs/my-website/img/athina_dashboard.png b/docs/my-website/img/athina_dashboard.png
new file mode 100644
index 0000000000..05694aab96
Binary files /dev/null and b/docs/my-website/img/athina_dashboard.png differ
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 2955aa6ed8..2747242913 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -170,6 +170,7 @@ const sidebars = {
         "observability/langsmith_integration",
         "observability/slack_integration",
         "observability/traceloop_integration",
+        "observability/athina_integration",
         "observability/llmonitor_integration",
         "observability/helicone_integration",
         "observability/supabase_integration",
diff --git a/litellm/integrations/athina.py b/litellm/integrations/athina.py
new file mode 100644
index 0000000000..f957384ea6
--- /dev/null
+++ b/litellm/integrations/athina.py
@@ -0,0 +1,56 @@
+import datetime
+
+
+class AthinaLogger:
+    def __init__(self):
+        import os
+        self.athina_api_key = os.getenv("ATHINA_API_KEY")
+        self.headers = {
+            "athina-api-key": self.athina_api_key,
+            "Content-Type": "application/json"
+        }
+        self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference"
+        self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response"]
+
+    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
+        import requests
+        import json
+        import traceback
+        try:
+            response_json = response_obj.model_dump() if response_obj else {}
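+            # Assemble the record Athina ingests: model id, raw request and
+            # response, token usage, latency in milliseconds, chat messages,
+            # and any supported metadata keys (see self.additional_keys).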
+            data = {
+                "language_model_id": kwargs.get("model"),
+                "request": kwargs,
+                "response": response_json,
+                "prompt_tokens": response_json.get("usage", {}).get("prompt_tokens"),
+                "completion_tokens": response_json.get("usage", {}).get("completion_tokens"),
+                "total_tokens": response_json.get("usage", {}).get("total_tokens"),
+            }
+
+            if type(end_time) == datetime.datetime and type(start_time) == datetime.datetime:
+                data["response_time"] = int((end_time - start_time).total_seconds() * 1000)
+
+            if "messages" in kwargs:
+                data["prompt"] = kwargs.get("messages", None)
+                if kwargs.get("messages") and len(kwargs.get("messages")) > 0:
+                    data["user_query"] = kwargs.get("messages")[0].get("content", None)
+
+            # Directly add tools or functions if present
+            optional_params = kwargs.get("optional_params", {})
+            data.update((k, v) for k, v in optional_params.items() if k in ["tools", "functions"])
+
+            # Add additional metadata keys
+            metadata = kwargs.get("litellm_params", {}).get("metadata", {})
+            if metadata:
+                for key in self.additional_keys:
+                    if key in metadata:
+                        data[key] = metadata[key]
+
+            response = requests.post(self.athina_logging_url, headers=self.headers, data=json.dumps(data, default=str))
+            if response.status_code != 200:
+                print_verbose(f"Athina Logger Error - {response.text}, {response.status_code}")
+            else:
+                print_verbose(f"Athina Logger Succeeded - {response.text}")
+        except Exception as e:
+            print_verbose(f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}")
+            pass
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index e3516f7fdc..d263490288 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -55,6 +55,7 @@ encoding = tiktoken.get_encoding("cl100k_base")
 import importlib.metadata
 from ._logging import verbose_logger
 from .integrations.traceloop import TraceloopLogger
+from .integrations.athina import AthinaLogger
 from .integrations.helicone import HeliconeLogger
 from .integrations.aispend import AISpendLogger
 from .integrations.berrispend import BerriSpendLogger
@@ -114,6 +115,7 @@ posthog = None
 slack_app = None
 alerts_channel = None
 heliconeLogger = None
+athinaLogger = None
 promptLayerLogger = None
 langsmithLogger = None
 weightsBiasesLogger = None
@@ -1422,6 +1424,17 @@ class Logging:
                                result = kwargs["complete_streaming_response"]
                                # only add to cache once we have a complete streaming response
                                litellm.cache.add_cache(result, **kwargs)
+                    if callback == "athina":
+                        deep_copy = {}
+                        for k, v in self.model_call_details.items():
+                            deep_copy[k] = v
+                        athinaLogger.log_event(
+                            kwargs=deep_copy,
+                            response_obj=result,
+                            start_time=start_time,
+                            end_time=end_time,
+                            print_verbose=print_verbose,
+                        )
                    if callback == "traceloop":
                        deep_copy = {}
                        for k, v in self.model_call_details.items():
@@ -5509,7 +5522,7 @@ def validate_environment(model: Optional[str] = None) -> dict:
 
 
 def set_callbacks(callback_list, function_id=None):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger
     try:
         for callback in callback_list:
             print_verbose(f"callback: {callback}")
@@ -5564,6 +5577,9 @@ def set_callbacks(callback_list, function_id=None):
             print_verbose(f"Initialized Slack App: {slack_app}")
         elif callback == "traceloop":
             traceloopLogger = TraceloopLogger()
+        elif callback == "athina":
+            athinaLogger = AthinaLogger()
+            print_verbose("Initialized Athina Logger")
         elif callback == "helicone":
             heliconeLogger = HeliconeLogger()
         elif callback == "llmonitor":