Merge pull request #2163 from vivek-athina/feature/athina

Support for Athina logging
Ishaan Jaff 2024-02-23 09:39:06 -08:00 committed by GitHub
commit 5ec3075fb1
9 changed files with 177 additions and 8 deletions


@@ -100,7 +100,7 @@ for part in response:
```
## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre-defined callbacks to send data to Langfuse, DynamoDB, s3 Buckets, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre-defined callbacks to send data to Langfuse, DynamoDB, s3 Buckets, LLMonitor, Helicone, Promptlayer, Traceloop, Athina, Slack
```python
from litellm import completion
@@ -108,11 +108,12 @@ from litellm import completion
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"]
# set callbacks
litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
litellm.success_callback = ["langfuse", "llmonitor", "athina"] # log input/output to langfuse, llmonitor, supabase, athina etc
# openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```


@@ -33,7 +33,7 @@
- Call all models using the OpenAI format - `completion(model, messages)`
- Text responses will always be available at `['choices'][0]['message']['content']`
- **Error Handling** Using Model Fallbacks (if `GPT-4` fails, try `llama2`)
- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `LLMonitor`, `Helicone` (any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/)
- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `LLMonitor`, `Athina`, `Helicone` (any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/)
**Example: Logs sent to Supabase**
<img width="1015" alt="Screenshot 2023-08-11 at 4 02 46 PM" src="https://github.com/ishaan-jaff/proxy-server/assets/29436595/237557b8-ba09-4917-982c-8f3e1b2c8d08">


@@ -0,0 +1,50 @@
import Image from '@theme/IdealImage';
# Athina
[Athina](https://athina.ai/) is an evaluation framework and production monitoring platform for your LLM-powered app. Athina is designed to enhance the performance and reliability of AI applications through real-time monitoring, granular analytics, and plug-and-play evaluations.
<Image img={require('../../athina_dashboard.png')} />
## Getting Started
Use Athina to log requests across all LLM providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM).
liteLLM provides `callbacks`, making it easy for you to log data depending on the status of your responses.
## Using Callbacks
First, sign up to get an API key on the [Athina dashboard](https://app.athina.ai).
Use just one line of code to instantly log your responses **across all providers** with Athina:
```python
litellm.success_callback = ["athina"]
```
### Complete code
```python
import os
import litellm
from litellm import completion

## set env variables
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"] = ""

# set callback
litellm.success_callback = ["athina"]

# openai call
response = completion(
  model="gpt-3.5-turbo",
  messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]
)
```
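Beyond the basics, the logger also promotes a set of optional metadata keys (`environment`, `prompt_slug`, `customer_id`, `customer_user_id`, `session_id`, `external_reference_id`, `context`, `expected_response`) to top-level fields in the logged record. A minimal sketch, assuming the `metadata` kwarg on `completion` is forwarded to the callback via `litellm_params` (the key values below are illustrative):
```python
import os
import litellm
from litellm import completion

os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"] = ""

litellm.success_callback = ["athina"]

# Athina-specific metadata; keys outside the supported list are not
# promoted to top-level fields in the logged record
response = completion(
  model="gpt-3.5-turbo",
  messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
  metadata={
    "environment": "production",      # illustrative values
    "prompt_slug": "greeting-prompt",
    "customer_id": "customer-123",
  },
)
```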
## Support & Talk with us
- [Schedule Demo 👋](https://cal.com/shiv-athina/30min)
- [Website 💻](https://athina.ai/?utm_source=litellm&utm_medium=website)
- [Docs 📖](https://docs.athina.ai/?utm_source=litellm&utm_medium=website)
- [Demo Video 📺](https://www.loom.com/share/d9ef2c62e91b46769a39c42bb6669834?sid=711df413-0adb-4267-9708-5f29cef929e3)
- Our emails ✉️ shiv@athina.ai, akshat@athina.ai, vivek@athina.ai


@@ -10,6 +10,7 @@ liteLLM supports:
- [LLMonitor](https://llmonitor.com/docs)
- [Helicone](https://docs.helicone.ai/introduction)
- [Traceloop](https://traceloop.com/docs)
- [Athina](https://docs.athina.ai/)
- [Sentry](https://docs.sentry.io/platforms/python/)
- [PostHog](https://posthog.com/docs/libraries/python)
- [Slack](https://slack.dev/bolt-python/concepts)
@@ -21,7 +22,7 @@ from litellm import completion
# set callbacks
litellm.input_callback=["sentry"] # for sentry breadcrumbing - logs the input being sent to the api
litellm.success_callback=["posthog", "helicone", "llmonitor"]
litellm.success_callback=["posthog", "helicone", "llmonitor", "athina"]
litellm.failure_callback=["sentry", "llmonitor"]
## set env variables
@@ -30,6 +31,7 @@ os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-u
os.environ["HELICONE_API_KEY"] = ""
os.environ["TRACELOOP_API_KEY"] = ""
os.environ["LLMONITOR_APP_ID"] = ""
os.environ["ATHINA_API_KEY"] = ""
response = completion(model="gpt-3.5-turbo", messages=messages)
```


@@ -3,7 +3,7 @@ import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# 🔎 Logging - Custom Callbacks, Langfuse, s3 Bucket, Sentry, OpenTelemetry
# 🔎 Logging - Custom Callbacks, Langfuse, s3 Bucket, Sentry, OpenTelemetry, Athina
Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, DynamoDB, s3 Bucket
@@ -13,7 +13,8 @@ Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTeleme
- [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets)
- [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb)
- [Logging to Sentry](#logging-proxy-inputoutput---sentry)
- [Logging to Traceloop (OpenTelemetry)](#opentelemetry---traceloop)
- [Logging to Traceloop (OpenTelemetry)](#logging-proxy-inputoutput-traceloop-opentelemetry)
- [Logging to Athina](#logging-proxy-inputoutput-athina)
## Custom Callback Class [Async]
Use this when you want to run custom callbacks in `python`
@@ -830,4 +831,46 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
}'
```
## Logging Proxy Input/Output Athina
[Athina](https://athina.ai/) allows you to log LLM Input/Output for monitoring, analytics, and observability.
We will use the `--config` flag to set `litellm.success_callback = ["athina"]`. This will log all successful LLM calls to Athina.
**Step 1** Set Athina API key
```shell
export ATHINA_API_KEY="your-athina-api-key"
```
**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

litellm_settings:
  success_callback: ["athina"]
```
**Step 3**: Start the proxy, make a test request
Start proxy
```shell
litellm --config config.yaml --debug
```
Test Request
```shell
curl --location 'http://0.0.0.0:8000/chat/completions' \
    --header 'Content-Type: application/json' \
    --data '{
        "model": "gpt-3.5-turbo",
        "messages": [
            {
                "role": "user",
                "content": "which llm are you"
            }
        ]
    }'
```
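The same optional Athina metadata keys can ride along on proxy requests in a `metadata` field, assuming the proxy forwards the request body's `metadata` to the logging callback (the field values below are illustrative). A sketch using Python's `requests`:
```python
import requests

# Test request against the proxy started above, with illustrative Athina metadata
response = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    headers={"Content-Type": "application/json"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "which llm are you"}],
        "metadata": {"environment": "staging", "session_id": "session-42"},
    },
)
print(response.json())
```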

Binary file not shown: new image `athina_dashboard.png` (2 MiB).


@@ -170,6 +170,7 @@ const sidebars = {
"observability/langsmith_integration",
"observability/slack_integration",
"observability/traceloop_integration",
"observability/athina_integration",
"observability/llmonitor_integration",
"observability/helicone_integration",
"observability/supabase_integration",


@@ -0,0 +1,56 @@
import datetime


class AthinaLogger:
    def __init__(self):
        import os

        self.athina_api_key = os.getenv("ATHINA_API_KEY")
        self.headers = {
            "athina-api-key": self.athina_api_key,
            "Content-Type": "application/json",
        }
        self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference"
        # Optional metadata keys promoted to top-level fields when present
        self.additional_keys = [
            "environment",
            "prompt_slug",
            "customer_id",
            "customer_user_id",
            "session_id",
            "external_reference_id",
            "context",
            "expected_response",
        ]

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        import json
        import traceback

        import requests

        try:
            response_json = response_obj.model_dump() if response_obj else {}
            data = {
                "language_model_id": kwargs.get("model"),
                "request": kwargs,
                "response": response_json,
                "prompt_tokens": response_json.get("usage", {}).get("prompt_tokens"),
                "completion_tokens": response_json.get("usage", {}).get("completion_tokens"),
                "total_tokens": response_json.get("usage", {}).get("total_tokens"),
            }

            # Response time in milliseconds
            if isinstance(end_time, datetime.datetime) and isinstance(start_time, datetime.datetime):
                data["response_time"] = int((end_time - start_time).total_seconds() * 1000)

            if "messages" in kwargs:
                data["prompt"] = kwargs.get("messages", None)
            if kwargs.get("messages") and len(kwargs.get("messages")) > 0:
                data["user_query"] = kwargs.get("messages")[0].get("content", None)

            # Directly add tools or functions if present
            optional_params = kwargs.get("optional_params", {})
            data.update((k, v) for k, v in optional_params.items() if k in ["tools", "functions"])

            # Add additional metadata keys
            metadata = kwargs.get("litellm_params", {}).get("metadata", {})
            if metadata:
                for key in self.additional_keys:
                    if key in metadata:
                        data[key] = metadata[key]

            response = requests.post(
                self.athina_logging_url,
                headers=self.headers,
                data=json.dumps(data, default=str),
            )
            if response.status_code != 200:
                print_verbose(f"Athina Logger Error - {response.text}, {response.status_code}")
            else:
                print_verbose(f"Athina Logger Succeeded - {response.text}")
        except Exception as e:
            print_verbose(f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}")
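For orientation, a minimal sketch of calling the logger directly, outside LiteLLM's callback machinery; the `kwargs` shape mirrors what the `Logging` class passes in (values are illustrative, and `response_obj=None` exercises the empty-response branch):

```python
import datetime
import os

os.environ.setdefault("ATHINA_API_KEY", "your-athina-api-key")  # placeholder key

logger = AthinaLogger()
start = datetime.datetime.now()
end = start + datetime.timedelta(milliseconds=250)

logger.log_event(
    kwargs={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hi 👋 - i'm openai"}],
        "litellm_params": {"metadata": {"environment": "dev"}},
    },
    response_obj=None,  # a real call passes the ModelResponse from completion()
    start_time=start,
    end_time=end,
    print_verbose=print,
)
```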


@@ -55,6 +55,7 @@ encoding = tiktoken.get_encoding("cl100k_base")
import importlib.metadata
from ._logging import verbose_logger
from .integrations.traceloop import TraceloopLogger
from .integrations.athina import AthinaLogger
from .integrations.helicone import HeliconeLogger
from .integrations.aispend import AISpendLogger
from .integrations.berrispend import BerriSpendLogger
@@ -114,6 +115,7 @@ posthog = None
slack_app = None
alerts_channel = None
heliconeLogger = None
athinaLogger = None
promptLayerLogger = None
langsmithLogger = None
weightsBiasesLogger = None
@@ -1422,6 +1424,17 @@ class Logging:
result = kwargs["complete_streaming_response"]
# only add to cache once we have a complete streaming response
litellm.cache.add_cache(result, **kwargs)
if callback == "athina":
deep_copy = {}
for k, v in self.model_call_details.items():
deep_copy[k] = v
athinaLogger.log_event(
kwargs=deep_copy,
response_obj=result,
start_time=start_time,
end_time=end_time,
print_verbose=print_verbose,
)
if callback == "traceloop":
deep_copy = {}
for k, v in self.model_call_details.items():
@@ -5509,7 +5522,7 @@ def validate_environment(model: Optional[str] = None) -> dict:
def set_callbacks(callback_list, function_id=None):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger
try:
for callback in callback_list:
print_verbose(f"callback: {callback}")
@@ -5564,6 +5577,9 @@ def set_callbacks(callback_list, function_id=None):
    print_verbose(f"Initialized Slack App: {slack_app}")
elif callback == "traceloop":
    traceloopLogger = TraceloopLogger()
elif callback == "athina":
    athinaLogger = AthinaLogger()
    print_verbose("Initialized Athina Logger")
elif callback == "helicone":
    heliconeLogger = HeliconeLogger()
elif callback == "llmonitor":