diff --git a/README.md b/README.md
index 7df894ea1..351b42c13 100644
--- a/README.md
+++ b/README.md
@@ -120,6 +120,7 @@ from litellm import completion
## set env variables for logging tools
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
+os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
@@ -127,7 +128,7 @@ os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"]

# set callbacks
-litellm.success_callback = ["lunary", "langfuse", "athina"] # log input/output to lunary, langfuse, supabase, athina etc
+litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
diff --git a/docs/my-website/docs/getting_started.md b/docs/my-website/docs/getting_started.md
index edbdf3c00..e9b2a0db6 100644
--- a/docs/my-website/docs/getting_started.md
+++ b/docs/my-website/docs/getting_started.md
@@ -87,13 +87,14 @@ from litellm import completion
## set env variables for logging tools
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
+os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"]

# set callbacks
-litellm.success_callback = ["lunary", "langfuse"] # log input/output to langfuse, lunary, supabase
+litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to langfuse, lunary, supabase, helicone

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
diff --git a/docs/my-website/docs/index.md b/docs/my-website/docs/index.md
index 762156f46..6b472ee6c 100644
--- a/docs/my-website/docs/index.md
+++ b/docs/my-website/docs/index.md
@@ -310,6 +310,7 @@ LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone
from litellm import completion

## set env variables for logging tools
+os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
@@ -317,7 +318,7 @@ os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["OPENAI_API_KEY"]

# set callbacks
-litellm.success_callback = ["lunary", "langfuse"] # log input/output to lunary, langfuse, supabase
+litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to lunary, langfuse, supabase, helicone

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
diff --git a/docs/my-website/docs/observability/helicone_integration.md b/docs/my-website/docs/observability/helicone_integration.md
index f7fd330c3..57e7039fc 100644
--- a/docs/my-website/docs/observability/helicone_integration.md
+++ b/docs/my-website/docs/observability/helicone_integration.md
@@ -1,64 +1,170 @@
-# Helicone Tutorial
+# 🧠 Helicone - OSS LLM Observability Platform

:::tip
-This is community maintained, Please make an issue if you run into a bug
+This is community maintained.
+Please make an issue if you run into a bug: https://github.com/BerriAI/litellm

:::

+[Helicone](https://helicone.ai/) is an open source observability platform that proxies your LLM requests and provides key insights into your usage, spend, latency and more.
-[Helicone](https://helicone.ai/) is an open source observability platform that proxies your OpenAI traffic and provides you key insights into your spend, latency and usage.

+## Using Helicone with LiteLLM
-## Use Helicone to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)
-liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses.
+LiteLLM provides `success_callbacks` and `failure_callbacks`, allowing you to easily log data to Helicone based on the status of your responses.
-In this case, we want to log requests to Helicone when a request succeeds.

+### Supported LLM Providers
+
+Helicone can log requests across [various LLM providers](https://docs.helicone.ai/getting-started/quick-start), including:
+
+- OpenAI
+- Azure
+- Anthropic
+- Gemini
+- Groq
+- Cohere
+- Replicate
+- And more
+
+### Integration Methods
+
+There are two main approaches to integrate Helicone with LiteLLM:
+
+1. Using callbacks
+2. Using Helicone as a proxy
+
+Let's explore each method in detail.
+
+### Approach 1: Use Callbacks
+
+Use just 1 line of code to instantly log your responses **across all providers** with Helicone:
-### Approach 1: Use Callbacks
-Use just 1 line of code, to instantly log your responses **across all providers** with helicone:

```python
-litellm.success_callback=["helicone"]
+litellm.success_callback = ["helicone"]
```

-Complete code
-```python
-from litellm import completion
-
-## set env variables
-os.environ["HELICONE_API_KEY"] = "your-helicone-key"
-os.environ["OPENAI_API_KEY"], os.environ["COHERE_API_KEY"] = "", ""
-
-# set callbacks
-litellm.success_callback=["helicone"]
-
-#openai call
-response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
-
-#cohere call
-response = completion(model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}])
-```
-
-### Approach 2: [OpenAI + Azure only] Use Helicone as a proxy
-Helicone provides advanced functionality like caching, etc. Helicone currently supports this for Azure and OpenAI.
-
-If you want to use Helicone to proxy your OpenAI/Azure requests, then you can -
-
-- Set helicone as your base url via: `litellm.api_url`
-- Pass in helicone request headers via: `litellm.headers`
-
Complete Code
+
```python
-import litellm
+import os
from litellm import completion

-litellm.api_base = "https://oai.hconeai.com/v1"
-litellm.headers = {"Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}"}
+## Set env variables
+os.environ["HELICONE_API_KEY"] = "your-helicone-key"
+os.environ["OPENAI_API_KEY"] = "your-openai-key"

-response = litellm.completion(
-  model="gpt-3.5-turbo",
-  messages=[{"role": "user", "content": "how does a court case get to the Supreme Court?"}]
+# Set callbacks
+litellm.success_callback = ["helicone"]
+
+# OpenAI call
+response = completion(
+    model="gpt-4o",
+    messages=[{"role": "user", "content": "Hi 👋 - I'm OpenAI"}],
)

print(response)
```
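Since the callback hooks into LiteLLM itself, that same one-line setup also covers non-OpenAI providers. Here is a minimal sketch of the idea with an extra Anthropic call; the API keys and the `claude-3-haiku-20240307` model name are placeholder choices, so swap in whichever provider and model you actually use:

```python
import os
import litellm
from litellm import completion

## Set env variables (placeholder values)
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["OPENAI_API_KEY"] = "your-openai-key"
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"

# One callback covers every provider routed through LiteLLM
litellm.success_callback = ["helicone"]

# OpenAI call - logged to Helicone
openai_response = completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hi 👋 - I'm OpenAI"}],
)

# Anthropic call - logged to Helicone by the same callback
anthropic_response = completion(
    model="claude-3-haiku-20240307",
    messages=[{"role": "user", "content": "Hi 👋 - I'm Claude"}],
)

print(openai_response)
print(anthropic_response)
```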
### Approach 2: Use Helicone as a proxy

Helicone's proxy provides [advanced functionality](https://docs.helicone.ai/getting-started/proxy-vs-async) like caching, rate limiting, LLM security through [PromptArmor](https://promptarmor.com/) and more.

To use Helicone as a proxy for your LLM requests:

1. Set Helicone as your base URL via: `litellm.api_base`
2. Pass in Helicone request headers via: `litellm.headers`

Complete Code:

```python
import os
import litellm
from litellm import completion

litellm.api_base = "https://oai.hconeai.com/v1"
litellm.headers = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API
}

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "How does a court case get to the Supreme Court?"}]
)

print(response)
```
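If you would rather not set a global `litellm.api_base`, the proxy can usually be targeted per request instead. The sketch below assumes your LiteLLM version accepts `api_base` and `extra_headers` as `completion()` keyword arguments:

```python
import os
from litellm import completion

# Route only this call through Helicone's proxy, without touching global settings
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "How does a court case get to the Supreme Court?"}],
    api_base="https://oai.hconeai.com/v1",  # Helicone proxy endpoint used above
    extra_headers={"Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}"},
)

print(response)
```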
### Advanced Usage

You can add custom metadata and properties to your requests using Helicone headers. Here are some examples:

```python
litellm.headers = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API
    "Helicone-User-Id": "user-abc", # Specify the user making the request
    "Helicone-Property-App": "web", # Custom property to add additional information
    "Helicone-Property-Custom": "any-value", # Add any custom property
    "Helicone-Prompt-Id": "prompt-supreme-court", # Assign an ID to associate this prompt with future versions
    "Helicone-Cache-Enabled": "true", # Enable caching of responses
    "Cache-Control": "max-age=3600", # Set cache limit to 1 hour
    "Helicone-RateLimit-Policy": "10;w=60;s=user", # Set rate limit policy
    "Helicone-Retry-Enabled": "true", # Enable retry mechanism
    "helicone-retry-num": "3", # Set number of retries
    "helicone-retry-factor": "2", # Set exponential backoff factor
    "Helicone-Model-Override": "gpt-3.5-turbo-0613", # Override the model used for cost calculation
    "Helicone-Session-Id": "session-abc-123", # Set session ID for tracking
    "Helicone-Session-Path": "parent-trace/child-trace", # Set session path for hierarchical tracking
    "Helicone-Omit-Response": "false", # Include response in logging (default behavior)
    "Helicone-Omit-Request": "false", # Include request in logging (default behavior)
    "Helicone-LLM-Security-Enabled": "true", # Enable LLM security features
    "Helicone-Moderations-Enabled": "true", # Enable content moderation
    "Helicone-Fallbacks": '["gpt-3.5-turbo", "gpt-4"]', # Set fallback models
}
```

### Caching and Rate Limiting

Enable caching and set up rate limiting policies:

```python
litellm.headers = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API
    "Helicone-Cache-Enabled": "true", # Enable caching of responses
    "Cache-Control": "max-age=3600", # Set cache limit to 1 hour
    "Helicone-RateLimit-Policy": "100;w=3600;s=user", # Set rate limit policy
}
```

### Session Tracking and Tracing

Track multi-step and agentic LLM interactions using session IDs and paths:

```python
litellm.headers = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API
    "Helicone-Session-Id": "session-abc-123", # The session ID you want to track
    "Helicone-Session-Path": "parent-trace/child-trace", # The path of the session
}
```

- `Helicone-Session-Id`: Use this to specify the unique identifier for the session you want to track. This allows you to group related requests together.
- `Helicone-Session-Path`: This header defines the path of the session, allowing you to represent parent and child traces. For example, "parent/child" represents a child trace of a parent trace.

By using these two headers, you can effectively group and visualize multi-step LLM interactions, gaining insights into complex AI workflows.
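For example, a two-step workflow could reuse one session ID and vary only the path. The sketch below assumes the proxy setup from Approach 2 so that `litellm.headers` reaches Helicone; the session ID, path names, and model are illustrative values:

```python
import os
import uuid
import litellm
from litellm import completion

# Proxy setup from Approach 2
litellm.api_base = "https://oai.hconeai.com/v1"

session_id = str(uuid.uuid4())  # one ID shared by every step of the workflow

# Step 1: parent trace
litellm.headers = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}",
    "Helicone-Session-Id": session_id,
    "Helicone-Session-Path": "research",  # parent trace
}
outline = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Outline the history of the Supreme Court."}],
)

# Step 2: child trace in the same session
litellm.headers = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}",
    "Helicone-Session-Id": session_id,
    "Helicone-Session-Path": "research/summary",  # child of the "research" trace
}
summary = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Summarize: " + outline.choices[0].message.content}],
)

print(summary)
```

Requests that share the same `Helicone-Session-Id` should then appear grouped as one session in Helicone, with `research/summary` nested under `research`.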
### Retry and Fallback Mechanisms

Set up retry mechanisms and fallback options:

```python
litellm.headers = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API
    "Helicone-Retry-Enabled": "true", # Enable retry mechanism
    "helicone-retry-num": "3", # Set number of retries
    "helicone-retry-factor": "2", # Set exponential backoff factor
    "Helicone-Fallbacks": '["gpt-3.5-turbo", "gpt-4"]', # Set fallback models
}
```

> **Supported Headers** - For a full list of supported Helicone headers and their descriptions, please refer to the [Helicone documentation](https://docs.helicone.ai/getting-started/quick-start).
> By utilizing these headers and metadata options, you can gain deeper insights into your LLM usage, optimize performance, and better manage your AI workflows with Helicone and LiteLLM.
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 68b4601ce..094fc7aec 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -188,6 +188,7 @@ const sidebars = {
type: "category",
label: "Logging & Observability",
items: [
+ "observability/helicone_integration",
"observability/langfuse_integration",
"observability/logfire_integration",
"debugging/local_debugging",
@@ -204,7 +205,6 @@ const sidebars = {
"observability/athina_integration",
"observability/lunary_integration",
"observability/greenscale_integration",
- "observability/helicone_integration",
"observability/supabase_integration",
`observability/telemetry`,
],
diff --git a/docs/my-website/src/pages/index.md b/docs/my-website/src/pages/index.md
index 126e83688..308ed0831 100644
--- a/docs/my-website/src/pages/index.md
+++ b/docs/my-website/src/pages/index.md
@@ -304,6 +304,7 @@ LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone
from litellm import completion

## set env variables for logging tools
+os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"