From a3d9e34b262cf652375e9aed31460b6e50a0d79f Mon Sep 17 00:00:00 2001 From: colegottdank Date: Mon, 8 Jul 2024 13:56:53 -0700 Subject: [PATCH 1/6] Fix 2 --- README.md | 1 + docs/my-website/docs/getting_started.md | 1 + docs/my-website/docs/index.md | 1 + .../observability/helicone_integration.md | 36 ++++++++++++------- docs/my-website/sidebars.js | 2 +- docs/my-website/src/pages/index.md | 1 + 6 files changed, 29 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 6d26e92c2..fee0a186b 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ from litellm import completion ## set env variables for logging tools os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" +os.environ["HELICONE_AUTH"] = "your-helicone-auth-key" os.environ["LANGFUSE_PUBLIC_KEY"] = "" os.environ["LANGFUSE_SECRET_KEY"] = "" os.environ["ATHINA_API_KEY"] = "your-athina-api-key" diff --git a/docs/my-website/docs/getting_started.md b/docs/my-website/docs/getting_started.md index edbdf3c00..c5381ed71 100644 --- a/docs/my-website/docs/getting_started.md +++ b/docs/my-website/docs/getting_started.md @@ -87,6 +87,7 @@ from litellm import completion ## set env variables for logging tools os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" +os.environ["HELICONE_API_KEY"] = "your-helicone-key" os.environ["LANGFUSE_PUBLIC_KEY"] = "" os.environ["LANGFUSE_SECRET_KEY"] = "" diff --git a/docs/my-website/docs/index.md b/docs/my-website/docs/index.md index 762156f46..582be1dad 100644 --- a/docs/my-website/docs/index.md +++ b/docs/my-website/docs/index.md @@ -310,6 +310,7 @@ LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone from litellm import completion ## set env variables for logging tools +os.environ["HELICONE_API_KEY"] = "your-helicone-key" os.environ["LANGFUSE_PUBLIC_KEY"] = "" os.environ["LANGFUSE_SECRET_KEY"] = "" os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" diff --git a/docs/my-website/docs/observability/helicone_integration.md b/docs/my-website/docs/observability/helicone_integration.md index f7fd330c3..befb770d3 100644 --- a/docs/my-website/docs/observability/helicone_integration.md +++ b/docs/my-website/docs/observability/helicone_integration.md @@ -1,4 +1,4 @@ -# Helicone Tutorial +# 🧠 Helicone - OSS LLM Observability Platform :::tip @@ -7,47 +7,52 @@ https://github.com/BerriAI/litellm ::: - [Helicone](https://helicone.ai/) is an open source observability platform that proxies your OpenAI traffic and provides you key insights into your spend, latency and usage. ## Use Helicone to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM) -liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses. -In this case, we want to log requests to Helicone when a request succeeds. +liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses. + +In this case, we want to log requests to Helicone when a request succeeds. 
+ +### Approach 1: Use Callbacks + +Use just 1 line of code, to instantly log your responses **across all providers** with helicone: -### Approach 1: Use Callbacks -Use just 1 line of code, to instantly log your responses **across all providers** with helicone: ```python litellm.success_callback=["helicone"] ``` Complete code + ```python from litellm import completion ## set env variables -os.environ["HELICONE_API_KEY"] = "your-helicone-key" +os.environ["HELICONE_API_KEY"] = "your-helicone-key" os.environ["OPENAI_API_KEY"], os.environ["COHERE_API_KEY"] = "", "" # set callbacks litellm.success_callback=["helicone"] #openai call -response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) +response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) #cohere call -response = completion(model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]) +response = completion(model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]) ``` ### Approach 2: [OpenAI + Azure only] Use Helicone as a proxy + Helicone provides advanced functionality like caching, etc. Helicone currently supports this for Azure and OpenAI. -If you want to use Helicone to proxy your OpenAI/Azure requests, then you can - +If you want to use Helicone to proxy your OpenAI/Azure requests, then you can - -- Set helicone as your base url via: `litellm.api_url` -- Pass in helicone request headers via: `litellm.headers` +- Set helicone as your base url via: `litellm.api_url` +- Pass in helicone request headers via: `litellm.headers` Complete Code + ```python import litellm from litellm import completion @@ -62,3 +67,10 @@ response = litellm.completion( print(response) ``` + +### Group and visualize multi-step LLM interactions. + +Track request flows across multiple traces and gain insights into complex AI workflows by adding only 2 simple headers. 
+ +- `Helicone-Session-Id` - The session id you want to track +- `Helicone-Session-Path` - The path of the session diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 3f52111bd..2eaec7869 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -186,6 +186,7 @@ const sidebars = { type: "category", label: "Logging & Observability", items: [ + "observability/helicone_integration", "observability/langfuse_integration", "observability/logfire_integration", "debugging/local_debugging", @@ -202,7 +203,6 @@ const sidebars = { "observability/athina_integration", "observability/lunary_integration", "observability/greenscale_integration", - "observability/helicone_integration", "observability/supabase_integration", `observability/telemetry`, ], diff --git a/docs/my-website/src/pages/index.md b/docs/my-website/src/pages/index.md index 126e83688..308ed0831 100644 --- a/docs/my-website/src/pages/index.md +++ b/docs/my-website/src/pages/index.md @@ -304,6 +304,7 @@ LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone from litellm import completion ## set env variables for logging tools +os.environ["HELICONE_API_KEY"] = "your-helicone-key" os.environ["LANGFUSE_PUBLIC_KEY"] = "" os.environ["LANGFUSE_SECRET_KEY"] = "" os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" From 20561bcaaf0848ea6c7088f4868061a48dfa84f0 Mon Sep 17 00:00:00 2001 From: colegottdank Date: Mon, 8 Jul 2024 14:17:30 -0700 Subject: [PATCH 2/6] Update key name --- README.md | 74 +++++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index fee0a186b..a4a55266b 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ LiteLLM manages: [**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs)
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs) -🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. +🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+). @@ -120,7 +120,7 @@ from litellm import completion ## set env variables for logging tools os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" -os.environ["HELICONE_AUTH"] = "your-helicone-auth-key" +os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key" os.environ["LANGFUSE_PUBLIC_KEY"] = "" os.environ["LANGFUSE_SECRET_KEY"] = "" os.environ["ATHINA_API_KEY"] = "your-athina-api-key" @@ -149,7 +149,6 @@ The proxy provides: ## 📖 Proxy Endpoints - [Swagger Docs](https://litellm-api.up.railway.app/) - ## Quick Start Proxy - CLI ```shell @@ -199,7 +198,6 @@ source .env docker-compose up ``` - UI on `/ui` on your proxy server ![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033) @@ -227,38 +225,38 @@ curl 'http://0.0.0.0:4000/key/generate' \ ## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers)) | Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) | -|-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------| -| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | | -| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | | -| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | | -| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | | -| 
[huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | | -| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | | -| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | | -| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | | -| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | | -| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | | -| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | | -| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | | -| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | | -| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | | -| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | | -| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | | -| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | | -| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | | -| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | | -| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | | -| [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | | +| ----------------------------------------------------------------------------------- | ------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------- | +| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | | +| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | | +| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | | +| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | | +| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | | +| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | 
✅ | | | +| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | | +| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | | +| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | | +| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | | +| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | | +| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | | +| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | | +| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | | +| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | | +| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | | +| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | | +| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | | +| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | | +| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | | +| [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | | [**Read the Docs**](https://docs.litellm.ai/docs/) @@ -294,11 +292,13 @@ Step 4: Submit a PR with your changes! 🚀 - submit a PR from there # Enterprise + For companies that need better security, user management and professional support [Talk to founders](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) -This covers: +This covers: + - ✅ **Features under the [LiteLLM Commercial License](https://docs.litellm.ai/docs/proxy/enterprise):** - ✅ **Feature Prioritization** - ✅ **Custom Integrations** From e7c16cc4887eef8132b1ac9db83a9efe1b8c8740 Mon Sep 17 00:00:00 2001 From: colegottdank Date: Mon, 8 Jul 2024 15:27:58 -0700 Subject: [PATCH 3/6] Update Helicone docs --- .../observability/helicone_integration.md | 183 +++++++++++++----- 1 file changed, 138 insertions(+), 45 deletions(-) diff --git a/docs/my-website/docs/observability/helicone_integration.md b/docs/my-website/docs/observability/helicone_integration.md index befb770d3..3eef52092 100644 --- a/docs/my-website/docs/observability/helicone_integration.md +++ b/docs/my-website/docs/observability/helicone_integration.md @@ -2,75 +2,168 @@ :::tip -This is community maintained, Please make an issue if you run into a bug +This is community maintained. Please make an issue if you run into a bug: https://github.com/BerriAI/litellm ::: -[Helicone](https://helicone.ai/) is an open source observability platform that proxies your OpenAI traffic and provides you key insights into your spend, latency and usage. +[Helicone](https://helicone.ai/) is an open source observability platform that proxies your LLM requests and provides key insights into your usage, spend, latency and more. -## Use Helicone to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM) +## Using Helicone with LiteLLM -liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses. 
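For instance, a minimal sketch of this status-based routing (the `sentry` failure target here is purely illustrative; substitute any logger LiteLLM supports for failure callbacks):

```python
import litellm

# successful responses go to Helicone...
litellm.success_callback = ["helicone"]

# ...while failed requests can be routed to a different logger
litellm.failure_callback = ["sentry"]  # illustrative target, not required by Helicone
```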
+LiteLLM provides `success_callbacks` and `failure_callbacks`, allowing you to easily log data to Helicone based on the status of your responses. -In this case, we want to log requests to Helicone when a request succeeds. +### Supported LLM Providers + +Helicone can log requests across [various LLM providers](https://docs.helicone.ai/getting-started/quick-start), including: + +- OpenAI +- Azure +- Anthropic +- Cohere +- Replicate +- PaLM +- And more + +### Integration Methods + +There are two main approaches to integrate Helicone with LiteLLM: + +1. Using callbacks +2. Using Helicone as a proxy + +Let's explore each method in detail. ### Approach 1: Use Callbacks -Use just 1 line of code, to instantly log your responses **across all providers** with helicone: +Use just 1 line of code to instantly log your responses **across all providers** with Helicone: ```python -litellm.success_callback=["helicone"] +litellm.success_callback = ["helicone"] ``` -Complete code - -```python -from litellm import completion - -## set env variables -os.environ["HELICONE_API_KEY"] = "your-helicone-key" -os.environ["OPENAI_API_KEY"], os.environ["COHERE_API_KEY"] = "", "" - -# set callbacks -litellm.success_callback=["helicone"] - -#openai call -response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) - -#cohere call -response = completion(model="command-nightly", messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}]) -``` - -### Approach 2: [OpenAI + Azure only] Use Helicone as a proxy - -Helicone provides advanced functionality like caching, etc. Helicone currently supports this for Azure and OpenAI. - -If you want to use Helicone to proxy your OpenAI/Azure requests, then you can - - -- Set helicone as your base url via: `litellm.api_url` -- Pass in helicone request headers via: `litellm.headers` - Complete Code ```python -import litellm +import os from litellm import completion -litellm.api_base = "https://oai.hconeai.com/v1" -litellm.headers = {"Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}"} +## Set env variables +os.environ["HELICONE_API_KEY"] = "your-helicone-key" +os.environ["OPENAI_API_KEY"] = "your-openai-key" -response = litellm.completion( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "how does a court case get to the Supreme Court?"}] +# Set callbacks +litellm.success_callback = ["helicone"] + +# OpenAI call +response = completion( + model="gpt-4o", + messages=[{"role": "user", "content": "Hi 👋 - I'm OpenAI"}], ) print(response) ``` -### Group and visualize multi-step LLM interactions. +### Approach 2: Use Helicone as a proxy -Track request flows across multiple traces and gain insights into complex AI workflows by adding only 2 simple headers. +Helicone's proxy provides [advanced functionality](https://docs.helicone.ai/getting-started/proxy-vs-async) like caching, rate limiting, LLM security through [PromptArmor](https://promptarmor.com/) and more. -- `Helicone-Session-Id` - The session id you want to track -- `Helicone-Session-Path` - The path of the session +To use Helicone as a proxy for your LLM requests: + +1. Set Helicone as your base URL via: litellm.api_base +2. 
Pass in Helicone request headers via: litellm.headers + +Complete Code: + +```python +import os +import litellm +from litellm import completion + +litellm.api_base = "https://oai.hconeai.com/v1" +litellm.headers = { + "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API +} + +response = litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "How does a court case get to the Supreme Court?"}] +) + +print(response) +``` + +### Advanced Usage + +You can add custom metadata and properties to your requests using Helicone headers. Here are some examples: + +```python +litellm.headers = { + "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API + "Helicone-User-Id": "user-abc", # Specify the user making the request + "Helicone-Property-App": "web", # Custom property to add additional information + "Helicone-Property-Custom": "any-value", # Add any custom property + "Helicone-Prompt-Id": "prompt-supreme-court", # Assign an ID to associate this prompt with future versions + "Helicone-Cache-Enabled": "true", # Enable caching of responses + "Cache-Control": "max-age=3600", # Set cache limit to 1 hour + "Helicone-RateLimit-Policy": "10;w=60;s=user", # Set rate limit policy + "Helicone-Retry-Enabled": "true", # Enable retry mechanism + "helicone-retry-num": "3", # Set number of retries + "helicone-retry-factor": "2", # Set exponential backoff factor + "Helicone-Model-Override": "gpt-3.5-turbo-0613", # Override the model used for cost calculation + "Helicone-Session-Id": "session-abc-123", # Set session ID for tracking + "Helicone-Session-Path": "parent-trace/child-trace", # Set session path for hierarchical tracking + "Helicone-Omit-Response": "false", # Include response in logging (default behavior) + "Helicone-Omit-Request": "false", # Include request in logging (default behavior) + "Helicone-LLM-Security-Enabled": "true", # Enable LLM security features + "Helicone-Moderations-Enabled": "true", # Enable content moderation + "Helicone-Fallbacks": '["gpt-3.5-turbo", "gpt-4"]', # Set fallback models +} +``` + +### Caching and Rate Limiting + +Enable caching and set up rate limiting policies: + +```python +litellm.headers = { + "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API + "Helicone-Cache-Enabled": "true", # Enable caching of responses + "Cache-Control": "max-age=3600", # Set cache limit to 1 hour + "Helicone-RateLimit-Policy": "100;w=3600;s=user", # Set rate limit policy +} +``` + +### Session Tracking and Tracing + +Track multi-step and agentic LLM interactions using session IDs and paths: + +```python +litellm.headers = { + "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API + "Helicone-Session-Id": "session-abc-123", # The session ID you want to track + "Helicone-Session-Path": "parent-trace/child-trace", # The path of the session +} +``` + +- `Helicone-Session-Id`: Use this to specify the unique identifier for the session you want to track. This allows you to group related requests together. +- `Helicone-Session-Path`: This header defines the path of the session, allowing you to represent parent and child traces. For example, "parent/child" represents a child trace of a parent trace. + +By using these two headers, you can effectively group and visualize multi-step LLM interactions, gaining insights into complex AI workflows. 
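For a concrete picture of how the two headers combine, here is a sketch of a two-step workflow grouped under one session (this assumes the Approach 2 proxy setup above; the session ID, paths, and prompts are placeholder values):

```python
import os
import uuid
import litellm
from litellm import completion

litellm.api_base = "https://oai.hconeai.com/v1"
session_id = str(uuid.uuid4())  # one ID shared by every step in the workflow

# Step 1: the parent trace
litellm.headers = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}",
    "Helicone-Session-Id": session_id,
    "Helicone-Session-Path": "parent-trace",  # top-level step
}
outline = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Outline a short essay on whales."}],
)

# Step 2: a child trace nested under the parent, same session ID
litellm.headers["Helicone-Session-Path"] = "parent-trace/child-trace"
draft = completion(
    model="gpt-3.5-turbo",
    messages=[{
        "role": "user",
        "content": "Expand this outline: " + outline.choices[0].message.content,
    }],
)
```

Both requests should then appear under the same session in Helicone, with the second step rendered as a child of the first.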
+ +### Retry and Fallback Mechanisms + +Set up retry mechanisms and fallback options: + +```python +litellm.headers = { + "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", # Authenticate to send requests to Helicone API + "Helicone-Retry-Enabled": "true", # Enable retry mechanism + "helicone-retry-num": "3", # Set number of retries + "helicone-retry-factor": "2", # Set exponential backoff factor + "Helicone-Fallbacks": '["gpt-3.5-turbo", "gpt-4"]', # Set fallback models +} +``` + +> **Supported Headers** - For a full list of supported Helicone headers and their descriptions, please refer to the [Helicone documentation](https://docs.helicone.ai/getting-started/quick-start). +> By utilizing these headers and metadata options, you can gain deeper insights into your LLM usage, optimize performance, and better manage your AI workflows with Helicone and LiteLLM. From 0de776bca25c4b42235f76a460d92399f9cd2870 Mon Sep 17 00:00:00 2001 From: colegottdank Date: Mon, 8 Jul 2024 15:30:33 -0700 Subject: [PATCH 4/6] bye palm --- docs/my-website/docs/observability/helicone_integration.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/observability/helicone_integration.md b/docs/my-website/docs/observability/helicone_integration.md index 3eef52092..57e7039fc 100644 --- a/docs/my-website/docs/observability/helicone_integration.md +++ b/docs/my-website/docs/observability/helicone_integration.md @@ -20,9 +20,10 @@ Helicone can log requests across [various LLM providers](https://docs.helicone.a - OpenAI - Azure - Anthropic +- Gemini +- Groq - Cohere - Replicate -- PaLM - And more ### Integration Methods From 705c498641de17225cd7de214881aaa9aca2d870 Mon Sep 17 00:00:00 2001 From: colegottdank Date: Mon, 8 Jul 2024 15:35:50 -0700 Subject: [PATCH 5/6] revert readme --- README.md | 73 +++++++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index a4a55266b..6d26e92c2 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ LiteLLM manages: [**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs)
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs) -🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. +🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+). @@ -120,7 +120,6 @@ from litellm import completion ## set env variables for logging tools os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" -os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key" os.environ["LANGFUSE_PUBLIC_KEY"] = "" os.environ["LANGFUSE_SECRET_KEY"] = "" os.environ["ATHINA_API_KEY"] = "your-athina-api-key" @@ -149,6 +148,7 @@ The proxy provides: ## 📖 Proxy Endpoints - [Swagger Docs](https://litellm-api.up.railway.app/) + ## Quick Start Proxy - CLI ```shell @@ -198,6 +198,7 @@ source .env docker-compose up ``` + UI on `/ui` on your proxy server ![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033) @@ -225,38 +226,38 @@ curl 'http://0.0.0.0:4000/key/generate' \ ## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers)) | Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) | -| ----------------------------------------------------------------------------------- | ------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------- | -| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | | -| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | | -| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | | -| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | | -| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | 
| -| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | | -| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | | -| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | | -| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | | -| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | | -| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | | -| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | | -| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | | -| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | | -| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | | -| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | | -| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | | -| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | | -| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | | -| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | | -| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | | -| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | | -| [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | | +|-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------| +| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | | +| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | | +| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | | +| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | | +| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | | +| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | | +| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | 
✅ | | | +| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | | +| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | | +| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | | +| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | | +| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | | +| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | | +| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | | +| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | | +| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | | +| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | | +| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | | +| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | | +| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | | +| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | | +| [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | | [**Read the Docs**](https://docs.litellm.ai/docs/) @@ -292,13 +293,11 @@ Step 4: Submit a PR with your changes! 🚀 - submit a PR from there # Enterprise - For companies that need better security, user management and professional support [Talk to founders](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) -This covers: - +This covers: - ✅ **Features under the [LiteLLM Commercial License](https://docs.litellm.ai/docs/proxy/enterprise):** - ✅ **Feature Prioritization** - ✅ **Custom Integrations** From f414bafd4a61b5d9e01e9ec82e6795d7871e0a06 Mon Sep 17 00:00:00 2001 From: colegottdank Date: Mon, 8 Jul 2024 15:41:35 -0700 Subject: [PATCH 6/6] Add Helicone --- README.md | 3 ++- docs/my-website/docs/getting_started.md | 2 +- docs/my-website/docs/index.md | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6d26e92c2..b2eaacd54 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ from litellm import completion ## set env variables for logging tools os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" +os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key" os.environ["LANGFUSE_PUBLIC_KEY"] = "" os.environ["LANGFUSE_SECRET_KEY"] = "" os.environ["ATHINA_API_KEY"] = "your-athina-api-key" @@ -127,7 +128,7 @@ os.environ["ATHINA_API_KEY"] = "your-athina-api-key" os.environ["OPENAI_API_KEY"] # set callbacks -litellm.success_callback = ["lunary", "langfuse", "athina"] # log input/output to lunary, langfuse, supabase, athina etc +litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc #openai call response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) diff --git a/docs/my-website/docs/getting_started.md b/docs/my-website/docs/getting_started.md index c5381ed71..e9b2a0db6 100644 --- a/docs/my-website/docs/getting_started.md +++ b/docs/my-website/docs/getting_started.md @@ -94,7 +94,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = "" os.environ["OPENAI_API_KEY"] # set callbacks -litellm.success_callback = ["lunary", "langfuse"] # log input/output to 
langfuse, lunary, supabase +litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to langfuse, lunary, supabase, helicone #openai call response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) diff --git a/docs/my-website/docs/index.md b/docs/my-website/docs/index.md index 582be1dad..6b472ee6c 100644 --- a/docs/my-website/docs/index.md +++ b/docs/my-website/docs/index.md @@ -318,7 +318,7 @@ os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" os.environ["OPENAI_API_KEY"] # set callbacks -litellm.success_callback = ["lunary", "langfuse"] # log input/output to lunary, langfuse, supabase +litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to lunary, langfuse, supabase, helicone #openai call response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
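Taken together, the quick-start snippet these patches leave behind can be sketched as one runnable script (the key values are the placeholders from the diffs; `import litellm` and the OpenAI key line are assumptions added here because the snippets reference them implicitly):

```python
import os
import litellm
from litellm import completion

## set env variables for logging tools (placeholder values)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"] = "your-openai-key"  # assumed placeholder

# set callbacks: log input/output to lunary, langfuse, and helicone
litellm.success_callback = ["lunary", "langfuse", "helicone"]

# openai call
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
)
print(response)
```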