forked from phoenix/litellm-mirror

feat: LLMonitor is now Lunary

parent f90533a3b3 · commit a9648613dc · 15 changed files with 633 additions and 548 deletions
https://github.com/BerriAI/litellm
## **Call 100+ LLMs using the same Input/Output Format**
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing) (see the sketch after this list)
- Track spend & set budgets per project with the [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
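
As a quick illustration of the router bullet above, here is a minimal sketch based on the linked Router docs; the Azure deployment name and env vars are placeholders:

```python
import os
from litellm import Router

# two deployments registered under one model group; the router
# load-balances across them and falls back on failure
router = Router(model_list=[
    {
        "model_name": "gpt-3.5-turbo",      # model group name
        "litellm_params": {                 # params for litellm.completion()
            "model": "azure/<your_deployment_name>",
            "api_key": os.environ["AZURE_API_KEY"],
            "api_base": os.environ["AZURE_API_BASE"],
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo"},
    },
])

response = router.completion(
    model="gpt-3.5-turbo",                  # routes to one of the deployments above
    messages=[{ "content": "Hello, how are you?","role": "user"}],
)
```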
## Basic usage
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
```shell
pip install litellm
```
<Tabs>
<TabItem value="openai" label="OpenAI">

```python
from litellm import completion
import os

os.environ["OPENAI_API_KEY"] = "your-api-key"

response = completion(
  model="gpt-3.5-turbo",
  messages=[{ "content": "Hello, how are you?","role": "user"}]
)
```

</TabItem>
<TabItem value="anthropic" label="Anthropic">

```python
from litellm import completion
import os

os.environ["ANTHROPIC_API_KEY"] = "your-api-key"

response = completion(
  model="claude-2",
  messages=[{ "content": "Hello, how are you?","role": "user"}]
)
```

</TabItem>
<TabItem value="vertex" label="VertexAI">

```python
from litellm import completion
import os

os.environ["VERTEX_PROJECT"] = "hardy-device-386718"
os.environ["VERTEX_LOCATION"] = "us-central1"

response = completion(
  model="chat-bison",
  messages=[{ "content": "Hello, how are you?","role": "user"}]
)
```

</TabItem>

<TabItem value="hugging" label="HuggingFace">
```python
from litellm import completion
import os

os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"

# e.g. Call 'WizardLM/WizardCoder-Python-34B-V1.0' hosted on HF Inference endpoints
response = completion(
  model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0",
  messages=[{ "content": "Hello, how are you?","role": "user"}],
  api_base="https://my-endpoint.huggingface.cloud"
)
```

</TabItem>

<TabItem value="azure" label="Azure OpenAI">

```python
from litellm import completion
import os

## set ENV variables
os.environ["AZURE_API_KEY"] = ""
os.environ["AZURE_API_BASE"] = ""
os.environ["AZURE_API_VERSION"] = ""

# azure call
response = completion(
  "azure/<your_deployment_name>",
  messages = [{ "content": "Hello, how are you?","role": "user"}]
)
```
</TabItem>
<TabItem value="ollama" label="Ollama">
```python
from litellm import completion

response = completion(
  model="ollama/llama2",
  messages = [{ "content": "Hello, how are you?","role": "user"}],
  api_base="http://localhost:11434"
)
```
</TabItem>
<TabItem value="or" label="Openrouter">

```python
from litellm import completion
import os
## set ENV variables
os.environ["OPENROUTER_API_KEY"] = "openrouter_api_key"

response = completion(
  model="openrouter/google/palm-2-chat-bison",
  messages = [{ "content": "Hello, how are you?","role": "user"}],
)
```
</TabItem>
</Tabs>
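
Whichever provider you call, the response comes back in the OpenAI format, so the reply text is always in the same place. A small sketch (litellm's response objects support both attribute and dict-style access):

```python
# same access pattern for every provider above
print(response.choices[0].message.content)

# dict-style access also works
print(response["choices"][0]["message"]["content"])
```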
## Streaming
Set `stream=True` in the `completion` args.
<Tabs>
<TabItem value="openai" label="OpenAI">

```python
from litellm import completion
import os

os.environ["OPENAI_API_KEY"] = "your-api-key"

response = completion(
  model="gpt-3.5-turbo",
  messages=[{ "content": "Hello, how are you?","role": "user"}],
  stream=True,
)
```

</TabItem>
<TabItem value="anthropic" label="Anthropic">

```python
from litellm import completion
import os

os.environ["ANTHROPIC_API_KEY"] = "your-api-key"

response = completion(
  model="claude-2",
  messages=[{ "content": "Hello, how are you?","role": "user"}],
  stream=True,
)
```

</TabItem>
<TabItem value="vertex" label="VertexAI">

```python
from litellm import completion
import os

os.environ["VERTEX_PROJECT"] = "hardy-device-386718"
os.environ["VERTEX_LOCATION"] = "us-central1"

response = completion(
  model="chat-bison",
  messages=[{ "content": "Hello, how are you?","role": "user"}],
  stream=True,
)
```

</TabItem>

<TabItem value="hugging" label="HuggingFace">
```python
from litellm import completion
import os

os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"

# e.g. Call 'WizardLM/WizardCoder-Python-34B-V1.0' hosted on HF Inference endpoints
response = completion(
  model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0",
  messages=[{ "content": "Hello, how are you?","role": "user"}],
  api_base="https://my-endpoint.huggingface.cloud",
  stream=True,
)
```

</TabItem>

<TabItem value="azure" label="Azure OpenAI">

```python
from litellm import completion
import os

## set ENV variables
os.environ["AZURE_API_KEY"] = ""
os.environ["AZURE_API_BASE"] = ""
os.environ["AZURE_API_VERSION"] = ""

# azure call
response = completion(
  "azure/<your_deployment_name>",
  messages = [{ "content": "Hello, how are you?","role": "user"}],
  stream=True,
)
```

</TabItem>
<TabItem value="ollama" label="Ollama">
```python
from litellm import completion

response = completion(
  model="ollama/llama2",
  messages = [{ "content": "Hello, how are you?","role": "user"}],
  api_base="http://localhost:11434",
  stream=True,
)
```
</TabItem>
<TabItem value="or" label="Openrouter">

```python
from litellm import completion
import os
## set ENV variables
os.environ["OPENROUTER_API_KEY"] = "openrouter_api_key"

response = completion(
  model="openrouter/google/palm-2-chat-bison",
  messages = [{ "content": "Hello, how are you?","role": "user"}],
  stream=True,
)
```
</TabItem>
</Tabs>
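
The returned object is an iterator of chunks in the OpenAI streaming format; a minimal sketch of printing tokens as they arrive (works with any of the `stream=True` calls above):

```python
for part in response:
    # each chunk carries a delta; content can be None on the final chunk
    print(part.choices[0].delta.content or "", end="")
```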
## Exception handling
LiteLLM maps exceptions across all supported providers to the OpenAI exceptions. All LiteLLM exceptions inherit from OpenAI's exception types, so any error handling you have written for OpenAI should work out of the box with LiteLLM.
```python
from openai.error import OpenAIError
from litellm import completion
import os

os.environ["ANTHROPIC_API_KEY"] = "bad-key"
try:
    # some code
    completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hey, how's it going?"}])
except OpenAIError as e:
    print(e)
```
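
Because the mapped exceptions subclass the OpenAI types, narrower handlers work too. A sketch assuming the pre-v1 `openai.error` module used above; litellm raises the OpenAI auth error type for the bad key:

```python
from openai.error import AuthenticationError, RateLimitError

try:
    completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hey, how's it going?"}])
except AuthenticationError as e:
    print(f"auth error: {e}")      # the bad ANTHROPIC_API_KEY above lands here
except RateLimitError as e:
    print(f"rate limited: {e}")    # retry/backoff logic could go here
```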
## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre-defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, and Slack.
```python
import os
import litellm
from litellm import completion

## set env variables for logging tools
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"

os.environ["OPENAI_API_KEY"] = "your-api-key"

# set callbacks
litellm.success_callback = ["langfuse", "lunary"] # log input/output to lunary & langfuse

# openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```
## Track Costs, Usage, Latency for streaming
Use a callback function for this - more info on custom callbacks: https://docs.litellm.ai/docs/observability/custom_callback
```python
import litellm
from litellm import completion

# track_cost_callback
def track_cost_callback(
    kwargs,                 # kwargs to completion
    completion_response,    # response from completion
    start_time, end_time    # start/end time
):
    try:
        response_cost = kwargs.get("response_cost", 0)
print("streaming response_cost", response_cost)
    except:
        pass
# set callback
litellm.success_callback = [track_cost_callback] # set custom callback function
# litellm.completion() call
response = completion(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "Hi 👋 - i'm openai"
        }
    ],
    stream=True
)
```
## OpenAI Proxy
Track spend across multiple projects/people

The proxy provides:
1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
### 📖 Proxy Endpoints - [Swagger Docs](https://litellm-api.up.railway.app/)
### Quick Start Proxy - CLI
```shell
pip install 'litellm[proxy]'
```
#### Step 1: Start litellm proxy
```shell
$ litellm --model huggingface/bigcode/starcoder

#INFO: Proxy running on http://0.0.0.0:8000
```
#### Step 2: Make ChatCompletions Request to Proxy
```python
import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(model="gpt-3.5-turbo", messages=[
    {
        "role": "user",
        "content": "this is a test request, write a short poem"
    }
])

print(response)
```
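
Because the proxy speaks the OpenAI API, any OpenAI-compatible client can call it. For instance, a plain curl against the same endpoint (a sketch assuming the proxy from Step 1 is still running on port 8000):

```shell
curl http://0.0.0.0:8000/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "this is a test request, write a short poem"}]
  }'
```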
## More details
- [exception mapping](./exception_mapping.md)
- [retries + model fallbacks for completion()](./completion/reliable_completions.md)
- [proxy virtual keys & spend management](./tutorials/fallbacks.md)
---

liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for you to send data to a particular provider depending on the status of your responses.
liteLLM supports:
- [Lunary](https://lunary.ai/docs)
- [Helicone](https://docs.helicone.ai/introduction)
- [Sentry](https://docs.sentry.io/platforms/python/)
- [PostHog](https://posthog.com/docs/libraries/python)

```python
import os
import litellm
from litellm import completion

# set callbacks
litellm.success_callback=["posthog", "helicone", "lunary"]
litellm.failure_callback=["sentry", "lunary"]
## set env variables
os.environ['SENTRY_DSN'] = ""
os.environ['SENTRY_API_TRACE_RATE'] = ""
```
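
With the callbacks registered, any subsequent `completion()` call is logged automatically; a minimal sketch (the Lunary/Helicone/PostHog env var names follow each integration's docs and are assumptions here):

```python
# assumed env vars for the integrations configured above
os.environ['LUNARY_PUBLIC_KEY'] = "your-lunary-public-key"
os.environ['HELICONE_API_KEY'] = "your-helicone-key"
os.environ['POSTHOG_API_KEY'] = "your-posthog-key"

# success goes to posthog/helicone/lunary; failures go to sentry/lunary
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋"}])
```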