diff --git a/build/lib/litellm/__init__.py b/build/lib/litellm/__init__.py
index e3d4a8d849..6ae281afc9 100644
--- a/build/lib/litellm/__init__.py
+++ b/build/lib/litellm/__init__.py
@@ -2,7 +2,7 @@ success_callback = []
 failure_callback = []
 set_verbose=False
 telemetry=True
-
+max_tokens = 256 # OpenAI Defaults
 ####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
 api_base = None
 headers = None
diff --git a/build/lib/litellm/main.py b/build/lib/litellm/main.py
index 16faba2bd6..898f40ae41 100644
--- a/build/lib/litellm/main.py
+++ b/build/lib/litellm/main.py
@@ -189,7 +189,7 @@ def completion(
       if max_tokens != float('inf'):
         max_tokens_to_sample = max_tokens
       else:
-        max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
+        max_tokens_to_sample = litellm.max_tokens # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
       ## LOGGING
       logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
       ## COMPLETION CALL
diff --git a/build/lib/litellm/utils.py b/build/lib/litellm/utils.py
index 67a3d6df74..1cbbb37cfd 100644
--- a/build/lib/litellm/utils.py
+++ b/build/lib/litellm/utils.py
@@ -2,6 +2,7 @@ import dotenv, json, traceback, threading
 import subprocess, os
 import litellm, openai
 import random, uuid, requests
+import datetime
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
@@ -11,6 +12,7 @@ add_breadcrumb = None
 posthog = None
 slack_app = None
 alerts_channel = None
+heliconeLogger = None
 callback_list = []
 user_logger_fn = None
 additional_details = {}
@@ -68,7 +70,7 @@ def client(original_function):
     global callback_list, add_breadcrumb
     if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
       callback_list = list(set(litellm.success_callback + litellm.failure_callback))
-      set_callbacks(callback_list=callback_list)
+      set_callbacks(callback_list=callback_list,)
     if add_breadcrumb:
       add_breadcrumb(
         category="litellm.llm_call",
@@ -83,9 +85,11 @@ def client(original_function):
     try:
       function_setup(args, kwargs)
       ## MODEL CALL
+      start_time = datetime.datetime.now()
       result = original_function(*args, **kwargs)
+      end_time = datetime.datetime.now()
       ## LOG SUCCESS
-      my_thread = threading.Thread(target=handle_success, args=(args, kwargs)) # don't interrupt execution of main thread
+      my_thread = threading.Thread(target=handle_success, args=(args, kwargs, result, start_time, end_time)) # don't interrupt execution of main thread
       my_thread.start()
       return result
     except Exception as e:
@@ -97,7 +101,7 @@
 
 ####### HELPER FUNCTIONS ################
 def set_callbacks(callback_list):
-  global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
+  global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger
   try:
     for callback in callback_list:
       if callback == "sentry":
@@ -134,6 +138,10 @@ def set_callbacks(callback_list):
         )
         alerts_channel = os.environ["SLACK_API_CHANNEL"]
         print_verbose(f"Initialized Slack App: {slack_app}")
+      elif callback == "helicone":
+        from .integrations.helicone import HeliconeLogger
+
+        heliconeLogger = HeliconeLogger()
   except:
     pass
@@ -200,7 +208,8 @@ def handle_failure(exception, traceback_exception, args, kwargs):
     except:
       pass
 
-def handle_success(*args, **kwargs):
+def handle_success(args, kwargs, result, start_time, end_time):
+  global heliconeLogger
   try:
     success_handler = additional_details.pop("success_handler", None)
     failure_handler = additional_details.pop("failure_handler", None)
@@ -223,6 +232,11 @@ def handle_success(*args, **kwargs):
         for detail in additional_details:
           slack_msg += f"{detail}: {additional_details[detail]}\n"
         slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
+      elif callback == "helicone":
+        print_verbose("reaches helicone for logging!")
+        model = args[0] if len(args) > 0 else kwargs["model"]
+        messages = args[1] if len(args) > 1 else kwargs["messages"]
+        heliconeLogger.log_success(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time)
   except:
     pass
diff --git a/dist/litellm-0.1.220-py3-none-any.whl b/dist/litellm-0.1.220-py3-none-any.whl
deleted file mode 100644
index d12f0568cd..0000000000
Binary files a/dist/litellm-0.1.220-py3-none-any.whl and /dev/null differ
diff --git a/dist/litellm-0.1.220.tar.gz b/dist/litellm-0.1.220.tar.gz
deleted file mode 100644
index 63d50fdac4..0000000000
Binary files a/dist/litellm-0.1.220.tar.gz and /dev/null differ
diff --git a/dist/litellm-0.1.222-py3-none-any.whl b/dist/litellm-0.1.222-py3-none-any.whl
new file mode 100644
index 0000000000..c16ca32682
Binary files /dev/null and b/dist/litellm-0.1.222-py3-none-any.whl differ
diff --git a/dist/litellm-0.1.222.tar.gz b/dist/litellm-0.1.222.tar.gz
new file mode 100644
index 0000000000..75f64b0b4b
Binary files /dev/null and b/dist/litellm-0.1.222.tar.gz differ
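The wiring above instantiates a `HeliconeLogger` from `litellm/integrations/helicone.py` and calls `heliconeLogger.log_success(...)` on a background thread with the request, the response object, and the start/end timestamps. That module is not part of this diff; the following is only a rough sketch of what such a logger could look like, assuming a generic HTTP logging endpoint (the class name and `log_success` signature come from the calls above, everything else is illustrative):

```python
# Illustrative sketch only -- the real logger lives in litellm/integrations/helicone.py
# and may differ. The endpoint URL and payload shape here are assumptions.
import os
import requests


class HeliconeLogger:
    def __init__(self):
        # HELICONE_API_KEY is the env var the docs added in this PR ask users to set
        self.key = os.environ.get("HELICONE_API_KEY")
        self.api_url = "https://api.hconeai.com/custom/v1/log"  # hypothetical endpoint

    def log_success(self, model, messages, response_obj, start_time, end_time):
        # start_time / end_time are the datetimes captured around the model call in client()
        latency_ms = int((end_time - start_time).total_seconds() * 1000)
        payload = {
            "model": model,
            "messages": messages,
            "response": dict(response_obj),
            "latency_ms": latency_ms,
        }
        requests.post(
            self.api_url,
            headers={"Authorization": f"Bearer {self.key}"},
            json=payload,
        )
```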
diff --git a/docs/advanced.md b/docs/advanced.md
index 403e607553..aa3b223896 100644
--- a/docs/advanced.md
+++ b/docs/advanced.md
@@ -1,21 +1,27 @@
-# Advanced - liteLLM client
+# Advanced - Callbacks
 
-## Use liteLLM client to send Output Data to Posthog, Sentry etc
-liteLLM allows you to create `completion_client` and `embedding_client` to send successfull / error LLM API call data to Posthog, Sentry, Slack etc
+## Use Callbacks to send Output Data to Posthog, Sentry etc
+liteLLM provides `success_callback` and `failure_callback`, making it easy for you to send data to a particular provider depending on the status of your responses.
+
+liteLLM supports:
+
+- [Helicone](https://docs.helicone.ai/introduction)
+- [Sentry](https://docs.sentry.io/platforms/python/)
+- [PostHog](https://posthog.com/docs/libraries/python)
+- [Slack](https://slack.dev/bolt-python/concepts)
 
 ### Quick Start
 ```python
-from main import litellm_client
-import os
+from litellm import completion
+
+# set callbacks
+litellm.success_callback=["posthog", "helicone"]
+litellm.failure_callback=["sentry"]
 
 ## set env variables
-os.environ['SENTRY_API_URL'] = ""
+os.environ['SENTRY_API_URL'], os.environ['SENTRY_API_TRACE_RATE'] = "", "1.0"
 os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url"
-
-# init liteLLM client
-client = litellm_client(success_callback=["posthog"], failure_callback=["sentry", "posthog"])
-completion = client.completion
-embedding = client.embedding
+os.environ["HELICONE_API_KEY"] = ""
 
 response = completion(model="gpt-3.5-turbo", messages=messages)
 ```
diff --git a/docs/client_integrations.md b/docs/client_integrations.md
index 83de02a412..a7ebc6969a 100644
--- a/docs/client_integrations.md
+++ b/docs/client_integrations.md
@@ -1,10 +1,11 @@
 # Data Logging Integrations
 
-| Integration | Required OS Variables | How to Use with litellm Client |
+| Integration | Required OS Variables | How to Use with callbacks |
 |-----------------|--------------------------------------------|-------------------------------------------|
-| Sentry | `SENTRY_API_URL` | `client = litellm_client(success_callback=["sentry"], failure_callback=["sentry"])` |
-| Posthog | `POSTHOG_API_KEY`,<br>`POSTHOG_API_URL` | `client = litellm_client(success_callback=["posthog"], failure_callback=["posthog"])` |
-| Slack | `SLACK_API_TOKEN`,<br>`SLACK_API_SECRET`,<br>`SLACK_API_CHANNEL` | `client = litellm_client(success_callback=["slack"], failure_callback=["slack"])` |
+| Sentry | `SENTRY_API_URL` | `litellm.success_callback=["sentry"], litellm.failure_callback=["sentry"]` |
+| Posthog | `POSTHOG_API_KEY`,<br>`POSTHOG_API_URL` | `litellm.success_callback=["posthog"], litellm.failure_callback=["posthog"]` |
+| Slack | `SLACK_API_TOKEN`,<br>`SLACK_API_SECRET`,<br>`SLACK_API_CHANNEL` | `litellm.success_callback=["slack"], litellm.failure_callback=["slack"]` |
+| Helicone | `HELICONE_API_TOKEN` | `litellm.success_callback=["helicone"]` |
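For example, wiring up the Slack row from the table above could look like this (the env-var names and callback values come from the table; the token, secret, and channel values are placeholders):

```python
import os
import litellm
from litellm import completion

# placeholders -- use your real Slack app credentials
os.environ["SLACK_API_TOKEN"] = "xoxb-your-bot-token"
os.environ["SLACK_API_SECRET"] = "your-signing-secret"
os.environ["SLACK_API_CHANNEL"] = "#llm-alerts"

# post to the channel on both successful and failed LLM calls
litellm.success_callback = ["slack"]
litellm.failure_callback = ["slack"]

messages = [{"role": "user", "content": "Hello, how are you?"}]
response = completion(model="gpt-3.5-turbo", messages=messages)
```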
diff --git a/docs/index.md b/docs/index.md
index b58918f09b..dba0cee0dc 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -4,7 +4,11 @@ a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic AP
 ###### litellm manages:
 * Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
 * Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
-* **[Advanced]** Automatically logging your output to Sentry, Posthog, Slack [see liteLLM Client](https://litellm.readthedocs.io/en/latest/advanced/)
+* Consistent Exceptions for all LLM APIs, we map RateLimit, Context Window, and Authentication Error exceptions across all providers to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
+
+###### observability:
+* Logging - see exactly what the raw model request/response is `completion(.., logger_fn=your_logging_fn)`
+* Callbacks - automatically send your data to Helicone, Sentry, Posthog, Slack - `litellm.success_callback`, `litellm.failure_callback` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
 
 ## Quick Start
 Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
diff --git a/docs/supported.md b/docs/supported.md
index e6107d0ac5..692a55e7dc 100644
--- a/docs/supported.md
+++ b/docs/supported.md
@@ -5,6 +5,8 @@
 | Model Name | Function Call | Required OS Variables |
 |------------------|----------------------------------------|--------------------------------------|
 | gpt-3.5-turbo | `completion('gpt-3.5-turbo', messages)` | `os.environ['OPENAI_API_KEY']` |
+| gpt-3.5-turbo-16k | `completion('gpt-3.5-turbo-16k', messages)` | `os.environ['OPENAI_API_KEY']` |
+| gpt-3.5-turbo-16k-0613 | `completion('gpt-3.5-turbo-16k-0613', messages)` | `os.environ['OPENAI_API_KEY']` |
 | gpt-4 | `completion('gpt-4', messages)` | `os.environ['OPENAI_API_KEY']` |
 
 ## Azure OpenAI Chat Completion Models
@@ -26,16 +28,11 @@
 |------------------|--------------------------------------------|--------------------------------------|
 | command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |
 
-### OpenRouter Models
-| Model Name | Function Call | Required OS Variables |
-|----------------------------------|----------------------------------------------------------------------|---------------------------------------------------------------------------|
-| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
-| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OPENROUTER_API_KEY']`,<br>`os.environ['OR_SITE_URL']`,<br>`os.environ['OR_APP_NAME']` |
+### Anthropic Models
+
+| Model Name | Function Call | Required OS Variables |
+|------------------|--------------------------------------------|--------------------------------------|
+| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+
diff --git a/litellm.egg-info/PKG-INFO b/litellm.egg-info/PKG-INFO
index 2e24f886bd..bac726f759 100644
--- a/litellm.egg-info/PKG-INFO
+++ b/litellm.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: litellm
-Version: 0.1.220
+Version: 0.1.222
 Summary: Library to easily interface with LLM API providers
 Author: BerriAI
 License-File: LICENSE
diff --git a/litellm/utils.py b/litellm/utils.py
index 1cbbb37cfd..16fab0aeee 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -112,6 +112,7 @@ def set_callbacks(callback_list):
           subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
           import sentry_sdk
           sentry_sdk_instance = sentry_sdk
+          sentry_trace_rate = os.environ.get("SENTRY_API_TRACE_RATE") if "SENTRY_API_TRACE_RATE" in os.environ else "1.0"
-          sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
+          sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(sentry_trace_rate))
           capture_exception = sentry_sdk_instance.capture_exception
           add_breadcrumb = sentry_sdk_instance.add_breadcrumb
diff --git a/setup.py b/setup.py
index e2d3a25bee..641473e0a4 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='litellm',
-    version='0.1.221',
+    version='0.1.222',
     description='Library to easily interface with LLM API providers',
     author='BerriAI',
     packages=[
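Taken together with the `max_tokens = 256 # OpenAI Defaults` line added to `__init__.py`, the Anthropic models added to docs/supported.md mean a Claude call with no explicit `max_tokens` now samples up to the library-wide default. A small usage sketch (model names come from the new table; the API key value is a placeholder):

```python
import os
import litellm
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"  # placeholder

# optional: raise the new library-wide default of 256 completion tokens
litellm.max_tokens = 512

messages = [{"role": "user", "content": "What's the capital of France?"}]

# claude-instant-1 / claude-v2 are the models listed in the updated docs/supported.md;
# with no max_tokens argument, the Anthropic call falls back to litellm.max_tokens
response = completion(model="claude-instant-1", messages=messages)
print(response["choices"][0]["message"]["content"])
```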