diff --git a/build/lib/litellm/__init__.py b/build/lib/litellm/__init__.py
index e3d4a8d849..6ae281afc9 100644
--- a/build/lib/litellm/__init__.py
+++ b/build/lib/litellm/__init__.py
@@ -2,7 +2,7 @@ success_callback = []
failure_callback = []
set_verbose=False
telemetry=True
-
+max_tokens = 256 # OpenAI Defaults
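+# used as the default when a provider requires an explicit max token count (e.g. Anthropic's max_tokens_to_sample)
+# override at runtime with `litellm.max_tokens = 1024` if you need longer completions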
####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
api_base = None
headers = None
diff --git a/build/lib/litellm/main.py b/build/lib/litellm/main.py
index 16faba2bd6..898f40ae41 100644
--- a/build/lib/litellm/main.py
+++ b/build/lib/litellm/main.py
@@ -189,7 +189,7 @@ def completion(
if max_tokens != float('inf'):
max_tokens_to_sample = max_tokens
else:
- max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries
+max_tokens_to_sample = litellm.max_tokens # fall back to litellm's configurable default; Anthropic requires max_tokens_to_sample (https://docs.anthropic.com/claude/reference/client-libraries)
## LOGGING
logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
## COMPLETION CALL
diff --git a/build/lib/litellm/utils.py b/build/lib/litellm/utils.py
index 67a3d6df74..1cbbb37cfd 100644
--- a/build/lib/litellm/utils.py
+++ b/build/lib/litellm/utils.py
@@ -2,6 +2,7 @@ import dotenv, json, traceback, threading
import subprocess, os
import litellm, openai
import random, uuid, requests
+import datetime
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
####### ENVIRONMENT VARIABLES ###################
dotenv.load_dotenv() # Loading env variables using dotenv
@@ -11,6 +12,7 @@ add_breadcrumb = None
posthog = None
slack_app = None
alerts_channel = None
+heliconeLogger = None
callback_list = []
user_logger_fn = None
additional_details = {}
@@ -68,7 +70,7 @@ def client(original_function):
global callback_list, add_breadcrumb
if (len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0) and len(callback_list) == 0:
callback_list = list(set(litellm.success_callback + litellm.failure_callback))
- set_callbacks(callback_list=callback_list)
+ set_callbacks(callback_list=callback_list,)
if add_breadcrumb:
add_breadcrumb(
category="litellm.llm_call",
@@ -83,9 +85,11 @@ def client(original_function):
try:
function_setup(args, kwargs)
## MODEL CALL
+ start_time = datetime.datetime.now()
result = original_function(*args, **kwargs)
+ end_time = datetime.datetime.now()
## LOG SUCCESS
- my_thread = threading.Thread(target=handle_success, args=(args, kwargs)) # don't interrupt execution of main thread
+ my_thread = threading.Thread(target=handle_success, args=(args, kwargs, result, start_time, end_time)) # don't interrupt execution of main thread
my_thread.start()
return result
except Exception as e:
@@ -97,7 +101,7 @@ def client(original_function):
####### HELPER FUNCTIONS ################
def set_callbacks(callback_list):
- global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
+ global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger
try:
for callback in callback_list:
if callback == "sentry":
@@ -134,6 +138,10 @@ def set_callbacks(callback_list):
)
alerts_channel = os.environ["SLACK_API_CHANNEL"]
print_verbose(f"Initialized Slack App: {slack_app}")
+ elif callback == "helicone":
+ from .integrations.helicone import HeliconeLogger
+
+ heliconeLogger = HeliconeLogger()
except:
pass
@@ -200,7 +208,8 @@ def handle_failure(exception, traceback_exception, args, kwargs):
except:
pass
-def handle_success(*args, **kwargs):
+def handle_success(args, kwargs, result, start_time, end_time):
+ global heliconeLogger
try:
success_handler = additional_details.pop("success_handler", None)
failure_handler = additional_details.pop("failure_handler", None)
@@ -223,6 +232,11 @@ def handle_success(*args, **kwargs):
for detail in additional_details:
slack_msg += f"{detail}: {additional_details[detail]}\n"
slack_app.client.chat_postMessage(channel=alerts_channel, text=slack_msg)
+ elif callback == "helicone":
+ print_verbose("reaches helicone for logging!")
+ model = args[0] if len(args) > 0 else kwargs["model"]
+ messages = args[1] if len(args) > 1 else kwargs["messages"]
+ heliconeLogger.log_success(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time)
except:
pass
diff --git a/dist/litellm-0.1.220-py3-none-any.whl b/dist/litellm-0.1.220-py3-none-any.whl
deleted file mode 100644
index d12f0568cd..0000000000
Binary files a/dist/litellm-0.1.220-py3-none-any.whl and /dev/null differ
diff --git a/dist/litellm-0.1.220.tar.gz b/dist/litellm-0.1.220.tar.gz
deleted file mode 100644
index 63d50fdac4..0000000000
Binary files a/dist/litellm-0.1.220.tar.gz and /dev/null differ
diff --git a/dist/litellm-0.1.222-py3-none-any.whl b/dist/litellm-0.1.222-py3-none-any.whl
new file mode 100644
index 0000000000..c16ca32682
Binary files /dev/null and b/dist/litellm-0.1.222-py3-none-any.whl differ
diff --git a/dist/litellm-0.1.222.tar.gz b/dist/litellm-0.1.222.tar.gz
new file mode 100644
index 0000000000..75f64b0b4b
Binary files /dev/null and b/dist/litellm-0.1.222.tar.gz differ
diff --git a/docs/advanced.md b/docs/advanced.md
index 403e607553..aa3b223896 100644
--- a/docs/advanced.md
+++ b/docs/advanced.md
@@ -1,21 +1,27 @@
-# Advanced - liteLLM client
+# Advanced - Callbacks
-## Use liteLLM client to send Output Data to Posthog, Sentry etc
-liteLLM allows you to create `completion_client` and `embedding_client` to send successfull / error LLM API call data to Posthog, Sentry, Slack etc
+## Use Callbacks to send Output Data to Posthog, Sentry etc
+liteLLM provides `success_callback` and `failure_callback`, making it easy to send data to a particular provider depending on whether your call succeeded or failed.
+
+liteLLM supports:
+
+- [Helicone](https://docs.helicone.ai/introduction)
+- [Sentry](https://docs.sentry.io/platforms/python/)
+- [PostHog](https://posthog.com/docs/libraries/python)
+- [Slack](https://slack.dev/bolt-python/concepts)
### Quick Start
```python
-from main import litellm_client
-import os
+import os
+import litellm
+from litellm import completion
+
+# set callbacks
+litellm.success_callback = ["posthog", "helicone"]
+litellm.failure_callback = ["sentry"]
## set env variables
-os.environ['SENTRY_API_URL'] = ""
+os.environ['SENTRY_API_URL'], os.environ['SENTRY_API_TRACE_RATE'] = "", ""
os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url"
-
-# init liteLLM client
-client = litellm_client(success_callback=["posthog"], failure_callback=["sentry", "posthog"])
-completion = client.completion
-embedding = client.embedding
+os.environ["HELICONE_API_KEY"] = ""
response = completion(model="gpt-3.5-turbo", messages=messages)
```
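+
+Failure callbacks work the same way: providers listed in `litellm.failure_callback` receive the error data when a call fails. A minimal sketch (the invalid key below is only there to force a failure):
+
+```python
+import os
+import litellm
+from litellm import completion
+
+litellm.failure_callback = ["sentry"]
+
+os.environ['SENTRY_API_URL'] = ""          # your Sentry DSN
+os.environ['OPENAI_API_KEY'] = "bad-key"   # deliberately invalid, to trigger the failure path
+
+try:
+    completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey!"}])
+except Exception:
+    pass  # the configured failure callbacks receive the error data
+```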
diff --git a/docs/client_integrations.md b/docs/client_integrations.md
index 83de02a412..a7ebc6969a 100644
--- a/docs/client_integrations.md
+++ b/docs/client_integrations.md
@@ -1,10 +1,11 @@
# Data Logging Integrations
-| Integration | Required OS Variables | How to Use with litellm Client |
+| Integration | Required OS Variables | How to Use with callbacks |
|-----------------|--------------------------------------------|-------------------------------------------|
-| Sentry | `SENTRY_API_URL` | `client = litellm_client(success_callback=["sentry"], failure_callback=["sentry"])` |
-| Posthog | `POSTHOG_API_KEY`, `POSTHOG_API_URL` | `client = litellm_client(success_callback=["posthog"], failure_callback=["posthog"])` |
-| Slack | `SLACK_API_TOKEN`, `SLACK_API_SECRET`, `SLACK_API_CHANNEL` | `client = litellm_client(success_callback=["slack"], failure_callback=["slack"])` |
+| Sentry | `SENTRY_API_URL` | `litellm.success_callback=["sentry"], litellm.failure_callback=["sentry"]` |
+| Posthog | `POSTHOG_API_KEY`, `POSTHOG_API_URL` | `litellm.success_callback=["posthog"], litellm.failure_callback=["posthog"]` |
+| Slack | `SLACK_API_TOKEN`, `SLACK_API_SECRET`, `SLACK_API_CHANNEL` | `litellm.success_callback=["slack"], litellm.failure_callback=["slack"]` |
+| Helicone | `HELICONE_API_KEY` | `litellm.success_callback=["helicone"]` |
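+
+For example, a minimal sketch for the Helicone callback (the message content and empty keys are illustrative):
+
+```python
+import os
+import litellm
+from litellm import completion
+
+os.environ["HELICONE_API_KEY"] = ""  # your Helicone API key
+os.environ["OPENAI_API_KEY"] = ""    # key for the underlying model provider
+litellm.success_callback = ["helicone"]
+
+response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey!"}])
+```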
diff --git a/docs/index.md b/docs/index.md
index b58918f09b..dba0cee0dc 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -4,7 +4,11 @@ a light 100 line package to simplify calling OpenAI, Azure, Cohere, Anthropic AP
###### litellm manages:
* Calling all LLM APIs using the OpenAI format - `completion(model, messages)`
* Consistent output for all LLM APIs, text responses will always be available at `['choices'][0]['message']['content']`
-* **[Advanced]** Automatically logging your output to Sentry, Posthog, Slack [see liteLLM Client](https://litellm.readthedocs.io/en/latest/advanced/)
+* Consistent exceptions for all LLM APIs: RateLimit, Context Window, and Authentication errors from every provider are mapped to their OpenAI equivalents. [see Code](https://github.com/BerriAI/litellm/blob/ba1079ff6698ef238c5c7f771dd2b698ec76f8d9/litellm/utils.py#L250)
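+
+For example, a provider-agnostic retry sketch (a hypothetical snippet; the exception class comes from the `openai` package that liteLLM already depends on):
+
+```python
+from openai.error import RateLimitError
+from litellm import completion
+
+try:
+    response = completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hey!"}])
+except RateLimitError:
+    print("rate limited by the provider - back off and retry")
+```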
+
+###### observability:
+* Logging - see exactly what the raw model request/response is: `completion(.., logger_fn=your_logging_fn)`
+* Callbacks - automatically send your data to Helicone, Sentry, Posthog, or Slack - `litellm.success_callback`, `litellm.failure_callback` [see Callbacks](https://litellm.readthedocs.io/en/latest/advanced/)
## Quick Start
Go directly to code: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
diff --git a/docs/supported.md b/docs/supported.md
index e6107d0ac5..692a55e7dc 100644
--- a/docs/supported.md
+++ b/docs/supported.md
@@ -5,6 +5,8 @@
| Model Name | Function Call | Required OS Variables |
|------------------|----------------------------------------|--------------------------------------|
| gpt-3.5-turbo | `completion('gpt-3.5-turbo', messages)` | `os.environ['OPENAI_API_KEY']` |
+| gpt-3.5-turbo-16k | `completion('gpt-3.5-turbo-16k', messages)` | `os.environ['OPENAI_API_KEY']` |
+| gpt-3.5-turbo-16k-0613 | `completion('gpt-3.5-turbo-16k-0613', messages)` | `os.environ['OPENAI_API_KEY']` |
| gpt-4 | `completion('gpt-4', messages)` | `os.environ['OPENAI_API_KEY']` |
## Azure OpenAI Chat Completion Models
@@ -26,16 +28,11 @@
|------------------|--------------------------------------------|--------------------------------------|
| command-nightly | `completion('command-nightly', messages)` | `os.environ['COHERE_API_KEY']` |
-### OpenRouter Models
-| Model Name | Function Call | Required OS Variables |
-|----------------------------------|----------------------------------------------------------------------|---------------------------------------------------------------------------|
-| google/palm-2-codechat-bison | `completion('google/palm-2-codechat-bison', messages)` | `os.environ['OPENROUTER_API_KEY']`, `os.environ['OR_SITE_URL']`, `os.environ['OR_APP_NAME']` |
-| google/palm-2-chat-bison | `completion('google/palm-2-chat-bison', messages)` | `os.environ['OPENROUTER_API_KEY']`, `os.environ['OR_SITE_URL']`, `os.environ['OR_APP_NAME']` |
-| openai/gpt-3.5-turbo | `completion('openai/gpt-3.5-turbo', messages)` | `os.environ['OPENROUTER_API_KEY']`, `os.environ['OR_SITE_URL']`, `os.environ['OR_APP_NAME']` |
-| openai/gpt-3.5-turbo-16k | `completion('openai/gpt-3.5-turbo-16k', messages)` | `os.environ['OPENROUTER_API_KEY']`, `os.environ['OR_SITE_URL']`, `os.environ['OR_APP_NAME']` |
-| openai/gpt-4-32k | `completion('openai/gpt-4-32k', messages)` | `os.environ['OPENROUTER_API_KEY']`, `os.environ['OR_SITE_URL']`, `os.environ['OR_APP_NAME']` |
-| anthropic/claude-2 | `completion('anthropic/claude-2', messages)` | `os.environ['OPENROUTER_API_KEY']`, `os.environ['OR_SITE_URL']`, `os.environ['OR_APP_NAME']` |
-| anthropic/claude-instant-v1 | `completion('anthropic/claude-instant-v1', messages)` | `os.environ['OPENROUTER_API_KEY']`, `os.environ['OR_SITE_URL']`, `os.environ['OR_APP_NAME']` |
-| meta-llama/llama-2-13b-chat | `completion('meta-llama/llama-2-13b-chat', messages)` | `os.environ['OPENROUTER_API_KEY']`, `os.environ['OR_SITE_URL']`, `os.environ['OR_APP_NAME']` |
-| meta-llama/llama-2-70b-chat | `completion('meta-llama/llama-2-70b-chat', messages)` | `os.environ['OPENROUTER_API_KEY']`, `os.environ['OR_SITE_URL']`, `os.environ['OR_APP_NAME']` |
+### Anthropic Models
+
+| Model Name | Function Call | Required OS Variables |
+|------------------|--------------------------------------------|--------------------------------------|
+| claude-instant-1 | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+| claude-v2 | `completion('claude-v2', messages)` | `os.environ['ANTHROPIC_API_KEY']` |
+
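+A minimal usage sketch (assumes a valid Anthropic key; the message content is illustrative):
+
+```python
+import os
+from litellm import completion
+
+os.environ['ANTHROPIC_API_KEY'] = "your-anthropic-key"
+
+messages = [{"role": "user", "content": "Hey! how's it going?"}]
+response = completion("claude-instant-1", messages)
+print(response['choices'][0]['message']['content'])
+```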
diff --git a/litellm.egg-info/PKG-INFO b/litellm.egg-info/PKG-INFO
index 2e24f886bd..bac726f759 100644
--- a/litellm.egg-info/PKG-INFO
+++ b/litellm.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: litellm
-Version: 0.1.220
+Version: 0.1.222
Summary: Library to easily interface with LLM API providers
Author: BerriAI
License-File: LICENSE
diff --git a/litellm/utils.py b/litellm/utils.py
index 1cbbb37cfd..16fab0aeee 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -112,6 +112,7 @@ def set_callbacks(callback_list):
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk'])
import sentry_sdk
sentry_sdk_instance = sentry_sdk
+sentry_trace_rate = os.environ.get("SENTRY_API_TRACE_RATE") if "SENTRY_API_TRACE_RATE" in os.environ else "1.0"
-sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE")))
+sentry_sdk_instance.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(sentry_trace_rate))
capture_exception = sentry_sdk_instance.capture_exception
add_breadcrumb = sentry_sdk_instance.add_breadcrumb
diff --git a/setup.py b/setup.py
index e2d3a25bee..641473e0a4 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name='litellm',
- version='0.1.221',
+ version='0.1.222',
description='Library to easily interface with LLM API providers',
author='BerriAI',
packages=[