diff --git a/README.md b/README.md index 11fc9cbcba..40aa4fb2e1 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ messages = [{ "content": "Hello, how are you?","role": "user"}] response = completion(model="gpt-3.5-turbo", messages=messages) # cohere call -response = completion("command-nightly", messages) +response = completion(model="command-nightly", messages=messages) ``` Code Sample: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing) diff --git a/docs/my-website/docs/index.md b/docs/my-website/docs/index.md index 57d23215d4..b0b8b4c3e6 100644 --- a/docs/my-website/docs/index.md +++ b/docs/my-website/docs/index.md @@ -1,4 +1,4 @@ -# *🚅 litellm* +# litellm [![PyPI Version](https://img.shields.io/pypi/v/litellm.svg)](https://pypi.org/project/litellm/) [![PyPI Version](https://img.shields.io/badge/stable%20version-v0.1.345-blue?color=green&link=https://pypi.org/project/litellm/0.1.1/)](https://pypi.org/project/litellm/0.1.1/) [![CircleCI](https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main) diff --git a/docs/my-website/docs/observability/supabase_integration.md b/docs/my-website/docs/observability/supabase_integration.md index 6ae4f65dae..d9fbc2b5ac 100644 --- a/docs/my-website/docs/observability/supabase_integration.md +++ b/docs/my-website/docs/observability/supabase_integration.md @@ -22,11 +22,13 @@ create table messages json null default '{}'::json, response json null default '{}'::json, end_user text null default ''::text, + status text null default ''::text, error json null default '{}'::json, response_time real null default '0'::real, total_cost real null, additional_details json null default '{}'::json, - constraint request_logs_pkey primary key (id) + litellm_call_id text unique, + primary key (id) ) tablespace pg_default; ``` diff --git a/docs/my-website/docusaurus.config.js b/docs/my-website/docusaurus.config.js index 81d1cc8d48..4af0e6f3b2 100644 --- a/docs/my-website/docusaurus.config.js +++ b/docs/my-website/docusaurus.config.js @@ -8,7 +8,7 @@ const darkCodeTheme = require('prism-react-renderer/themes/dracula'); const config = { title: 'liteLLM', tagline: 'Simplify LLM API Calls', - favicon: 'static/img/favicon.ico', + favicon: '/img/favicon.ico', // Set the production url of your site here url: 'https://litellm.vercel.app/', diff --git a/litellm/__init__.py b/litellm/__init__.py index 688cd084fd..7cbb0e9963 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1,6 +1,6 @@ import threading from typing import Callable, List, Optional - +input_callback: List[str] = [] success_callback: List[str] = [] failure_callback: List[str] = [] set_verbose = False @@ -216,7 +216,6 @@ from .timeout import timeout from .testing import * from .utils import ( client, - logging, exception_type, get_optional_params, modify_integration, @@ -224,6 +223,7 @@ from .utils import ( cost_per_token, completion_cost, get_litellm_params, + Logging ) from .main import * # type: ignore from .integrations import * diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc index 991b3d6358..c998bff4ac 100644 Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc index 7c1a53cf54..e35b110bc1 100644 Binary files
a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ diff --git a/litellm/__pycache__/timeout.cpython-311.pyc b/litellm/__pycache__/timeout.cpython-311.pyc index 09f9769939..68f0223aaa 100644 Binary files a/litellm/__pycache__/timeout.cpython-311.pyc and b/litellm/__pycache__/timeout.cpython-311.pyc differ diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc index b872895f98..67de95852b 100644 Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ diff --git a/litellm/integrations/__pycache__/__init__.cpython-311.pyc b/litellm/integrations/__pycache__/__init__.cpython-311.pyc index e951c50a33..5bd4bfeb85 100644 Binary files a/litellm/integrations/__pycache__/__init__.cpython-311.pyc and b/litellm/integrations/__pycache__/__init__.cpython-311.pyc differ diff --git a/litellm/integrations/__pycache__/aispend.cpython-311.pyc b/litellm/integrations/__pycache__/aispend.cpython-311.pyc index 9e2d468cb3..111b4eba1b 100644 Binary files a/litellm/integrations/__pycache__/aispend.cpython-311.pyc and b/litellm/integrations/__pycache__/aispend.cpython-311.pyc differ diff --git a/litellm/integrations/__pycache__/berrispend.cpython-311.pyc b/litellm/integrations/__pycache__/berrispend.cpython-311.pyc index 87b3f5e36e..ccb4bb900f 100644 Binary files a/litellm/integrations/__pycache__/berrispend.cpython-311.pyc and b/litellm/integrations/__pycache__/berrispend.cpython-311.pyc differ diff --git a/litellm/integrations/__pycache__/helicone.cpython-311.pyc b/litellm/integrations/__pycache__/helicone.cpython-311.pyc index 03de753b4e..972c339ed3 100644 Binary files a/litellm/integrations/__pycache__/helicone.cpython-311.pyc and b/litellm/integrations/__pycache__/helicone.cpython-311.pyc differ diff --git a/litellm/integrations/__pycache__/supabase.cpython-311.pyc b/litellm/integrations/__pycache__/supabase.cpython-311.pyc index c3f60037e5..43b7b234c0 100644 Binary files a/litellm/integrations/__pycache__/supabase.cpython-311.pyc and b/litellm/integrations/__pycache__/supabase.cpython-311.pyc differ diff --git a/litellm/integrations/supabase.py b/litellm/integrations/supabase.py index d27277589a..edc97b6a30 100644 --- a/litellm/integrations/supabase.py +++ b/litellm/integrations/supabase.py @@ -144,6 +144,28 @@ class Supabase: ) return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose): + try: + print_verbose( + f"Supabase Logging - Enters input logging function for model {model}" + ) + supabase_data_obj = { + "model": model, + "messages": messages, + "end_user": end_user, + "status": "initiated", + "litellm_call_id": litellm_call_id + } + data, count = ( + self.supabase_client.table(self.supabase_table_name) + .insert(supabase_data_obj) + .execute() + ) + print(f"data: {data}") + pass + except: + pass + def log_event( self, model, @@ -152,6 +174,7 @@ class Supabase: response_obj, start_time, end_time, + litellm_call_id, print_verbose, ): try: @@ -176,16 +199,20 @@ class Supabase: "messages": messages, "response": response_obj["choices"][0]["message"]["content"], "end_user": end_user, + "litellm_call_id": litellm_call_id, + "status": "success" } print_verbose( f"Supabase Logging - final data object: {supabase_data_obj}" ) data, count = ( self.supabase_client.table(self.supabase_table_name) - .insert(supabase_data_obj) + .upsert(supabase_data_obj) .execute() ) elif 
"error" in response_obj: + if "Unable to map your input to a model." in response_obj["error"]: + total_cost = 0 supabase_data_obj = { "response_time": response_time, "model": response_obj["model"], @@ -193,13 +220,15 @@ class Supabase: "messages": messages, "error": response_obj["error"], "end_user": end_user, + "litellm_call_id": litellm_call_id, + "status": "failure" } print_verbose( f"Supabase Logging - final data object: {supabase_data_obj}" ) data, count = ( self.supabase_client.table(self.supabase_table_name) - .insert(supabase_data_obj) + .upsert(supabase_data_obj) .execute() ) diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index 5ebbc640a6..fecc655f2f 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -1,7 +1,6 @@ import os, json from enum import Enum import requests -from litellm import logging import time from typing import Callable from litellm.utils import ModelResponse @@ -22,11 +21,12 @@ class AnthropicError(Exception): class AnthropicLLM: - def __init__(self, encoding, default_max_tokens_to_sample, api_key=None): + def __init__(self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None): self.encoding = encoding self.default_max_tokens_to_sample = default_max_tokens_to_sample self.completion_url = "https://api.anthropic.com/v1/complete" self.api_key = api_key + self.logging_obj = logging_obj self.validate_environment(api_key=api_key) def validate_environment( @@ -84,15 +84,7 @@ class AnthropicLLM: } ## LOGGING - logging( - model=model, - input=prompt, - additional_args={ - "litellm_params": litellm_params, - "optional_params": optional_params, - }, - logger_fn=logger_fn, - ) + self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data}) ## COMPLETION CALL response = requests.post( self.completion_url, headers=self.headers, data=json.dumps(data) @@ -101,16 +93,7 @@ class AnthropicLLM: return response.iter_lines() else: ## LOGGING - logging( - model=model, - input=prompt, - additional_args={ - "litellm_params": litellm_params, - "optional_params": optional_params, - "original_response": response.text, - }, - logger_fn=logger_fn, - ) + self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data}) print_verbose(f"raw model_response: {response.text}") ## RESPONSE OBJECT completion_response = response.json() diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py index 709c2347eb..624fb4f055 100644 --- a/litellm/llms/huggingface_restapi.py +++ b/litellm/llms/huggingface_restapi.py @@ -2,7 +2,6 @@ import os, json from enum import Enum import requests -from litellm import logging import time from typing import Callable from litellm.utils import ModelResponse @@ -19,8 +18,9 @@ class HuggingfaceError(Exception): class HuggingfaceRestAPILLM: - def __init__(self, encoding, api_key=None) -> None: + def __init__(self, encoding, logging_obj, api_key=None) -> None: self.encoding = encoding + self.logging_obj = logging_obj self.validate_environment(api_key=api_key) def validate_environment( @@ -74,18 +74,10 @@ class HuggingfaceRestAPILLM: optional_params["max_new_tokens"] = value data = { "inputs": prompt, - # "parameters": optional_params + "parameters": optional_params } ## LOGGING - logging( - model=model, - input=prompt, - additional_args={ - "litellm_params": litellm_params, - "optional_params": optional_params, - }, - logger_fn=logger_fn, - ) + 
self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data}) ## COMPLETION CALL response = requests.post( completion_url, headers=self.headers, data=json.dumps(data) @@ -94,17 +86,7 @@ class HuggingfaceRestAPILLM: return response.iter_lines() else: ## LOGGING - logging( - model=model, - input=prompt, - additional_args={ - "litellm_params": litellm_params, - "optional_params": optional_params, - "original_response": response.text, - }, - logger_fn=logger_fn, - ) - print_verbose(f"raw model_response: {response.text}") + self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data}) ## RESPONSE OBJECT completion_response = response.json() print_verbose(f"response: {completion_response}") diff --git a/litellm/main.py b/litellm/main.py index a8f7fbd5e0..ea2dd9f255 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -6,11 +6,11 @@ from copy import deepcopy import litellm from litellm import ( # type: ignore client, - logging, exception_type, timeout, get_optional_params, get_litellm_params, + Logging ) from litellm.utils import ( get_secret, @@ -85,6 +85,7 @@ def completion( azure=False, custom_llm_provider=None, custom_api_base=None, + litellm_call_id=None, # model specific optional params # used by text-bison only top_k=40, @@ -94,6 +95,11 @@ def completion( model_response = ModelResponse() if azure: # this flag is deprecated, remove once notebooks are also updated. custom_llm_provider = "azure" + elif model.split("/", 1)[0] in litellm.provider_list: # allow custom provider to be passed in via the model name "azure/chatgpt-test" + custom_llm_provider = model.split("/", 1)[0] + model = model.split("/", 1)[1] + if "replicate" == custom_llm_provider and "/" not in model: # handle the "replicate/llama2..." 
edge-case + model = custom_llm_provider + "/" + model args = locals() # check if user passed in any of the OpenAI optional params optional_params = get_optional_params( @@ -124,8 +130,9 @@ def completion( verbose=verbose, custom_llm_provider=custom_llm_provider, custom_api_base=custom_api_base, + litellm_call_id=litellm_call_id ) - + logging = Logging(model=model, messages=messages, optional_params=optional_params, litellm_params=litellm_params) if custom_llm_provider == "azure": # azure configs openai.api_type = "azure" @@ -139,16 +146,14 @@ def completion( if litellm.api_version is not None else get_secret("AZURE_API_VERSION") ) + if not api_key and litellm.azure_key: + api_key = litellm.azure_key + elif not api_key and get_secret("AZURE_API_KEY"): + api_key = get_secret("AZURE_API_KEY") # set key - openai.api_key = api_key or litellm.azure_key or get_secret("AZURE_API_KEY") + openai.api_key = api_key ## LOGGING - logging( - model=model, - input=messages, - additional_args=optional_params, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) + logging.pre_call(input=messages, api_key=openai.api_key, additional_args={"headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base}) ## COMPLETION CALL if litellm.headers: response = openai.ChatCompletion.create( @@ -161,6 +166,8 @@ def completion( response = openai.ChatCompletion.create( model=model, messages=messages, **optional_params ) + ## LOGGING + logging.post_call(input=messages, api_key=openai.api_key, original_response=response, additional_args={"headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base}) elif ( model in litellm.open_ai_chat_completion_models or custom_llm_provider == "custom_openai" @@ -177,18 +184,15 @@ def completion( if litellm.organization: openai.organization = litellm.organization # set API KEY - openai.api_key = ( - api_key or litellm.openai_key or get_secret("OPENAI_API_KEY") - ) + if not api_key and litellm.openai_key: + api_key = litellm.openai_key + elif not api_key and get_secret("OPENAI_API_KEY"): + api_key = get_secret("OPENAI_API_KEY") + + openai.api_key = api_key ## LOGGING - logging( - model=model, - input=messages, - additional_args=args, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) + logging.pre_call(input=messages, api_key=api_key, additional_args={"headers": litellm.headers, "api_base": api_base}) ## COMPLETION CALL if litellm.headers: response = openai.ChatCompletion.create( @@ -201,6 +205,8 @@ def completion( response = openai.ChatCompletion.create( model=model, messages=messages, **optional_params ) + ## LOGGING + logging.post_call(input=messages, api_key=api_key, original_response=response, additional_args={"headers": litellm.headers}) elif model in litellm.open_ai_text_completion_models: openai.api_type = "openai" openai.api_base = ( @@ -209,20 +215,19 @@ def completion( else "https://api.openai.com/v1" ) openai.api_version = None - openai.api_key = ( - api_key or litellm.openai_key or get_secret("OPENAI_API_KEY") - ) + # set API KEY + if not api_key and litellm.openai_key: + api_key = litellm.openai_key + elif not api_key and get_secret("OPENAI_API_KEY"): + api_key = get_secret("OPENAI_API_KEY") + + openai.api_key = api_key + if litellm.organization: openai.organization = litellm.organization prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging( - model=model, - input=prompt, - additional_args=optional_params, - custom_llm_provider=custom_llm_provider, -
logger_fn=logger_fn, - ) + logging.pre_call(input=prompt, api_key=api_key, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type}) ## COMPLETION CALL if litellm.headers: response = openai.Completion.create( @@ -232,19 +237,10 @@ def completion( ) else: response = openai.Completion.create(model=model, prompt=prompt) - completion_response = response["choices"][0]["text"] ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "max_tokens": max_tokens, - "original_response": completion_response, - }, - logger_fn=logger_fn, - ) + logging.post_call(input=prompt, api_key=api_key, original_response=response, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type}) ## RESPONSE OBJECT + completion_response = response["choices"][0]["text"] model_response["choices"][0]["message"]["content"] = completion_response model_response["created"] = response["created"] model_response["model"] = model @@ -273,13 +269,7 @@ def completion( input["max_length"] = max_tokens # for t5 models input["max_new_tokens"] = max_tokens # for llama2 models ## LOGGING - logging( - model=model, - input=input, - custom_llm_provider=custom_llm_provider, - additional_args={"max_tokens": max_tokens}, - logger_fn=logger_fn, - ) + logging.pre_call(input=prompt, api_key=replicate_key, additional_args={"complete_input_dict": input, "max_tokens": max_tokens}) ## COMPLETION CALL output = replicate.run(model, input=input) if "stream" in optional_params and optional_params["stream"] == True: @@ -292,16 +282,8 @@ def completion( response += item completion_response = response ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "max_tokens": max_tokens, - "original_response": completion_response, - }, - logger_fn=logger_fn, - ) + logging.post_call(input=prompt, api_key=replicate_key, original_response=completion_response, additional_args={"complete_input_dict": input, "max_tokens": max_tokens}) + ## USAGE prompt_tokens = len(encoding.encode(prompt)) completion_tokens = len(encoding.encode(completion_response)) ## RESPONSE OBJECT @@ -322,6 +304,7 @@ def completion( encoding=encoding, default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key, + logging_obj = logging # model call logging done inside the class as we make need to modify I/O to fit anthropic's requirements ) model_response = anthropic_client.completion( model=model, @@ -357,13 +340,7 @@ def completion( "OR_API_KEY" ) ## LOGGING - logging( - model=model, - input=messages, - additional_args=optional_params, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) + logging.pre_call(input=messages, api_key=openai.api_key) ## COMPLETION CALL if litellm.headers: response = openai.ChatCompletion.create( @@ -390,6 +367,8 @@ def completion( }, **optional_params, ) + ## LOGGING + logging.post_call(input=messages, api_key=openai.api_key, original_response=response) elif model in litellm.cohere_models: # import cohere/if it fails then pip install cohere install_and_import("cohere") @@ -404,31 +383,17 @@ def completion( co = cohere.Client(cohere_key) prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) + 
logging.pre_call(input=prompt, api_key=cohere_key) ## COMPLETION CALL response = co.generate(model=model, prompt=prompt, **optional_params) if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, response = CustomStreamWrapper(response, model) return response - - completion_response = response[0].text ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "max_tokens": max_tokens, - "original_response": completion_response, - }, - logger_fn=logger_fn, - ) + logging.post_call(input=prompt, api_key=cohere_key, original_response=response) + ## USAGE + completion_response = response[0].text prompt_tokens = len(encoding.encode(prompt)) completion_tokens = len(encoding.encode(completion_response)) ## RESPONSE OBJECT @@ -452,7 +417,7 @@ def completion( or os.environ.get("HUGGINGFACE_API_KEY") ) huggingface_client = HuggingfaceRestAPILLM( - encoding=encoding, api_key=huggingface_key + encoding=encoding, api_key=huggingface_key, logging_obj=logging ) model_response = huggingface_client.completion( model=model, @@ -487,12 +452,7 @@ def completion( ) # TODO: Add chat support for together AI ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) + logging.pre_call(input=prompt, api_key=TOGETHER_AI_TOKEN) if stream == True: return together_ai_completion_streaming( { @@ -514,17 +474,7 @@ def completion( headers=headers, ) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "max_tokens": max_tokens, - "original_response": res.text, - }, - logger_fn=logger_fn, - ) - + logging.post_call(input=prompt, api_key=TOGETHER_AI_TOKEN, original_response=res.text) # make this safe for reading, if output does not exist raise an error json_response = res.json() if "output" not in json_response: @@ -557,16 +507,7 @@ def completion( prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "litellm_params": litellm_params, - "optional_params": optional_params, - }, - logger_fn=logger_fn, - ) + logging.pre_call(input=prompt, api_key=None) chat_model = ChatModel.from_pretrained(model) @@ -574,16 +515,7 @@ def completion( completion_response = chat.send_message(prompt, **optional_params) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "max_tokens": max_tokens, - "original_response": completion_response, - }, - logger_fn=logger_fn, - ) + logging.post_call(input=prompt, api_key=None, original_response=completion_response) ## RESPONSE OBJECT model_response["choices"][0]["message"]["content"] = completion_response @@ -602,27 +534,13 @@ def completion( prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) + logging.pre_call(input=prompt, api_key=None) + vertex_model = TextGenerationModel.from_pretrained(model) completion_response = vertex_model.predict(prompt, **optional_params) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "max_tokens": max_tokens, - "original_response": completion_response, - }, - logger_fn=logger_fn, - ) - + logging.post_call(input=prompt, api_key=None, 
original_response=completion_response) ## RESPONSE OBJECT model_response["choices"][0]["message"]["content"] = completion_response model_response["created"] = time.time() @@ -636,12 +554,7 @@ def completion( prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) + logging.pre_call(input=prompt, api_key=ai21.api_key) ai21_response = ai21.Completion.execute( model=model, @@ -650,16 +563,7 @@ def completion( completion_response = ai21_response["completions"][0]["data"]["text"] ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "max_tokens": max_tokens, - "original_response": completion_response, - }, - logger_fn=logger_fn, - ) + logging.post_call(input=prompt, api_key=ai21.api_key, original_response=completion_response) ## RESPONSE OBJECT model_response["choices"][0]["message"]["content"] = completion_response @@ -673,7 +577,8 @@ def completion( prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + logging.pre_call(input=prompt, api_key=None, additional_args={"endpoint": endpoint}) + generator = get_ollama_response_stream(endpoint, model, prompt) # assume all responses are streamed return generator @@ -688,12 +593,7 @@ def completion( prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) + logging.pre_call(input=prompt, api_key=base_ten_key) base_ten__model = baseten.deployed_model_version_id(model) @@ -703,16 +603,8 @@ def completion( if type(completion_response) == dict: completion_response = completion_response["generated_text"] - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "max_tokens": max_tokens, - "original_response": completion_response, - }, - logger_fn=logger_fn, - ) + ## LOGGING + logging.post_call(input=prompt, api_key=base_ten_key, original_response=completion_response) ## RESPONSE OBJECT model_response["choices"][0]["message"]["content"] = completion_response @@ -729,26 +621,14 @@ def completion( prompt = " ".join([message["content"] for message in messages]) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) + logging.pre_call(input=prompt, api_key=None, additional_args={"url": url, "max_new_tokens": 100}) + response = requests.post( url, data={"inputs": prompt, "max_new_tokens": 100, "model": model} ) ## LOGGING - logging( - model=model, - input=prompt, - custom_llm_provider=custom_llm_provider, - additional_args={ - "max_tokens": max_tokens, - "original_response": response, - }, - logger_fn=logger_fn, - ) + logging.post_call(input=prompt, api_key=None, original_response=response.text, additional_args={"url": url, "max_new_tokens": 100}) + completion_response = response.json()["outputs"] # RESPONSE OBJECT @@ -757,13 +637,6 @@ def completion( model_response["model"] = model response = model_response else: - ## LOGGING - logging( - model=model, - input=messages, - custom_llm_provider=custom_llm_provider, - logger_fn=logger_fn, - ) args = locals() raise ValueError( f"Unable to map your input to a model. 
Check your input - {args}" @@ -771,14 +644,7 @@ def completion( return response except Exception as e: ## LOGGING - logging( - model=model, - input=messages, - custom_llm_provider=custom_llm_provider, - additional_args={"max_tokens": max_tokens}, - logger_fn=logger_fn, - exception=e, - ) + logging.post_call(input=messages, api_key=api_key, original_response=e) ## Map to OpenAI Exception raise exception_type( model=model, custom_llm_provider=custom_llm_provider, original_exception=e @@ -810,9 +676,10 @@ def batch_completion(*args, **kwargs): @timeout( # type: ignore 60 ) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout` -def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None): +def embedding(model, input=[], azure=False, force_timeout=60, litellm_call_id=None, logger_fn=None): try: response = None + logging = Logging(model=model, messages=input, optional_params={}, litellm_params={"azure": azure, "force_timeout": force_timeout, "logger_fn": logger_fn, "litellm_call_id": litellm_call_id}) if azure == True: # azure configs openai.api_type = "azure" @@ -820,7 +687,7 @@ def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None): openai.api_version = get_secret("AZURE_API_VERSION") openai.api_key = get_secret("AZURE_API_KEY") ## LOGGING - logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + logging.pre_call(input=input, api_key=openai.api_key, additional_args={"api_type": openai.api_type, "api_base": openai.api_base, "api_version": openai.api_version}) ## EMBEDDING CALL response = openai.Embedding.create(input=input, engine=model) print_verbose(f"response_value: {str(response)[:50]}") @@ -830,19 +697,16 @@ def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None): openai.api_version = None openai.api_key = get_secret("OPENAI_API_KEY") ## LOGGING - logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + logging.pre_call(input=input, api_key=openai.api_key, additional_args={"api_type": openai.api_type, "api_base": openai.api_base, "api_version": openai.api_version}) ## EMBEDDING CALL response = openai.Embedding.create(input=input, model=model) print_verbose(f"response_value: {str(response)[:50]}") else: - logging(model=model, input=input, azure=azure, logger_fn=logger_fn) args = locals() raise ValueError(f"No valid embedding model args passed in - {args}") return response except Exception as e: - # log the original exception - logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e) ## Map to OpenAI Exception raise exception_type(model=model, original_exception=e, custom_llm_provider="azure" if azure==True else None) raise e diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 370668afb0..fc99544593 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -25,6 +25,18 @@ def logger_fn(user_model_dict): print(f"user_model_dict: {user_model_dict}") +def test_completion_custom_provider_model_name(): + try: + response = completion( + model="together_ai/togethercomputer/llama-2-70b-chat", messages=messages, logger_fn=logger_fn + ) + # Add any assertions here to check the response + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + +test_completion_custom_provider_model_name() + def test_completion_claude(): try: response = completion( @@ -77,7 +89,7 @@ def test_completion_claude_stream(): def test_completion_cohere(): try: response = completion( - 
model="command-nightly", messages=messages, max_tokens=100 + model="command-nightly", messages=messages, max_tokens=100, logit_bias={40: 10} ) # Add any assertions here to check the response print(response) @@ -91,7 +103,6 @@ def test_completion_cohere(): except Exception as e: pytest.fail(f"Error occurred: {e}") - def test_completion_cohere_stream(): try: messages = [ diff --git a/litellm/tests/test_supabase_integration.py b/litellm/tests/test_supabase_integration.py index 882d0bbc69..3fd4b5247f 100644 --- a/litellm/tests/test_supabase_integration.py +++ b/litellm/tests/test_supabase_integration.py @@ -9,10 +9,11 @@ # import litellm # from litellm import embedding, completion +# litellm.input_callback = ["supabase"] # litellm.success_callback = ["supabase"] # litellm.failure_callback = ["supabase"] -# litellm.modify_integration("supabase",{"table_name": "litellm_logs"}) +# litellm.modify_integration("supabase",{"table_name": "test_table"}) # litellm.set_verbose = True diff --git a/litellm/utils.py b/litellm/utils.py index c01c0e8443..44be72c2ef 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -135,48 +135,105 @@ def install_and_import(package: str): ####### LOGGING ################### # Logging function -> log the exact model details + what's being sent | Non-Blocking -def logging( - model=None, - input=None, - custom_llm_provider=None, - azure=False, +class Logging: + def __init__(self, model, messages, optional_params, litellm_params): + self.model = model + self.messages = messages + self.optional_params = optional_params + self.litellm_params = litellm_params + self.logger_fn = litellm_params["logger_fn"] + self.model_call_details = { + "model": model, + "messages": messages, + "optional_params": self.optional_params, + "litellm_params": self.litellm_params, + } + + def pre_call(self, input, api_key, additional_args={}): + try: + print(f"logging pre call for model: {self.model}") + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + self.model_call_details["additional_args"] = additional_args + + ## User Logging -> if you pass in a custom logging function + print_verbose( + f"Logging Details: logger_fn - {self.logger_fn} | callable(logger_fn) - {callable(self.logger_fn)}" + ) + if self.logger_fn and callable(self.logger_fn): + try: + self.logger_fn( + self.model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" + ) + + ## Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + for callback in litellm.input_callback: + try: + if callback == "supabase": + print_verbose("reaches supabase for logging!") + model = self.model + messages = self.messages + print(f"litellm._thread_context: {litellm._thread_context}") + supabaseClient.input_log_event( + model=model, + messages=messages, + end_user=litellm._thread_context.user, + litellm_call_id=self.litellm_params["litellm_call_id"], + print_verbose=print_verbose, + ) + pass + except: + pass + except: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" + ) + pass + + def post_call(self, input, api_key, original_response, additional_args={}): + # Do something here + try: + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + 
self.model_call_details["original_response"] = original_response + self.model_call_details["additional_args"] = additional_args + + ## User Logging -> if you pass in a custom logging function + print_verbose( + f"Logging Details: logger_fn - {self.logger_fn} | callable(logger_fn) - {callable(self.logger_fn)}" + ) + if self.logger_fn and callable(self.logger_fn): + try: + self.logger_fn( + self.model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" + ) + except: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" + ) + pass + + # Add more methods as needed + + +def exception_logging( additional_args={}, logger_fn=None, exception=None, ): try: model_call_details = {} - if model: - model_call_details["model"] = model - if azure: - model_call_details["azure"] = azure - if custom_llm_provider: - model_call_details["custom_llm_provider"] = custom_llm_provider if exception: model_call_details["exception"] = exception - if input: - model_call_details["input"] = input - - if len(additional_args): - model_call_details["additional_args"] = additional_args - # log additional call details -> api key, etc. - if model: - if ( - azure == True - or model in litellm.open_ai_chat_completion_models - or model in litellm.open_ai_chat_completion_models - or model in litellm.open_ai_embedding_models - ): - model_call_details["api_type"] = openai.api_type - model_call_details["api_base"] = openai.api_base - model_call_details["api_version"] = openai.api_version - model_call_details["api_key"] = openai.api_key - elif "replicate" in model: - model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN") - elif model in litellm.anthropic_models: - model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY") - elif model in litellm.cohere_models: - model_call_details["api_key"] = os.environ.get("COHERE_API_KEY") + model_call_details["additional_args"] = additional_args ## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs print_verbose( f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}" @@ -206,10 +263,10 @@ def client(original_function): try: global callback_list, add_breadcrumb, user_logger_fn if ( - len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0 + len(litellm.input_callback) > 0 or len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0 ) and len(callback_list) == 0: callback_list = list( - set(litellm.success_callback + litellm.failure_callback) + set(litellm.input_callback + litellm.success_callback + litellm.failure_callback) ) set_callbacks( callback_list=callback_list, @@ -299,13 +356,16 @@ def client(original_function): result = None try: function_setup(*args, **kwargs) - ## MODEL CALL + litellm_call_id = str(uuid.uuid4()) + kwargs["litellm_call_id"] = litellm_call_id + ## [OPTIONAL] CHECK CACHE start_time = datetime.datetime.now() if (litellm.caching or litellm.caching_with_models) and ( cached_result := check_cache(*args, **kwargs) ) is not None: result = cached_result else: + ## MODEL CALL result = original_function(*args, **kwargs) end_time = datetime.datetime.now() ## Add response to CACHE @@ -399,6 +459,7 @@ def get_litellm_params( together_ai=False, custom_llm_provider=None, custom_api_base=None, + 
litellm_call_id=None, ): litellm_params = { "return_async": return_async, @@ -408,6 +469,7 @@ def get_litellm_params( "verbose": verbose, "custom_llm_provider": custom_llm_provider, "custom_api_base": custom_api_base, + "litellm_call_id": litellm_call_id } return litellm_params @@ -452,6 +514,8 @@ def get_optional_params( optional_params["temperature"] = temperature if max_tokens != float("inf"): optional_params["max_tokens"] = max_tokens + if logit_bias != {}: + optional_params["logit_bias"] = logit_bias return optional_params elif custom_llm_provider == "replicate": # any replicate models @@ -565,7 +629,8 @@ def set_callbacks(callback_list): global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient try: for callback in callback_list: - if callback == "sentry" or "SENTRY_API_URL" in os.environ: + print(f"callback: {callback}") + if callback == "sentry": try: import sentry_sdk except ImportError: @@ -621,6 +686,7 @@ def set_callbacks(callback_list): elif callback == "berrispend": berrispendLogger = BerriSpendLogger() elif callback == "supabase": + print(f"instantiating supabase") supabaseClient = Supabase() except Exception as e: raise e @@ -741,7 +807,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k "completion_tokens": 0, }, } - print(f"litellm._thread_context: {litellm._thread_context}") supabaseClient.log_event( model=model, messages=messages, @@ -749,9 +814,9 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k response_obj=result, start_time=start_time, end_time=end_time, + litellm_call_id=kwargs["litellm_call_id"], print_verbose=print_verbose, ) - except: print_verbose( f"Error Occurred while logging failure: {traceback.format_exc()}" @@ -767,7 +832,7 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k pass except Exception as e: ## LOGGING - logging(logger_fn=user_logger_fn, exception=e) + exception_logging(logger_fn=user_logger_fn, exception=e) pass @@ -847,11 +912,12 @@ def handle_success(args, kwargs, result, start_time, end_time): response_obj=result, start_time=start_time, end_time=end_time, + litellm_call_id=kwargs["litellm_call_id"], print_verbose=print_verbose, ) except Exception as e: ## LOGGING - logging(logger_fn=user_logger_fn, exception=e) + exception_logging(logger_fn=user_logger_fn, exception=e) print_verbose( f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}" ) @@ -862,7 +928,7 @@ def handle_success(args, kwargs, result, start_time, end_time): pass except Exception as e: ## LOGGING - logging(logger_fn=user_logger_fn, exception=e) + exception_logging(logger_fn=user_logger_fn, exception=e) print_verbose( f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}" ) @@ -910,15 +976,6 @@ def exception_type(model, original_exception, custom_llm_provider): exception_type = type(original_exception).__name__ else: exception_type = "" - logging( - model=model, - additional_args={ - "error_str": error_str, - "exception_type": exception_type, - "original_exception": original_exception, - }, - logger_fn=user_logger_fn, - ) if "claude" in model: # one of the anthropics if hasattr(original_exception, "status_code"): print_verbose(f"status_code: {original_exception.status_code}") @@ -1028,7 +1085,7 @@ def exception_type(model, original_exception, custom_llm_provider): raise original_exception except Exception as e: ## LOGGING - logging( + 
exception_logging( logger_fn=user_logger_fn, additional_args={ "exception_mapping_worked": exception_mapping_worked, diff --git a/pyproject.toml b/pyproject.toml index aebd15b8b0..216a058811 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "0.1.426" +version = "0.1.431" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License"
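For reference, a minimal usage sketch of what this diff adds: provider-prefixed model names (e.g. `together_ai/...`), the new `litellm.input_callback` hook, and Supabase request logging tied together by the auto-generated `litellm_call_id`. The table name and environment setup below are illustrative assumptions rather than values taken from the diff; the callback and `modify_integration` calls mirror the updated tests.

```python
# Illustrative sketch (assumed setup): requires `pip install litellm supabase`,
# SUPABASE_URL / SUPABASE_KEY, and the relevant provider API key in the environment.
import litellm
from litellm import completion

# New in this diff: "input" callbacks log the request as soon as the call starts
# (status "initiated"); success/failure callbacks later upsert the final status,
# carrying the litellm_call_id that the client wrapper generates per call.
litellm.input_callback = ["supabase"]
litellm.success_callback = ["supabase"]
litellm.failure_callback = ["supabase"]

# Point the Supabase integration at your table (placeholder name here;
# the column layout is shown in the updated supabase_integration.md above).
litellm.modify_integration("supabase", {"table_name": "request_logs"})

messages = [{"role": "user", "content": "Hello, how are you?"}]

# Also new: the provider can be passed as a prefix on the model name;
# completion() splits it into custom_llm_provider + model internally.
response = completion(
    model="together_ai/togethercomputer/llama-2-70b-chat",
    messages=messages,
)
print(response)
```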