forked from phoenix/litellm-mirror
Merge branch 'BerriAI:main' into main
commit 92536eec20
22 changed files with 246 additions and 317 deletions
@@ -35,7 +35,7 @@ messages = [{ "content": "Hello, how are you?","role": "user"}]
response = completion(model="gpt-3.5-turbo", messages=messages)

# cohere call
- response = completion("command-nightly", messages)
+ response = completion(model="command-nightly", messages)
```

Code Sample: [Getting Started Notebook](https://colab.research.google.com/drive/1gR3pY-JzDZahzpVdbGBtrNGDBmzUNJaJ?usp=sharing)
@@ -1,4 +1,4 @@
- # *🚅 litellm*
+ # litellm
[](https://pypi.org/project/litellm/)
[](https://pypi.org/project/litellm/0.1.1/)
[](https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main)
@@ -22,11 +22,13 @@ create table
    messages json null default '{}'::json,
    response json null default '{}'::json,
    end_user text null default ''::text,
    status text null default ''::text,
    error json null default '{}'::json,
    response_time real null default '0'::real,
    total_cost real null,
    additional_details json null default '{}'::json,
-   constraint request_logs_pkey primary key (id)
+   litellm_call_id text unique,
+   primary key (id)
) tablespace pg_default;
```
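Once the table exists, a quick way to sanity-check that rows are landing is to read the most recent entries back with supabase-py. The sketch below assumes the table is named `request_logs` (inferred from the old `request_logs_pkey` constraint) and that credentials are supplied via `SUPABASE_URL` / `SUPABASE_KEY`; neither detail is confirmed by this diff.

```python
# Hedged sketch: query the logging table defined above with supabase-py.
# Table name, env var names, and column list are assumptions from the snippet.
import os
from supabase import create_client

supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])

rows = (
    supabase.table("request_logs")
    .select("model, status, litellm_call_id, response_time, total_cost")
    .order("id", desc=True)
    .limit(10)
    .execute()
)
print(rows.data)
```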
@@ -8,7 +8,7 @@ const darkCodeTheme = require('prism-react-renderer/themes/dracula');
const config = {
  title: 'liteLLM',
  tagline: 'Simplify LLM API Calls',
- favicon: 'static/img/favicon.ico',
+ favicon: '/img/favicon.ico',

  // Set the production url of your site here
  url: 'https://litellm.vercel.app/',
@@ -1,6 +1,6 @@
import threading
from typing import Callable, List, Optional

input_callback: List[str] = []
success_callback: List[str] = []
failure_callback: List[str] = []
set_verbose = False
@@ -216,7 +216,6 @@ from .timeout import timeout
from .testing import *
from .utils import (
    client,
-   logging,
    exception_type,
    get_optional_params,
    modify_integration,

@@ -224,6 +223,7 @@ from .utils import (
    cost_per_token,
    completion_cost,
    get_litellm_params,
+   Logging
)
from .main import * # type: ignore
from .integrations import *
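With these import changes, the new `Logging` class (defined later in this diff in litellm/utils.py) joins the package's public surface while the old module-level `logging()` helper is removed from it. A minimal sketch of what callers can now import, assuming this revision of the package is installed:

```python
# Hedged sketch: names re-exported from the package root after this change.
from litellm import Logging, completion_cost, get_litellm_params

print(Logging, completion_cost, get_litellm_params)
```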
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -144,6 +144,28 @@ class Supabase:
        )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

    def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose):
        try:
            print_verbose(
                f"Supabase Logging - Enters input logging function for model {model}"
            )
            supabase_data_obj = {
                "model": model,
                "messages": messages,
                "end_user": end_user,
                "status": "initiated",
                "litellm_call_id": litellm_call_id
            }
            data, count = (
                self.supabase_client.table(self.supabase_table_name)
                .insert(supabase_data_obj)
                .execute()
            )
            print(f"data: {data}")
            pass
        except:
            pass

    def log_event(
        self,
        model,
@@ -152,6 +174,7 @@ class Supabase:
        response_obj,
        start_time,
        end_time,
+       litellm_call_id,
        print_verbose,
    ):
        try:
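The new `input_log_event` method above fires when "supabase" is registered as an input callback, writing a row with status "initiated" before the model is called; `log_event` then records the outcome against the same `litellm_call_id`. Wiring this up from user code might look like the sketch below (the Supabase credential environment variable names are assumptions; the callback lists mirror the commented-out Supabase test later in this diff):

```python
# Hedged sketch: register Supabase for input/success/failure logging so the
# "initiated" row written by input_log_event is later completed with the result.
import os
import litellm
from litellm import completion

os.environ["SUPABASE_URL"] = "https://your-project.supabase.co"  # hypothetical
os.environ["SUPABASE_KEY"] = "your-anon-key"                     # hypothetical

litellm.input_callback = ["supabase"]
litellm.success_callback = ["supabase"]
litellm.failure_callback = ["supabase"]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
```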
@ -176,16 +199,20 @@ class Supabase:
|
|||
"messages": messages,
|
||||
"response": response_obj["choices"][0]["message"]["content"],
|
||||
"end_user": end_user,
|
||||
"litellm_call_id": litellm_call_id,
|
||||
"status": "success"
|
||||
}
|
||||
print_verbose(
|
||||
f"Supabase Logging - final data object: {supabase_data_obj}"
|
||||
)
|
||||
data, count = (
|
||||
self.supabase_client.table(self.supabase_table_name)
|
||||
.insert(supabase_data_obj)
|
||||
.upsert(supabase_data_obj)
|
||||
.execute()
|
||||
)
|
||||
elif "error" in response_obj:
|
||||
if "Unable to map your input to a model." in response_obj["error"]:
|
||||
total_cost = 0
|
||||
supabase_data_obj = {
|
||||
"response_time": response_time,
|
||||
"model": response_obj["model"],
|
||||
|
@ -193,13 +220,15 @@ class Supabase:
|
|||
"messages": messages,
|
||||
"error": response_obj["error"],
|
||||
"end_user": end_user,
|
||||
"litellm_call_id": litellm_call_id,
|
||||
"status": "failure"
|
||||
}
|
||||
print_verbose(
|
||||
f"Supabase Logging - final data object: {supabase_data_obj}"
|
||||
)
|
||||
data, count = (
|
||||
self.supabase_client.table(self.supabase_table_name)
|
||||
.insert(supabase_data_obj)
|
||||
.upsert(supabase_data_obj)
|
||||
.execute()
|
||||
)
|
||||
|
||||
|
|
|
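The success and failure branches above switch from `.insert(...)` to `.upsert(...)`. Combined with the `litellm_call_id text unique` column added to the schema earlier in this diff, the intent appears to be a two-phase write: one row per call, created at request time and completed at response time. A standalone sketch of that flow with supabase-py (client setup, table name, and the conflict-resolution details are assumptions, not confirmed by the diff):

```python
# Hedged sketch of the two-phase logging write implied by the diff above.
import os
from supabase import create_client

supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
call_id = "hypothetical-uuid-1234"

# Phase 1: input_log_event inserts a placeholder row before the model call.
supabase.table("request_logs").insert(
    {"litellm_call_id": call_id, "model": "gpt-3.5-turbo", "status": "initiated"}
).execute()

# Phase 2: log_event upserts the outcome for the same call id. Depending on the
# client/table setup, on_conflict may be needed to merge on the unique column.
supabase.table("request_logs").upsert(
    {"litellm_call_id": call_id, "status": "success", "response_time": 1.2}
).execute()
```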
@@ -1,7 +1,6 @@
import os, json
from enum import Enum
import requests
- from litellm import logging
import time
from typing import Callable
from litellm.utils import ModelResponse

@@ -22,11 +21,12 @@ class AnthropicError(Exception):


class AnthropicLLM:
-   def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
+   def __init__(self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None):
        self.encoding = encoding
        self.default_max_tokens_to_sample = default_max_tokens_to_sample
        self.completion_url = "https://api.anthropic.com/v1/complete"
        self.api_key = api_key
+       self.logging_obj = logging_obj
        self.validate_environment(api_key=api_key)

    def validate_environment(
@ -84,15 +84,7 @@ class AnthropicLLM:
|
|||
}
|
||||
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
additional_args={
|
||||
"litellm_params": litellm_params,
|
||||
"optional_params": optional_params,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
|
||||
## COMPLETION CALL
|
||||
response = requests.post(
|
||||
self.completion_url, headers=self.headers, data=json.dumps(data)
|
||||
|
@ -101,16 +93,7 @@ class AnthropicLLM:
|
|||
return response.iter_lines()
|
||||
else:
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
additional_args={
|
||||
"litellm_params": litellm_params,
|
||||
"optional_params": optional_params,
|
||||
"original_response": response.text,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
|
||||
print_verbose(f"raw model_response: {response.text}")
|
||||
## RESPONSE OBJECT
|
||||
completion_response = response.json()
|
||||
|
|
|
@@ -2,7 +2,6 @@
import os, json
from enum import Enum
import requests
- from litellm import logging
import time
from typing import Callable
from litellm.utils import ModelResponse

@@ -19,8 +18,9 @@ class HuggingfaceError(Exception):


class HuggingfaceRestAPILLM:
-   def __init__(self, encoding, api_key=None) -> None:
+   def __init__(self, encoding, logging_obj, api_key=None) -> None:
        self.encoding = encoding
+       self.logging_obj = logging_obj
        self.validate_environment(api_key=api_key)

    def validate_environment(
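Both `AnthropicLLM` (above) and `HuggingfaceRestAPILLM` now receive a pre-built logging object instead of importing the module-level `logging` helper. The call sites later in this diff pass `logging_obj=logging`, e.g. `HuggingfaceRestAPILLM(encoding=encoding, api_key=huggingface_key, logging_obj=logging)`. A minimal sketch of constructing one by hand; the handler's module path, the model name, and the parameter values are assumptions:

```python
# Hedged sketch: build a Logging object and inject it into a provider handler,
# mirroring the constructor change above. The import path for the handler is
# an assumption, not confirmed by this diff.
import os
import tiktoken
from litellm.utils import Logging
from litellm.llms.huggingface_restapi import HuggingfaceRestAPILLM

logging_obj = Logging(
    model="bigcode/starcoder",
    messages=[{"role": "user", "content": "def fib(n):"}],
    optional_params={},
    litellm_params={"logger_fn": None, "litellm_call_id": "hypothetical-id"},
)

hf_client = HuggingfaceRestAPILLM(
    encoding=tiktoken.get_encoding("cl100k_base"),
    api_key=os.environ.get("HUGGINGFACE_API_KEY"),
    logging_obj=logging_obj,
)
```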
@ -74,18 +74,10 @@ class HuggingfaceRestAPILLM:
|
|||
optional_params["max_new_tokens"] = value
|
||||
data = {
|
||||
"inputs": prompt,
|
||||
# "parameters": optional_params
|
||||
"parameters": optional_params
|
||||
}
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
additional_args={
|
||||
"litellm_params": litellm_params,
|
||||
"optional_params": optional_params,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
|
||||
## COMPLETION CALL
|
||||
response = requests.post(
|
||||
completion_url, headers=self.headers, data=json.dumps(data)
|
||||
|
@ -94,17 +86,7 @@ class HuggingfaceRestAPILLM:
|
|||
return response.iter_lines()
|
||||
else:
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
additional_args={
|
||||
"litellm_params": litellm_params,
|
||||
"optional_params": optional_params,
|
||||
"original_response": response.text,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
print_verbose(f"raw model_response: {response.text}")
|
||||
self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
|
||||
## RESPONSE OBJECT
|
||||
completion_response = response.json()
|
||||
print_verbose(f"response: {completion_response}")
|
||||
|
|
litellm/main.py (280 changed lines)
@@ -6,11 +6,11 @@ from copy import deepcopy
import litellm
from litellm import ( # type: ignore
    client,
-   logging,
    exception_type,
    timeout,
    get_optional_params,
    get_litellm_params,
+   Logging
)
from litellm.utils import (
    get_secret,
@@ -85,6 +85,7 @@ def completion(
    azure=False,
    custom_llm_provider=None,
    custom_api_base=None,
+   litellm_call_id=None,
    # model specific optional params
    # used by text-bison only
    top_k=40,
@@ -94,6 +95,11 @@
    model_response = ModelResponse()
    if azure: # this flag is deprecated, remove once notebooks are also updated.
        custom_llm_provider = "azure"
+   elif model.split("/", 1)[0] in litellm.provider_list: # allow custom provider to be passed in via the model name "azure/chatgpt-test"
+       custom_llm_provider = model.split("/", 1)[0]
+       model = model.split("/", 1)[1]
+       if "replicate" == custom_llm_provider and "/" not in model: # handle the "replicate/llama2..." edge-case
+           model = custom_llm_provider + "/" + model
    args = locals()
    # check if user passed in any of the OpenAI optional params
    optional_params = get_optional_params(
@@ -124,8 +130,9 @@ def completion(
        verbose=verbose,
        custom_llm_provider=custom_llm_provider,
        custom_api_base=custom_api_base,
+       litellm_call_id=litellm_call_id
    )

+   logging = Logging(model=model, messages=messages, optional_params=optional_params, litellm_params=litellm_params)
    if custom_llm_provider == "azure":
        # azure configs
        openai.api_type = "azure"
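The `model.split("/", 1)` branch added a few hunks above and the `Logging` object instantiated here are the two pieces this PR threads through the rest of `completion()`. The provider-prefix routing can be restated on its own as the sketch below, using a toy provider list; `split_provider` is a hypothetical helper for illustration, not a function in the library:

```python
# Hedged sketch restating the provider-prefix routing added in this diff.
def split_provider(model: str, provider_list):
    if "/" in model and model.split("/", 1)[0] in provider_list:
        provider, rest = model.split("/", 1)
        # replicate model ids normally contain a "/", so re-attach the prefix
        if provider == "replicate" and "/" not in rest:
            rest = f"{provider}/{rest}"
        return provider, rest
    return None, model


print(split_provider("together_ai/togethercomputer/llama-2-70b-chat",
                     ["azure", "replicate", "together_ai"]))
# -> ('together_ai', 'togethercomputer/llama-2-70b-chat')
print(split_provider("replicate/llama-2-70b", ["azure", "replicate", "together_ai"]))
# -> ('replicate', 'replicate/llama-2-70b')
```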
@ -139,16 +146,14 @@ def completion(
|
|||
if litellm.api_version is not None
|
||||
else get_secret("AZURE_API_VERSION")
|
||||
)
|
||||
if not api_key and litellm.azure_key:
|
||||
api_key = litellm.azure_key
|
||||
elif not api_key and get_secret("AZURE_API_KEY"):
|
||||
api_key = get_secret("AZURE_API_KEY")
|
||||
# set key
|
||||
openai.api_key = api_key or litellm.azure_key or get_secret("AZURE_API_KEY")
|
||||
openai.api_key = api_key
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=messages,
|
||||
additional_args=optional_params,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=messages, api_key=openai.api_key, additional_args={"headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base})
|
||||
## COMPLETION CALL
|
||||
if litellm.headers:
|
||||
response = openai.ChatCompletion.create(
|
||||
|
@ -161,6 +166,8 @@ def completion(
|
|||
response = openai.ChatCompletion.create(
|
||||
model=model, messages=messages, **optional_params
|
||||
)
|
||||
## LOGGING
|
||||
logging.post_call(input=messages, api_key=openai.api_key, original_response=response, additional_args={"headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base})
|
||||
elif (
|
||||
model in litellm.open_ai_chat_completion_models
|
||||
or custom_llm_provider == "custom_openai"
|
||||
|
@ -177,18 +184,15 @@ def completion(
|
|||
if litellm.organization:
|
||||
openai.organization = litellm.organization
|
||||
# set API KEY
|
||||
openai.api_key = (
|
||||
api_key or litellm.openai_key or get_secret("OPENAI_API_KEY")
|
||||
)
|
||||
if not api_key and litellm.openai_key:
|
||||
api_key = litellm.openai_key
|
||||
elif not api_key and get_secret("AZURE_API_KEY"):
|
||||
api_key = get_secret("OPENAI_API_KEY")
|
||||
|
||||
openai.api_key = api_key
|
||||
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=messages,
|
||||
additional_args=args,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=messages, api_key=api_key, additional_args={"headers": litellm.headers, "api_base": api_base})
|
||||
## COMPLETION CALL
|
||||
if litellm.headers:
|
||||
response = openai.ChatCompletion.create(
|
||||
|
@ -201,6 +205,8 @@ def completion(
|
|||
response = openai.ChatCompletion.create(
|
||||
model=model, messages=messages, **optional_params
|
||||
)
|
||||
## LOGGING
|
||||
logging.post_call(input=messages, api_key=api_key, original_response=response, additional_args={"headers": litellm.headers})
|
||||
elif model in litellm.open_ai_text_completion_models:
|
||||
openai.api_type = "openai"
|
||||
openai.api_base = (
|
||||
|
@ -209,20 +215,19 @@ def completion(
|
|||
else "https://api.openai.com/v1"
|
||||
)
|
||||
openai.api_version = None
|
||||
openai.api_key = (
|
||||
api_key or litellm.openai_key or get_secret("OPENAI_API_KEY")
|
||||
)
|
||||
# set API KEY
|
||||
if not api_key and litellm.openai_key:
|
||||
api_key = litellm.openai_key
|
||||
elif not api_key and get_secret("AZURE_API_KEY"):
|
||||
api_key = get_secret("OPENAI_API_KEY")
|
||||
|
||||
openai.api_key = api_key
|
||||
|
||||
if litellm.organization:
|
||||
openai.organization = litellm.organization
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
additional_args=optional_params,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=prompt, api_key=api_key, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type})
|
||||
## COMPLETION CALL
|
||||
if litellm.headers:
|
||||
response = openai.Completion.create(
|
||||
|
@ -232,19 +237,10 @@ def completion(
|
|||
)
|
||||
else:
|
||||
response = openai.Completion.create(model=model, prompt=prompt)
|
||||
completion_response = response["choices"][0]["text"]
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"max_tokens": max_tokens,
|
||||
"original_response": completion_response,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.post_call(input=prompt, api_key=api_key, original_response=response, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type})
|
||||
## RESPONSE OBJECT
|
||||
completion_response = response["choices"][0]["text"]
|
||||
model_response["choices"][0]["message"]["content"] = completion_response
|
||||
model_response["created"] = response["created"]
|
||||
model_response["model"] = model
|
||||
|
@ -273,13 +269,7 @@ def completion(
|
|||
input["max_length"] = max_tokens # for t5 models
|
||||
input["max_new_tokens"] = max_tokens # for llama2 models
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=input,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={"max_tokens": max_tokens},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=prompt, api_key=replicate_key, additional_args={"complete_input_dict": input, "max_tokens": max_tokens})
|
||||
## COMPLETION CALL
|
||||
output = replicate.run(model, input=input)
|
||||
if "stream" in optional_params and optional_params["stream"] == True:
|
||||
|
@ -292,16 +282,8 @@ def completion(
|
|||
response += item
|
||||
completion_response = response
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"max_tokens": max_tokens,
|
||||
"original_response": completion_response,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.post_call(input=prompt, api_key=replicate_key, original_response=completion_response, additional_args={"complete_input_dict": input, "max_tokens": max_tokens})
|
||||
## USAGE
|
||||
prompt_tokens = len(encoding.encode(prompt))
|
||||
completion_tokens = len(encoding.encode(completion_response))
|
||||
## RESPONSE OBJECT
|
||||
|
@ -322,6 +304,7 @@ def completion(
|
|||
encoding=encoding,
|
||||
default_max_tokens_to_sample=litellm.max_tokens,
|
||||
api_key=anthropic_key,
|
||||
logging_obj = logging # model call logging done inside the class as we make need to modify I/O to fit anthropic's requirements
|
||||
)
|
||||
model_response = anthropic_client.completion(
|
||||
model=model,
|
||||
|
@ -357,13 +340,7 @@ def completion(
|
|||
"OR_API_KEY"
|
||||
)
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=messages,
|
||||
additional_args=optional_params,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=messages, api_key=openai.api_key)
|
||||
## COMPLETION CALL
|
||||
if litellm.headers:
|
||||
response = openai.ChatCompletion.create(
|
||||
|
@ -390,6 +367,8 @@ def completion(
|
|||
},
|
||||
**optional_params,
|
||||
)
|
||||
## LOGGING
|
||||
logging.post_call(input=messages, api_key=openai.api_key, original_response=response)
|
||||
elif model in litellm.cohere_models:
|
||||
# import cohere/if it fails then pip install cohere
|
||||
install_and_import("cohere")
|
||||
|
@ -404,31 +383,17 @@ def completion(
|
|||
co = cohere.Client(cohere_key)
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=prompt, api_key=cohere_key)
|
||||
## COMPLETION CALL
|
||||
response = co.generate(model=model, prompt=prompt, **optional_params)
|
||||
if "stream" in optional_params and optional_params["stream"] == True:
|
||||
# don't try to access stream object,
|
||||
response = CustomStreamWrapper(response, model)
|
||||
return response
|
||||
|
||||
completion_response = response[0].text
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"max_tokens": max_tokens,
|
||||
"original_response": completion_response,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.post_call(input=prompt, api_key=cohere_key, original_response=response)
|
||||
## USAGE
|
||||
completion_response = response[0].text
|
||||
prompt_tokens = len(encoding.encode(prompt))
|
||||
completion_tokens = len(encoding.encode(completion_response))
|
||||
## RESPONSE OBJECT
|
||||
|
@ -452,7 +417,7 @@ def completion(
|
|||
or os.environ.get("HUGGINGFACE_API_KEY")
|
||||
)
|
||||
huggingface_client = HuggingfaceRestAPILLM(
|
||||
encoding=encoding, api_key=huggingface_key
|
||||
encoding=encoding, api_key=huggingface_key, logging_obj=logging
|
||||
)
|
||||
model_response = huggingface_client.completion(
|
||||
model=model,
|
||||
|
@ -487,12 +452,7 @@ def completion(
|
|||
) # TODO: Add chat support for together AI
|
||||
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=prompt, api_key=TOGETHER_AI_TOKEN)
|
||||
if stream == True:
|
||||
return together_ai_completion_streaming(
|
||||
{
|
||||
|
@ -514,17 +474,7 @@ def completion(
|
|||
headers=headers,
|
||||
)
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"max_tokens": max_tokens,
|
||||
"original_response": res.text,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
|
||||
logging.post_call(input=prompt, api_key=TOGETHER_AI_TOKEN, original_response=res.text)
|
||||
# make this safe for reading, if output does not exist raise an error
|
||||
json_response = res.json()
|
||||
if "output" not in json_response:
|
||||
|
@ -557,16 +507,7 @@ def completion(
|
|||
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"litellm_params": litellm_params,
|
||||
"optional_params": optional_params,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=prompt, api_key=None)
|
||||
|
||||
chat_model = ChatModel.from_pretrained(model)
|
||||
|
||||
|
@ -574,16 +515,7 @@ def completion(
|
|||
completion_response = chat.send_message(prompt, **optional_params)
|
||||
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"max_tokens": max_tokens,
|
||||
"original_response": completion_response,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.post_call(input=prompt, api_key=None, original_response=completion_response)
|
||||
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = completion_response
|
||||
|
@ -602,27 +534,13 @@ def completion(
|
|||
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=prompt, api_key=None)
|
||||
|
||||
vertex_model = TextGenerationModel.from_pretrained(model)
|
||||
completion_response = vertex_model.predict(prompt, **optional_params)
|
||||
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"max_tokens": max_tokens,
|
||||
"original_response": completion_response,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
|
||||
logging.post_call(input=prompt, api_key=None, original_response=completion_response)
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = completion_response
|
||||
model_response["created"] = time.time()
|
||||
|
@ -636,12 +554,7 @@ def completion(
|
|||
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=prompt, api_key=ai21.api_key)
|
||||
|
||||
ai21_response = ai21.Completion.execute(
|
||||
model=model,
|
||||
|
@ -650,16 +563,7 @@ def completion(
|
|||
completion_response = ai21_response["completions"][0]["data"]["text"]
|
||||
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"max_tokens": max_tokens,
|
||||
"original_response": completion_response,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.post_call(input=prompt, api_key=ai21.api_key, original_response=completion_response)
|
||||
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = completion_response
|
||||
|
@ -673,7 +577,8 @@ def completion(
|
|||
prompt = " ".join([message["content"] for message in messages])
|
||||
|
||||
## LOGGING
|
||||
logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn)
|
||||
logging.pre_call(input=prompt, api_key=None, additional_args={"endpoint": endpoint})
|
||||
|
||||
generator = get_ollama_response_stream(endpoint, model, prompt)
|
||||
# assume all responses are streamed
|
||||
return generator
|
||||
|
@ -688,12 +593,7 @@ def completion(
|
|||
|
||||
prompt = " ".join([message["content"] for message in messages])
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=prompt, api_key=base_ten_key)
|
||||
|
||||
base_ten__model = baseten.deployed_model_version_id(model)
|
||||
|
||||
|
@ -703,16 +603,8 @@ def completion(
|
|||
if type(completion_response) == dict:
|
||||
completion_response = completion_response["generated_text"]
|
||||
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"max_tokens": max_tokens,
|
||||
"original_response": completion_response,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
## LOGGING
|
||||
logging.post_call(input=prompt, api_key=base_ten_key, original_response=completion_response)
|
||||
|
||||
## RESPONSE OBJECT
|
||||
model_response["choices"][0]["message"]["content"] = completion_response
|
||||
|
@ -729,26 +621,14 @@ def completion(
|
|||
prompt = " ".join([message["content"] for message in messages])
|
||||
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.pre_call(input=prompt, api_key=None, additional_args={"url": url, "max_new_tokens": 100})
|
||||
|
||||
response = requests.post(
|
||||
url, data={"inputs": prompt, "max_new_tokens": 100, "model": model}
|
||||
)
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=prompt,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={
|
||||
"max_tokens": max_tokens,
|
||||
"original_response": response,
|
||||
},
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
logging.post_call(input=prompt, api_key=None, original_response=response.text, additional_args={"url": url, "max_new_tokens": 100})
|
||||
|
||||
completion_response = response.json()["outputs"]
|
||||
|
||||
# RESPONSE OBJECT
|
||||
|
@ -757,13 +637,6 @@ def completion(
|
|||
model_response["model"] = model
|
||||
response = model_response
|
||||
else:
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=messages,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logger_fn=logger_fn,
|
||||
)
|
||||
args = locals()
|
||||
raise ValueError(
|
||||
f"Unable to map your input to a model. Check your input - {args}"
|
||||
|
@ -771,14 +644,7 @@ def completion(
|
|||
return response
|
||||
except Exception as e:
|
||||
## LOGGING
|
||||
logging(
|
||||
model=model,
|
||||
input=messages,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
additional_args={"max_tokens": max_tokens},
|
||||
logger_fn=logger_fn,
|
||||
exception=e,
|
||||
)
|
||||
logging.post_call(input=messages, api_key=api_key, original_response=e)
|
||||
## Map to OpenAI Exception
|
||||
raise exception_type(
|
||||
model=model, custom_llm_provider=custom_llm_provider, original_exception=e
|
||||
|
@@ -810,9 +676,10 @@ def batch_completion(*args, **kwargs):
@timeout( # type: ignore
    60
) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
- def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
+ def embedding(model, input=[], azure=False, force_timeout=60, litellm_call_id=None, logger_fn=None):
    try:
        response = None
+       logging = Logging(model=model, messages=input, optional_params={}, litellm_params={"azure": azure, "force_timeout": force_timeout, "logger_fn": logger_fn, "litellm_call_id": litellm_call_id})
        if azure == True:
            # azure configs
            openai.api_type = "azure"
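`embedding()` now accepts `litellm_call_id` and builds its own `Logging` object, mirroring `completion()`. A usage sketch (requires `OPENAI_API_KEY`; the model name is an assumption about what `litellm.open_ai_embedding_models` contains at this version):

```python
# Hedged sketch: pass an explicit call id so the embedding call can be
# correlated with its log rows (the @client wrapper normally generates it).
import uuid
from litellm import embedding

response = embedding(
    model="text-embedding-ada-002",
    input=["good morning from litellm"],
    litellm_call_id=str(uuid.uuid4()),
)
print(response["data"][0]["embedding"][:5])
```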
@ -820,7 +687,7 @@ def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
|
|||
openai.api_version = get_secret("AZURE_API_VERSION")
|
||||
openai.api_key = get_secret("AZURE_API_KEY")
|
||||
## LOGGING
|
||||
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||
logging.pre_call(input=input, api_key=openai.api_key, additional_args={"api_type": openai.api_type, "api_base": openai.api_base, "api_version": openai.api_version})
|
||||
## EMBEDDING CALL
|
||||
response = openai.Embedding.create(input=input, engine=model)
|
||||
print_verbose(f"response_value: {str(response)[:50]}")
|
||||
|
@ -830,19 +697,16 @@ def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
|
|||
openai.api_version = None
|
||||
openai.api_key = get_secret("OPENAI_API_KEY")
|
||||
## LOGGING
|
||||
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||
logging.pre_call(input=input, api_key=openai.api_key, additional_args={"api_type": openai.api_type, "api_base": openai.api_base, "api_version": openai.api_version})
|
||||
## EMBEDDING CALL
|
||||
response = openai.Embedding.create(input=input, model=model)
|
||||
print_verbose(f"response_value: {str(response)[:50]}")
|
||||
else:
|
||||
logging(model=model, input=input, azure=azure, logger_fn=logger_fn)
|
||||
args = locals()
|
||||
raise ValueError(f"No valid embedding model args passed in - {args}")
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
# log the original exception
|
||||
logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e)
|
||||
## Map to OpenAI Exception
|
||||
raise exception_type(model=model, original_exception=e, custom_llm_provider="azure" if azure==True else None)
|
||||
raise e
|
||||
|
|
|
@@ -25,6 +25,18 @@ def logger_fn(user_model_dict):
    print(f"user_model_dict: {user_model_dict}")


def test_completion_custom_provider_model_name():
    try:
        response = completion(
            model="together_ai/togethercomputer/llama-2-70b-chat", messages=messages, logger_fn=logger_fn
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

test_completion_custom_provider_model_name()


def test_completion_claude():
    try:
        response = completion(
@@ -77,7 +89,7 @@ def test_completion_claude_stream():
def test_completion_cohere():
    try:
        response = completion(
-           model="command-nightly", messages=messages, max_tokens=100
+           model="command-nightly", messages=messages, max_tokens=100, logit_bias={40: 10}
        )
        # Add any assertions here to check the response
        print(response)

@@ -91,7 +103,6 @@ def test_completion_cohere():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_completion_cohere_stream():
    try:
        messages = [
@@ -9,10 +9,11 @@
# import litellm
# from litellm import embedding, completion

+ # litellm.input_callback = ["supabase"]
# litellm.success_callback = ["supabase"]
# litellm.failure_callback = ["supabase"]

- # litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
+ # litellm.modify_integration("supabase",{"table_name": "test_table"})

# litellm.set_verbose = True
litellm/utils.py (163 changed lines)
@ -135,48 +135,105 @@ def install_and_import(package: str):
|
|||
|
||||
####### LOGGING ###################
|
||||
# Logging function -> log the exact model details + what's being sent | Non-Blocking
|
||||
def logging(
|
||||
model=None,
|
||||
input=None,
|
||||
custom_llm_provider=None,
|
||||
azure=False,
|
||||
class Logging:
|
||||
def __init__(self, model, messages, optional_params, litellm_params):
|
||||
self.model = model
|
||||
self.messages = messages
|
||||
self.optional_params = optional_params
|
||||
self.litellm_params = litellm_params
|
||||
self.logger_fn = litellm_params["logger_fn"]
|
||||
self.model_call_details = {
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"optional_params": self.optional_params,
|
||||
"litellm_params": self.litellm_params,
|
||||
}
|
||||
|
||||
def pre_call(self, input, api_key, additional_args={}):
|
||||
try:
|
||||
print(f"logging pre call for model: {self.model}")
|
||||
self.model_call_details["input"] = input
|
||||
self.model_call_details["api_key"] = api_key
|
||||
self.model_call_details["additional_args"] = additional_args
|
||||
|
||||
## User Logging -> if you pass in a custom logging function
|
||||
print_verbose(
|
||||
f"Logging Details: logger_fn - {self.logger_fn} | callable(logger_fn) - {callable(self.logger_fn)}"
|
||||
)
|
||||
if self.logger_fn and callable(self.logger_fn):
|
||||
try:
|
||||
self.logger_fn(
|
||||
self.model_call_details
|
||||
) # Expectation: any logger function passed in by the user should accept a dict object
|
||||
except Exception as e:
|
||||
print_verbose(
|
||||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
|
||||
)
|
||||
|
||||
## Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
|
||||
for callback in litellm.input_callback:
|
||||
try:
|
||||
if callback == "supabase":
|
||||
print_verbose("reaches supabase for logging!")
|
||||
model = self.model
|
||||
messages = self.messages
|
||||
print(f"litellm._thread_context: {litellm._thread_context}")
|
||||
supabaseClient.input_log_event(
|
||||
model=model,
|
||||
messages=messages,
|
||||
end_user=litellm._thread_context.user,
|
||||
litellm_call_id=self.litellm_params["litellm_call_id"],
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
pass
|
||||
except:
|
||||
pass
|
||||
except:
|
||||
print_verbose(
|
||||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
|
||||
)
|
||||
pass
|
||||
|
||||
def post_call(self, input, api_key, original_response, additional_args={}):
|
||||
# Do something here
|
||||
try:
|
||||
self.model_call_details["input"] = input
|
||||
self.model_call_details["api_key"] = api_key
|
||||
self.model_call_details["original_response"] = original_response
|
||||
self.model_call_details["additional_args"] = additional_args
|
||||
|
||||
## User Logging -> if you pass in a custom logging function
|
||||
print_verbose(
|
||||
f"Logging Details: logger_fn - {self.logger_fn} | callable(logger_fn) - {callable(self.logger_fn)}"
|
||||
)
|
||||
if self.logger_fn and callable(self.logger_fn):
|
||||
try:
|
||||
self.logger_fn(
|
||||
self.model_call_details
|
||||
) # Expectation: any logger function passed in by the user should accept a dict object
|
||||
except Exception as e:
|
||||
print_verbose(
|
||||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
|
||||
)
|
||||
except:
|
||||
print_verbose(
|
||||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
|
||||
)
|
||||
pass
|
||||
|
||||
# Add more methods as needed
|
||||
|
||||
|
||||
def exception_logging(
|
||||
additional_args={},
|
||||
logger_fn=None,
|
||||
exception=None,
|
||||
):
|
||||
try:
|
||||
model_call_details = {}
|
||||
if model:
|
||||
model_call_details["model"] = model
|
||||
if azure:
|
||||
model_call_details["azure"] = azure
|
||||
if custom_llm_provider:
|
||||
model_call_details["custom_llm_provider"] = custom_llm_provider
|
||||
if exception:
|
||||
model_call_details["exception"] = exception
|
||||
if input:
|
||||
model_call_details["input"] = input
|
||||
|
||||
if len(additional_args):
|
||||
model_call_details["additional_args"] = additional_args
|
||||
# log additional call details -> api key, etc.
|
||||
if model:
|
||||
if (
|
||||
azure == True
|
||||
or model in litellm.open_ai_chat_completion_models
|
||||
or model in litellm.open_ai_chat_completion_models
|
||||
or model in litellm.open_ai_embedding_models
|
||||
):
|
||||
model_call_details["api_type"] = openai.api_type
|
||||
model_call_details["api_base"] = openai.api_base
|
||||
model_call_details["api_version"] = openai.api_version
|
||||
model_call_details["api_key"] = openai.api_key
|
||||
elif "replicate" in model:
|
||||
model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN")
|
||||
elif model in litellm.anthropic_models:
|
||||
model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY")
|
||||
elif model in litellm.cohere_models:
|
||||
model_call_details["api_key"] = os.environ.get("COHERE_API_KEY")
|
||||
model_call_details["additional_args"] = additional_args
|
||||
## User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs
|
||||
print_verbose(
|
||||
f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}"
|
||||
|
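The `Logging` class introduced above replaces the old free-standing `logging()` function: callers construct it once per request, call `pre_call` with the outbound payload and `post_call` with the raw provider response, and both methods swallow their own exceptions so logging never blocks the call. A small usage sketch with placeholder values:

```python
# Hedged sketch: how the provider classes in this diff drive the Logging object.
from litellm.utils import Logging

messages = [{"role": "user", "content": "Hey, how's it going?"}]
logging_obj = Logging(
    model="claude-instant-1",
    messages=messages,
    optional_params={"max_tokens_to_sample": 256},
    litellm_params={"logger_fn": print, "litellm_call_id": "hypothetical-id"},
)

# Before the HTTP request: record the exact payload being sent.
logging_obj.pre_call(
    input="\n\nHuman: Hey, how's it going?\n\nAssistant:",
    api_key="sk-ant-placeholder",
    additional_args={"complete_input_dict": {"max_tokens_to_sample": 256}},
)

# ... provider HTTP call would happen here ...

# After the response: record the raw provider output.
logging_obj.post_call(
    input="\n\nHuman: Hey, how's it going?\n\nAssistant:",
    api_key="sk-ant-placeholder",
    original_response='{"completion": " Going well!"}',
)
```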
@@ -206,10 +263,10 @@ def client(original_function):
    try:
        global callback_list, add_breadcrumb, user_logger_fn
        if (
-           len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0
+           len(litellm.input_callback) > 0 or len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0
        ) and len(callback_list) == 0:
            callback_list = list(
-               set(litellm.success_callback + litellm.failure_callback)
+               set(litellm.input_callback + litellm.success_callback + litellm.failure_callback)
            )
            set_callbacks(
                callback_list=callback_list,
@@ -299,13 +356,16 @@ def client(original_function):
        result = None
        try:
            function_setup(*args, **kwargs)
            ## MODEL CALL
+           litellm_call_id = str(uuid.uuid4())
+           kwargs["litellm_call_id"] = litellm_call_id
            ## [OPTIONAL] CHECK CACHE
            start_time = datetime.datetime.now()
            if (litellm.caching or litellm.caching_with_models) and (
                cached_result := check_cache(*args, **kwargs)
            ) is not None:
                result = cached_result
            else:
                ## MODEL CALL
                result = original_function(*args, **kwargs)
            end_time = datetime.datetime.now()
            ## Add response to CACHE
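The wrapper change above gives every call a fresh UUID before dispatch, which is what `completion()`, `embedding()`, and the Supabase integration use to tie their log rows together. A standalone restatement of the pattern (the names here are hypothetical, not the library's):

```python
# Hedged sketch of the call-id threading added in this diff: generate a UUID in
# a decorator and forward it to the wrapped function as litellm_call_id.
import uuid


def with_call_id(fn):
    def wrapper(*args, **kwargs):
        kwargs["litellm_call_id"] = str(uuid.uuid4())
        return fn(*args, **kwargs)
    return wrapper


@with_call_id
def fake_completion(model, messages, litellm_call_id=None):
    return {"model": model, "litellm_call_id": litellm_call_id}


print(fake_completion("gpt-3.5-turbo", [{"role": "user", "content": "hi"}]))
```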
@@ -399,6 +459,7 @@ def get_litellm_params(
    together_ai=False,
    custom_llm_provider=None,
    custom_api_base=None,
+   litellm_call_id=None,
):
    litellm_params = {
        "return_async": return_async,

@@ -408,6 +469,7 @@ def get_litellm_params(
        "verbose": verbose,
        "custom_llm_provider": custom_llm_provider,
        "custom_api_base": custom_api_base,
+       "litellm_call_id": litellm_call_id
    }

    return litellm_params
@@ -452,6 +514,8 @@ def get_optional_params(
        optional_params["temperature"] = temperature
        if max_tokens != float("inf"):
            optional_params["max_tokens"] = max_tokens
+       if logit_bias != {}:
+           optional_params["logit_bias"] = logit_bias
        return optional_params
    elif custom_llm_provider == "replicate":
        # any replicate models
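`get_optional_params` gains a `logit_bias` pass-through here, following the same only-forward-non-defaults pattern as the surrounding parameters (the updated cohere test earlier in this diff exercises it with `logit_bias={40: 10}`). A hedged restatement of that filtering pattern with a hypothetical helper:

```python
# Hedged sketch of the filtering pattern above: only non-default values are
# forwarded to the provider, so the new logit_bias is dropped when left empty.
def build_optional_params(max_tokens=float("inf"), logit_bias={}):
    optional_params = {}
    if max_tokens != float("inf"):
        optional_params["max_tokens"] = max_tokens
    if logit_bias != {}:
        optional_params["logit_bias"] = logit_bias
    return optional_params


print(build_optional_params(max_tokens=100, logit_bias={40: 10}))
# -> {'max_tokens': 100, 'logit_bias': {40: 10}}
print(build_optional_params())
# -> {}
```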
@ -565,7 +629,8 @@ def set_callbacks(callback_list):
|
|||
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient
|
||||
try:
|
||||
for callback in callback_list:
|
||||
if callback == "sentry" or "SENTRY_API_URL" in os.environ:
|
||||
print(f"callback: {callback}")
|
||||
if callback == "sentry":
|
||||
try:
|
||||
import sentry_sdk
|
||||
except ImportError:
|
||||
|
@ -621,6 +686,7 @@ def set_callbacks(callback_list):
|
|||
elif callback == "berrispend":
|
||||
berrispendLogger = BerriSpendLogger()
|
||||
elif callback == "supabase":
|
||||
print(f"instantiating supabase")
|
||||
supabaseClient = Supabase()
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
@ -741,7 +807,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
|
|||
"completion_tokens": 0,
|
||||
},
|
||||
}
|
||||
print(f"litellm._thread_context: {litellm._thread_context}")
|
||||
supabaseClient.log_event(
|
||||
model=model,
|
||||
messages=messages,
|
||||
|
@ -749,9 +814,9 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
|
|||
response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
litellm_call_id=kwargs["litellm_call_id"],
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
|
||||
except:
|
||||
print_verbose(
|
||||
f"Error Occurred while logging failure: {traceback.format_exc()}"
|
||||
|
@ -767,7 +832,7 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
|
|||
pass
|
||||
except Exception as e:
|
||||
## LOGGING
|
||||
logging(logger_fn=user_logger_fn, exception=e)
|
||||
exception_logging(logger_fn=user_logger_fn, exception=e)
|
||||
pass
|
||||
|
||||
|
||||
|
@ -847,11 +912,12 @@ def handle_success(args, kwargs, result, start_time, end_time):
|
|||
response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
litellm_call_id=kwargs["litellm_call_id"],
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
except Exception as e:
|
||||
## LOGGING
|
||||
logging(logger_fn=user_logger_fn, exception=e)
|
||||
exception_logging(logger_fn=user_logger_fn, exception=e)
|
||||
print_verbose(
|
||||
f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}"
|
||||
)
|
||||
|
@ -862,7 +928,7 @@ def handle_success(args, kwargs, result, start_time, end_time):
|
|||
pass
|
||||
except Exception as e:
|
||||
## LOGGING
|
||||
logging(logger_fn=user_logger_fn, exception=e)
|
||||
exception_logging(logger_fn=user_logger_fn, exception=e)
|
||||
print_verbose(
|
||||
f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}"
|
||||
)
|
||||
|
@ -910,15 +976,6 @@ def exception_type(model, original_exception, custom_llm_provider):
|
|||
exception_type = type(original_exception).__name__
|
||||
else:
|
||||
exception_type = ""
|
||||
logging(
|
||||
model=model,
|
||||
additional_args={
|
||||
"error_str": error_str,
|
||||
"exception_type": exception_type,
|
||||
"original_exception": original_exception,
|
||||
},
|
||||
logger_fn=user_logger_fn,
|
||||
)
|
||||
if "claude" in model: # one of the anthropics
|
||||
if hasattr(original_exception, "status_code"):
|
||||
print_verbose(f"status_code: {original_exception.status_code}")
|
||||
|
@ -1028,7 +1085,7 @@ def exception_type(model, original_exception, custom_llm_provider):
|
|||
raise original_exception
|
||||
except Exception as e:
|
||||
## LOGGING
|
||||
logging(
|
||||
exception_logging(
|
||||
logger_fn=user_logger_fn,
|
||||
additional_args={
|
||||
"exception_mapping_worked": exception_mapping_worked,
|
||||
|
|
|
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
- version = "0.1.426"
+ version = "0.1.431"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"