Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

Merge pull request #387 from WilliamEspegren/main
Rebuild stream chunks to openAI object

Commit e83d89d12f
4 changed files with 160 additions and 26 deletions

@@ -1 +0,0 @@
-Subproject commit 21400f5028e5b357b9115ebfb8ed34fbd09d503c

@@ -30,13 +30,13 @@ togetherai_api_key: Optional[str] = None
 baseten_key: Optional[str] = None
 aleph_alpha_key: Optional[str] = None
 nlp_cloud_key: Optional[str] = None
-use_client = False
-logging = True
-caching = False # deprecated son
-caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
+use_client: bool = False
+logging: bool = True
+caching: bool = False # deprecated son
+caching_with_models: bool = False # if you want the caching key to be model + prompt # deprecated soon
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
-max_budget = None # set the max budget across all providers
+max_budget: float = None # set the max budget across all providers
 _current_cost = 0 # private variable, used if max budget is set
 #############################################

@@ -78,7 +78,7 @@ config_path = None
 ####### Secret Manager #####################
 secret_manager_client = None
 ####### COMPLETION MODELS ###################
-open_ai_chat_completion_models = [
+open_ai_chat_completion_models: str = [
     "gpt-4",
     "gpt-4-0613",
     "gpt-4-0314",

@@ -92,7 +92,7 @@ open_ai_chat_completion_models = [
     "gpt-3.5-turbo-16k",
     "gpt-3.5-turbo-16k-0613",
 ]
-open_ai_text_completion_models = [
+open_ai_text_completion_models: str = [
     "text-davinci-003",
     "text-curie-001",
     "text-babbage-001",

@@ -101,7 +101,7 @@ open_ai_text_completion_models = [
     "text-davinci-002",
 ]

-cohere_models = [
+cohere_models: str = [
     "command-nightly",
     "command",
     "command-light",

@@ -109,10 +109,10 @@ cohere_models = [
     "command-xlarge-beta",
 ]

-anthropic_models = ["claude-2", "claude-instant-1", "claude-instant-1.2"]
+anthropic_models: str = ["claude-2", "claude-instant-1", "claude-instant-1.2"]

 # well supported replicate llms
-replicate_models = [
+replicate_models: str = [
     # llama replicate supported LLMs
     "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
     "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",

@@ -127,7 +127,7 @@ replicate_models = [
     "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
 ]

-openrouter_models = [
+openrouter_models: str = [
     "google/palm-2-codechat-bison",
     "google/palm-2-chat-bison",
     "openai/gpt-3.5-turbo",

@@ -139,25 +139,25 @@ openrouter_models = [
     "meta-llama/llama-2-70b-chat",
 ]

-vertex_chat_models = [
+vertex_chat_models: str = [
     "chat-bison-32k",
     "chat-bison",
     "chat-bison@001",
 ]

-vertex_code_chat_models = [
+vertex_code_chat_models: str = [
     "codechat-bison",
     "codechat-bison-32k",
     "codechat-bison@001",
 ]

-vertex_text_models = [
+vertex_text_models: str = [
     "text-bison",
     "text-bison@001",
     # "text-bison-32k",
 ]

-vertex_code_text_models = [
+vertex_code_text_models: str = [
     "code-bison",
     # "code-bison-32K",
     "code-bison@001",

@@ -165,7 +165,7 @@ vertex_code_text_models = [
     "code-gecko@latest",
 ]

-huggingface_models = [
+huggingface_models: str = [
     "meta-llama/Llama-2-7b-hf",
     "meta-llama/Llama-2-7b-chat-hf",
     "meta-llama/Llama-2-13b-hf",

@@ -180,11 +180,11 @@ huggingface_models = [
     "meta-llama/Llama-2-70b-chat",
 ] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers

-ai21_models = ["j2-ultra", "j2-mid", "j2-light"]
+ai21_models: str = ["j2-ultra", "j2-mid", "j2-light"]

-nlp_cloud_models = ["dolphin", "chatdolphin"]
+nlp_cloud_models: str = ["dolphin", "chatdolphin"]

-together_ai_models = [
+together_ai_models: str = [
     # llama llms - chat
     "togethercomputer/llama-2-70b-chat",

@@ -221,7 +221,7 @@ together_ai_models = [

 ] # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...)

-aleph_alpha_models = [
+aleph_alpha_models: str = [
     "luminous-base",
     "luminous-base-control",
     "luminous-extended",

@@ -230,9 +230,9 @@ aleph_alpha_models = [
     "luminous-supreme-control"
 ]

-baseten_models = ["qvv0xeq", "q841o8w", "31dxrj3"] # FALCON 7B # WizardLM # Mosaic ML
+baseten_models: str = ["qvv0xeq", "q841o8w", "31dxrj3"] # FALCON 7B # WizardLM # Mosaic ML

-bedrock_models = [
+bedrock_models: str = [
     "amazon.titan-tg1-large",
     "ai21.j2-grande-instruct"
 ]

@@ -254,7 +254,7 @@ model_list = (
     + nlp_cloud_models
 )

-provider_list = [
+provider_list: str = [
     "openai",
     "cohere",
     "anthropic",

@@ -274,7 +274,7 @@ provider_list = [
     "custom", # custom apis
 ]

-models_by_provider = {
+models_by_provider: dict = {
     "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
     "cohere": cohere_models,
     "anthropic": anthropic_models,

@@ -289,7 +289,7 @@ models_by_provider = {
 }

 ####### EMBEDDING MODELS ###################
-open_ai_embedding_models = ["text-embedding-ada-002"]
+open_ai_embedding_models: str = ["text-embedding-ada-002"]

 from .timeout import timeout
 from .testing import *

@@ -1193,3 +1193,82 @@ def config_completion(**kwargs):
         raise ValueError(
             "No config path set, please set a config path using `litellm.config_path = 'path/to/config.json'`"
         )
+
+
+def stream_chunk_builder(chunks: list):
+    id = chunks[0]["id"]
+    object = chunks[0]["object"]
+    created = chunks[0]["created"]
+    model = chunks[0]["model"]
+    role = chunks[0]["choices"][0]["delta"]["role"]
+    finnish_reason = chunks[-1]["choices"][0]["finish_reason"]
+
+    # Initialize the response dictionary
+    response = {
+        "id": id,
+        "object": object,
+        "created": created,
+        "model": model,
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": role,
+                    "content": ""
+                },
+                "finish_reason": finnish_reason,
+            }
+        ],
+        # "usage": {
+        #     "prompt_tokens": 0,  # Modify as needed
+        #     "completion_tokens": 0,  # Modify as needed
+        #     "total_tokens": 0  # Modify as needed
+        # }
+    }
+
+    # Extract the "content" strings from the nested dictionaries within "choices"
+    content_list = []
+
+    if "function_call" in chunks[0]["choices"][0]["delta"]:
+        argument_list = []
+        delta = chunks[0]["choices"][0]["delta"]
+        function_call = delta.get("function_call", "")
+        function_call_name = function_call.get("name", "")
+
+        message = response["choices"][0]["message"]
+        message["function_call"] = {}
+        message["function_call"]["name"] = function_call_name
+
+        for chunk in chunks:
+            choices = chunk["choices"]
+            for choice in choices:
+                delta = choice.get("delta", {})
+                function_call = delta.get("function_call", "")
+
+                # Check if a function call is present
+                if function_call:
+                    # Now, function_call is expected to be a dictionary
+                    arguments = function_call.get("arguments", "")
+                    argument_list.append(arguments)
+
+        combined_arguments = "".join(argument_list)
+        response["choices"][0]["message"]["content"] = None
+        response["choices"][0]["message"]["function_call"]["arguments"] = combined_arguments
+    else:
+        for chunk in chunks:
+            choices = chunk["choices"]
+            for choice in choices:
+                delta = choice.get("delta", {})
+                content = delta.get("content", "")
+                content_list.append(content)
+
+        # Combine the "content" strings into a single string
+        combined_content = "".join(content_list)
+
+        # Update the "content" field within the response dictionary
+        response["choices"][0]["message"]["content"] = combined_content
+
+    # # Update usage information if needed
+    # response["usage"]["completion_tokens"] = token
+
+    return response
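
As a quick illustration of the content path above, here is a minimal offline sketch that feeds the helper hand-written chunk dictionaries shaped like OpenAI streaming deltas (the id, timestamp, model, and text are made-up values; only the keys the helper actually reads are included):

from litellm import stream_chunk_builder

# Hypothetical streamed chunks for a plain text completion (illustrative values only).
chunks = [
    {"id": "chatcmpl-abc", "object": "chat.completion.chunk", "created": 1694268190,
     "model": "gpt-3.5-turbo",
     "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Hello"}, "finish_reason": None}]},
    {"id": "chatcmpl-abc", "object": "chat.completion.chunk", "created": 1694268190,
     "model": "gpt-3.5-turbo",
     "choices": [{"index": 0, "delta": {"content": " world"}, "finish_reason": None}]},
    {"id": "chatcmpl-abc", "object": "chat.completion.chunk", "created": 1694268190,
     "model": "gpt-3.5-turbo",
     "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]},
]

rebuilt = stream_chunk_builder(chunks)
print(rebuilt["choices"][0]["message"]["content"])  # -> "Hello world"
print(rebuilt["choices"][0]["finish_reason"])       # -> "stop"

The rebuilt dictionary mirrors a non-streaming chat completion response, so downstream code that expects message["content"] and finish_reason can consume it unchanged.
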
litellm/tests/test_stream_chunk_builder.py (new file, 56 lines)
@@ -0,0 +1,56 @@
+from litellm import completion, stream_chunk_builder
+import litellm
+import os
+
+user_message = "What is the current weather in Boston?"
+messages = [{"content": user_message, "role": "user"}]
+
+function_schema = {
+    "name": "get_weather",
+    "description":
+        "gets the current weather",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "location": {
+                "type": "string",
+                "description":
+                    "The city and state, e.g. San Francisco, CA"
+            },
+        },
+        "required": ["location"]
+    },
+}
+
+def test_stream_chunk_builder():
+    litellm.api_key = os.environ["OPENAI_API_KEY"]
+    response = completion(
+        model="gpt-3.5-turbo",
+        messages=messages,
+        functions=[function_schema],
+        stream=True,
+    )
+
+    chunks = []
+
+    for chunk in response:
+        chunks.append(chunk)
+
+    try:
+        rebuilt_response = stream_chunk_builder(chunks)
+
+        # exract the response from the rebuilt response
+        rebuilt_response["id"]
+        rebuilt_response["object"]
+        rebuilt_response["created"]
+        rebuilt_response["model"]
+        rebuilt_response["choices"]
+        rebuilt_response["choices"][0]["index"]
+        choices = rebuilt_response["choices"][0]
+        message = choices["message"]
+        role = message["role"]
+        content = message["content"]
+        finnish_reason = choices["finish_reason"]
+    except:
+        raise Exception("stream_chunk_builder failed to rebuild response")
+test_stream_chunk_builder()
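
The test above only checks that the rebuilt response exposes the expected fields. For the function-call branch, a similar offline sketch (again with hypothetical chunk dictionaries; the schema name and argument fragments are illustrative) shows how the streamed argument pieces are concatenated back into a single function_call:

from litellm import stream_chunk_builder

# Hypothetical chunks for a streamed function call; the arguments JSON arrives in fragments.
chunks = [
    {"id": "chatcmpl-xyz", "object": "chat.completion.chunk", "created": 1694268190,
     "model": "gpt-3.5-turbo",
     "choices": [{"index": 0,
                  "delta": {"role": "assistant",
                            "function_call": {"name": "get_weather", "arguments": "{\"location\": "}},
                  "finish_reason": None}]},
    {"id": "chatcmpl-xyz", "object": "chat.completion.chunk", "created": 1694268190,
     "model": "gpt-3.5-turbo",
     "choices": [{"index": 0,
                  "delta": {"function_call": {"arguments": "\"Boston, MA\"}"}},
                  "finish_reason": None}]},
    {"id": "chatcmpl-xyz", "object": "chat.completion.chunk", "created": 1694268190,
     "model": "gpt-3.5-turbo",
     "choices": [{"index": 0, "delta": {}, "finish_reason": "function_call"}]},
]

rebuilt = stream_chunk_builder(chunks)
message = rebuilt["choices"][0]["message"]
print(message["content"])        # -> None (content is cleared on the function-call path)
print(message["function_call"])  # -> {'name': 'get_weather', 'arguments': '{"location": "Boston, MA"}'}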