diff --git a/fastrepl-proxy b/fastrepl-proxy
deleted file mode 160000
index 21400f5028..0000000000
--- a/fastrepl-proxy
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 21400f5028e5b357b9115ebfb8ed34fbd09d503c
diff --git a/litellm/__init__.py b/litellm/__init__.py
index df6b8717d3..880491080a 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -30,13 +30,13 @@ togetherai_api_key: Optional[str] = None
 baseten_key: Optional[str] = None
 aleph_alpha_key: Optional[str] = None
 nlp_cloud_key: Optional[str] = None
-use_client = False
-logging = True
-caching = False # deprecated son
-caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
+use_client: bool = False
+logging: bool = True
+caching: bool = False # deprecated soon
+caching_with_models: bool = False # if you want the caching key to be model + prompt # deprecated soon
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
-max_budget = None # set the max budget across all providers
+max_budget: Optional[float] = None # set the max budget across all providers
 _current_cost = 0 # private variable, used if max budget is set
 
 #############################################
@@ -78,7 +78,7 @@ config_path = None
 ####### Secret Manager #####################
 secret_manager_client = None
 ####### COMPLETION MODELS ###################
-open_ai_chat_completion_models = [
+open_ai_chat_completion_models: list = [
     "gpt-4",
     "gpt-4-0613",
     "gpt-4-0314",
@@ -92,7 +92,7 @@ open_ai_chat_completion_models = [
     "gpt-3.5-turbo-16k",
     "gpt-3.5-turbo-16k-0613",
 ]
-open_ai_text_completion_models = [
+open_ai_text_completion_models: list = [
     "text-davinci-003",
     "text-curie-001",
     "text-babbage-001",
@@ -101,7 +101,7 @@ open_ai_text_completion_models = [
     "text-davinci-002",
 ]
 
-cohere_models = [
+cohere_models: list = [
     "command-nightly",
     "command",
     "command-light",
@@ -109,10 +109,10 @@ cohere_models = [
     "command-xlarge-beta",
 ]
 
-anthropic_models = ["claude-2", "claude-instant-1", "claude-instant-1.2"]
+anthropic_models: list = ["claude-2", "claude-instant-1", "claude-instant-1.2"]
 
 # well supported replicate llms
-replicate_models = [
+replicate_models: list = [
     # llama replicate supported LLMs
     "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
     "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
@@ -127,7 +127,7 @@ replicate_models = [
     "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
 ]
 
-openrouter_models = [
+openrouter_models: list = [
     "google/palm-2-codechat-bison",
     "google/palm-2-chat-bison",
     "openai/gpt-3.5-turbo",
@@ -139,25 +139,25 @@ openrouter_models = [
     "meta-llama/llama-2-70b-chat",
 ]
 
-vertex_chat_models = [
+vertex_chat_models: list = [
     "chat-bison-32k",
     "chat-bison",
     "chat-bison@001",
 ]
 
-vertex_code_chat_models = [
+vertex_code_chat_models: list = [
     "codechat-bison",
     "codechat-bison-32k",
     "codechat-bison@001",
 ]
 
-vertex_text_models = [
+vertex_text_models: list = [
     "text-bison",
     "text-bison@001",
     # "text-bison-32k",
 ]
 
-vertex_code_text_models = [
+vertex_code_text_models: list = [
     "code-bison",
     # "code-bison-32K",
     "code-bison@001",
@@ -165,7 +165,7 @@ vertex_code_text_models = [
     "code-gecko@latest",
 ]
 
-huggingface_models = [
+huggingface_models: list = [
     "meta-llama/Llama-2-7b-hf",
     "meta-llama/Llama-2-7b-chat-hf",
     "meta-llama/Llama-2-13b-hf",
@@ -180,11 +180,11 @@ huggingface_models = [
     "meta-llama/Llama-2-70b-chat",
 ] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers
 
-ai21_models = ["j2-ultra", "j2-mid", "j2-light"]
+ai21_models: list = ["j2-ultra", "j2-mid", "j2-light"]
 
-nlp_cloud_models = ["dolphin", "chatdolphin"]
+nlp_cloud_models: list = ["dolphin", "chatdolphin"]
 
-together_ai_models = [
+together_ai_models: list = [
 
     # llama llms - chat
     "togethercomputer/llama-2-70b-chat",
@@ -221,7 +221,7 @@ together_ai_models = [
 ] # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...)
 
 
-aleph_alpha_models = [
+aleph_alpha_models: list = [
     "luminous-base",
     "luminous-base-control",
     "luminous-extended",
@@ -230,9 +230,9 @@ aleph_alpha_models = [
     "luminous-supreme-control"
 ]
 
-baseten_models = ["qvv0xeq", "q841o8w", "31dxrj3"] # FALCON 7B # WizardLM # Mosaic ML
+baseten_models: list = ["qvv0xeq", "q841o8w", "31dxrj3"] # FALCON 7B # WizardLM # Mosaic ML
 
-bedrock_models = [
+bedrock_models: list = [
     "amazon.titan-tg1-large",
     "ai21.j2-grande-instruct"
 ]
@@ -254,7 +254,7 @@ model_list = (
     + nlp_cloud_models
 )
 
-provider_list = [
+provider_list: list = [
     "openai",
     "cohere",
     "anthropic",
@@ -274,7 +274,7 @@ provider_list = [
     "custom", # custom apis
 ]
 
-models_by_provider = {
+models_by_provider: dict = {
     "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
     "cohere": cohere_models,
     "anthropic": anthropic_models,
@@ -289,7 +289,7 @@ models_by_provider = {
 }
 
 ####### EMBEDDING MODELS ###################
-open_ai_embedding_models = ["text-embedding-ada-002"]
+open_ai_embedding_models: list = ["text-embedding-ada-002"]
 
 from .timeout import timeout
 from .testing import *
diff --git a/litellm/main.py b/litellm/main.py
index 9d98d40074..74604e7622 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1193,3 +1193,82 @@ def config_completion(**kwargs):
         raise ValueError(
             "No config path set, please set a config path using `litellm.config_path = 'path/to/config.json'`"
         )
+
+def stream_chunk_builder(chunks: list):
+    id = chunks[0]["id"]
+    object = chunks[0]["object"]
+    created = chunks[0]["created"]
+    model = chunks[0]["model"]
+    role = chunks[0]["choices"][0]["delta"]["role"]
+    finish_reason = chunks[-1]["choices"][0]["finish_reason"]
+
+    # Initialize the response dictionary
+    response = {
+        "id": id,
+        "object": object,
+        "created": created,
+        "model": model,
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": role,
+                    "content": ""
+                },
+                "finish_reason": finish_reason,
+            }
+        ],
+        # "usage": {
+        #     "prompt_tokens": 0, # Modify as needed
+        #     "completion_tokens": 0, # Modify as needed
+        #     "total_tokens": 0 # Modify as needed
+        # }
+    }
+
+    # Extract the "content" strings from the nested dictionaries within "choices"
+    content_list = []
+
+    if "function_call" in chunks[0]["choices"][0]["delta"]:
+        argument_list = []
+        delta = chunks[0]["choices"][0]["delta"]
+        function_call = delta.get("function_call", "")
+        function_call_name = function_call.get("name", "")
+
+        message = response["choices"][0]["message"]
+        message["function_call"] = {}
+        message["function_call"]["name"] = function_call_name
+
+        for chunk in chunks:
+            choices = chunk["choices"]
+            for choice in choices:
+                delta = choice.get("delta", {})
+                function_call = delta.get("function_call", "")
+
+                # Check if a function call is present
+                if function_call:
+                    # Now, function_call is expected to be a dictionary
+                    arguments = function_call.get("arguments", "")
+                    argument_list.append(arguments)
+
+        combined_arguments = "".join(argument_list)
"".join(argument_list) + response["choices"][0]["message"]["content"] = None + response["choices"][0]["message"]["function_call"]["arguments"] = combined_arguments + else: + for chunk in chunks: + choices = chunk["choices"] + for choice in choices: + delta = choice.get("delta", {}) + content = delta.get("content", "") + content_list.append(content) + + # Combine the "content" strings into a single string + combined_content = "".join(content_list) + + # Update the "content" field within the response dictionary + response["choices"][0]["message"]["content"] = combined_content + + + # # Update usage information if needed + # response["usage"]["completion_tokens"] = token + + return response diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py new file mode 100644 index 0000000000..23dcf57a8e --- /dev/null +++ b/litellm/tests/test_stream_chunk_builder.py @@ -0,0 +1,56 @@ +from litellm import completion, stream_chunk_builder +import litellm +import os + +user_message = "What is the current weather in Boston?" +messages = [{"content": user_message, "role": "user"}] + +function_schema = { + "name": "get_weather", + "description": + "gets the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": + "The city and state, e.g. San Francisco, CA" + }, + }, + "required": ["location"] + }, +} + +def test_stream_chunk_builder(): + litellm.api_key = os.environ["OPENAI_API_KEY"] + response = completion( + model="gpt-3.5-turbo", + messages=messages, + functions=[function_schema], + stream=True, + ) + + chunks = [] + + for chunk in response: + chunks.append(chunk) + + try: + rebuilt_response = stream_chunk_builder(chunks) + + # exract the response from the rebuilt response + rebuilt_response["id"] + rebuilt_response["object"] + rebuilt_response["created"] + rebuilt_response["model"] + rebuilt_response["choices"] + rebuilt_response["choices"][0]["index"] + choices = rebuilt_response["choices"][0] + message = choices["message"] + role = message["role"] + content = message["content"] + finnish_reason = choices["finish_reason"] + except: + raise Exception("stream_chunk_builder failed to rebuild response") +test_stream_chunk_builder()