mirror of https://github.com/BerriAI/litellm.git

commit fc922bc300 (parent 64793e7ed7)
fix multithreading issue with response objects

6 changed files with 18 additions and 10 deletions
(two binary files changed; contents not shown)
litellm/main.py
@@ -1180,10 +1180,12 @@ def batch_completion_models(*args, **kwargs):
     if "models" in kwargs:
         models = kwargs["models"]
         kwargs.pop("models")
+        futures = {}
         with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
-            futures = [executor.submit(completion, *args, model=model, **kwargs) for model in models]
+            for model in models:
+                futures[model] = executor.submit(completion, *args, model=model, **kwargs)
 
-            for future in concurrent.futures.as_completed(futures):
+            for model, future in sorted(futures.items(), key=lambda x: models.index(x[0])):
                 if future.result() is not None:
                     return future.result()
 
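The rewrite above replaces concurrent.futures.as_completed, which yields futures in whatever order they happen to finish, with an iteration sorted by the caller's model order, so the first non-None result returned is also the highest-priority one. Below is a minimal runnable sketch of the same pattern; fake_completion is an invented stub standing in for litellm's completion:

import concurrent.futures

def fake_completion(model=None, **kwargs):
    # Stub provider call: pretend one provider yields nothing usable.
    return None if model == "broken-model" else f"response from {model}"

def first_result_in_priority_order(models, **kwargs):
    futures = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
        # Submit all models concurrently, keyed by model name.
        for model in models:
            futures[model] = executor.submit(fake_completion, model=model, **kwargs)
        # Check results in the caller's preference order, not completion order.
        for model, future in sorted(futures.items(), key=lambda x: models.index(x[0])):
            if future.result() is not None:
                return future.result()

print(first_result_in_priority_order(["broken-model", "gpt-3.5-turbo"]))
# prints: response from gpt-3.5-turbo

One side effect worth noting: because futures is a dict keyed by model name, duplicate entries in models overwrite one another, leaving a single future per unique model.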
litellm/tests/test_batch_completions.py
@@ -27,7 +27,7 @@ from litellm import batch_completion, batch_completion_models, completion, batch
 def test_batch_completions_models():
     try:
         result = batch_completion_models(
-            models=["gpt-3.5-turbo", "claude-instant-1.2", "command-nightly"],
+            models=["gpt-3.5-turbo", "gpt-3.5-turbo", "gpt-3.5-turbo"],
             messages=[{"role": "user", "content": "Hey, how's it going"}]
         )
         print(result)
@@ -37,13 +37,13 @@ def test_batch_completions_models():
 
 def test_batch_completion_models_all_responses():
     responses = batch_completion_models_all_responses(
-        models=["gpt-3.5-turbo", "claude-instant-1.2", "command-nightly"],
+        models=["j2-light", "claude-instant-1.2", "command-nightly"],
         messages=[{"role": "user", "content": "write a poem"}],
         max_tokens=500
     )
     print(responses)
     assert(len(responses) == 3)
-# test_batch_completion_models_all_responses()
+test_batch_completion_models_all_responses()
 
 # def test_batch_completions():
 #     try:
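test_batch_completion_models_all_responses asserts one response per requested model, in order. litellm's actual implementation is not shown in this diff; a plausible minimal shape for such a helper (a sketch under that assumption, with a lambda standing in for the real completion call):

import concurrent.futures

def all_responses(fn, models, **kwargs):
    # Fan out one call per model and collect results in input order.
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
        futures = [executor.submit(fn, model=model, **kwargs) for model in models]
        return [future.result() for future in futures]

responses = all_responses(lambda model=None, **kw: f"ok: {model}",
                          ["j2-light", "claude-instant-1.2", "command-nightly"])
assert len(responses) == 3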
litellm/utils.py
@@ -100,10 +100,16 @@ class Delta(OpenAIObject):
 
 
 class Choices(OpenAIObject):
-    def __init__(self, finish_reason="stop", index=0, message=Message(), **params):
+    def __init__(self, finish_reason=None, index=0, message=None, **params):
         super(Choices, self).__init__(**params)
-        self.finish_reason = finish_reason
+        if finish_reason:
+            self.finish_reason = finish_reason
+        else:
+            finish_reason = "stop"
         self.index = index
-        self.message = message
+        if message is None:
+            self.message = Message(content=None)
+        else:
+            self.message = message
 
 class StreamingChoices(OpenAIObject):
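The signature change above (message=Message() and finish_reason="stop" becoming None) targets Python's mutable-default-argument behavior: a default value is evaluated once, when the def statement runs, so every call that omitted message, including calls racing on different threads, received the very same Message instance, and mutations through one response object leaked into others. A self-contained repro of the pitfall and of the fix pattern; Msg is a stand-in class, not litellm's Message:

class Msg:
    def __init__(self, content=None):
        self.content = content

def shared_default(message=Msg()):
    # The default Msg() above was built once, at definition time.
    return message

def fresh_default(message=None):
    # The pattern the commit adopts: build a new object per call.
    return Msg(content=None) if message is None else message

assert shared_default() is shared_default()    # one shared instance: the bug
assert fresh_default() is not fresh_default()  # fresh instance per call: the fix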
litellm/utils.py
@@ -126,7 +132,7 @@ class ModelResponse(OpenAIObject):
             self.object = "embedding"
         else:
             self.object = "chat.completion"
-        self.choices = self.choices = choices if choices else [Choices()]
+        self.choices = [Choices()]
         if id is None:
             self.id = _generate_id()
         else:
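Two details of the removed line: the doubled target (self.choices = self.choices = ...) was a chained assignment, which in Python evaluates the right-hand side once and binds every target to that same object, so the repetition was redundant rather than harmful; and dropping the choices fall-through means each ModelResponse now always builds a fresh [Choices()], presumably to keep response objects from sharing state across threads, in line with the commit message. The chained-assignment semantics in isolation:

x = y = []
assert x is y  # one list object bound to both names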
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.716"
+version = "0.1.717"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"