Mirror of https://github.com/BerriAI/litellm.git

Commit 408f232bd7: Merge branch 'main' into main
11 changed files with 185 additions and 70 deletions
@@ -15,7 +15,7 @@ join our [discord](https://discord.gg/wuPM9dRgDw)
 ## Pre-Requisites
 Ensure you have run `pip install langfuse` for this integration
 ```shell
-pip install langfuse litellm
+pip install langfuse==1.14.0 litellm
 ```
 
 ## Quick Start
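The Quick Start this hunk leads into boils down to setting a success callback before calling `completion`. A minimal sketch of that flow, assuming the standard `LANGFUSE_PUBLIC_KEY` / `LANGFUSE_SECRET_KEY` environment variables and an OpenAI key; the exact snippet in the doc may differ:

```python
import os
import litellm
from litellm import completion

# Langfuse project keys (assumed env var names; fill in your own values)
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"] = ""

# log every successful call to Langfuse
litellm.success_callback = ["langfuse"]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello from litellm"}],
)
print(response)
```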
@@ -14,7 +14,7 @@ import os
 
 os.environ['MISTRAL_API_KEY'] = ""
 response = completion(
-    model="mistral/mistral-tiny"",
+    model="mistral/mistral-tiny",
     messages=[
         {"role": "user", "content": "hello from litellm"}
     ],
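For reference, the corrected Mistral snippet in full, assuming the `from litellm import completion` import that sits above this hunk in the doc:

```python
import os
from litellm import completion

os.environ['MISTRAL_API_KEY'] = ""

response = completion(
    model="mistral/mistral-tiny",
    messages=[
        {"role": "user", "content": "hello from litellm"}
    ],
)
print(response)
```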
@@ -461,7 +461,7 @@ We will use the `--config` to set `litellm.success_callback = ["langfuse"]` this
 **Step 1** Install langfuse
 
 ```shell
-pip install langfuse
+pip install langfuse==1.14.0
 ```
 
 **Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
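Step 2 references a `config.yaml` with `litellm_settings.success_callback`. A minimal sketch of what that file could look like (the model entry is illustrative):

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

litellm_settings:
  success_callback: ["langfuse"]
```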
@@ -284,7 +284,7 @@ class OpenAIChatCompletion(BaseLLM):
                     additional_args={"complete_input_dict": data},
                 )
                 return convert_to_model_response_object(response_object=json.loads(stringified_response), model_response_object=model_response)
             except Exception as e:
                 raise e
 
     def streaming(self,
@@ -631,24 +631,27 @@ class OpenAITextCompletion(BaseLLM):
                          api_key: str,
                          model: str):
         async with httpx.AsyncClient() as client:
-            response = await client.post(api_base, json=data, headers=headers, timeout=litellm.request_timeout)
-            response_json = response.json()
-            if response.status_code != 200:
-                raise OpenAIError(status_code=response.status_code, message=response.text)
-
-            ## LOGGING
-            logging_obj.post_call(
-                input=prompt,
-                api_key=api_key,
-                original_response=response,
-                additional_args={
-                    "headers": headers,
-                    "api_base": api_base,
-                },
-            )
-
-            ## RESPONSE OBJECT
-            return self.convert_to_model_response_object(response_object=response_json, model_response_object=model_response)
+            try:
+                response = await client.post(api_base, json=data, headers=headers, timeout=litellm.request_timeout)
+                response_json = response.json()
+                if response.status_code != 200:
+                    raise OpenAIError(status_code=response.status_code, message=response.text)
+
+                ## LOGGING
+                logging_obj.post_call(
+                    input=prompt,
+                    api_key=api_key,
+                    original_response=response,
+                    additional_args={
+                        "headers": headers,
+                        "api_base": api_base,
+                    },
+                )
+
+                ## RESPONSE OBJECT
+                return self.convert_to_model_response_object(response_object=response_json, model_response_object=model_response)
+            except Exception as e:
+                raise e
 
     def streaming(self,
                   logging_obj,
@@ -687,9 +690,12 @@ class OpenAITextCompletion(BaseLLM):
                     method="POST",
                     timeout=litellm.request_timeout
                 ) as response:
-            if response.status_code != 200:
-                raise OpenAIError(status_code=response.status_code, message=response.text)
-
-            streamwrapper = CustomStreamWrapper(completion_stream=response.aiter_lines(), model=model, custom_llm_provider="text-completion-openai",logging_obj=logging_obj)
-            async for transformed_chunk in streamwrapper:
-                yield transformed_chunk
+            try:
+                if response.status_code != 200:
+                    raise OpenAIError(status_code=response.status_code, message=response.text)
+
+                streamwrapper = CustomStreamWrapper(completion_stream=response.aiter_lines(), model=model, custom_llm_provider="text-completion-openai",logging_obj=logging_obj)
+                async for transformed_chunk in streamwrapper:
+                    yield transformed_chunk
+            except Exception as e:
+                raise e
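The two hunks above wrap the async text-completion request and the streaming loop in try/except, so failures surface to the caller instead of dying inside the httpx context manager. A hedged sketch of what that means for callers of `atext_completion` (model and prompt values are illustrative):

```python
import asyncio
import litellm

async def main():
    try:
        response = await litellm.atext_completion(
            model="gpt-3.5-turbo-instruct",
            prompt="good morning",
            max_tokens=10,
        )
        print(response)
    except Exception as e:
        # errors raised inside the async client now propagate here
        print(f"text completion failed: {e}")

asyncio.run(main())
```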
litellm/llms/openrouter.py (new file, 30 lines)
@@ -0,0 +1,30 @@
+from typing import List, Dict
+import types
+
+class OpenrouterConfig():
+    """
+    Reference: https://openrouter.ai/docs#format
+
+    """
+    # OpenRouter-only parameters
+    extra_body: Dict[str, List[str]] = {
+        'transforms': [] # default transforms to []
+    }
+
+
+    def __init__(self,
+                 transforms: List[str] = [],
+                 models: List[str] = [],
+                 route: str = '',
+                 ) -> None:
+        locals_ = locals()
+        for key, value in locals_.items():
+            if key != 'self' and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {k: v for k, v in cls.__dict__.items()
+                if not k.startswith('__')
+                and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
+                and v is not None}
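`OpenrouterConfig` stores defaults as class attributes: instantiating it overwrites those attributes, and `get_config()` returns everything that is set so `completion()` can merge it into the request. A small sketch, using an illustrative transform value:

```python
from litellm.llms.openrouter import OpenrouterConfig

# instantiating the config updates the class-level defaults
OpenrouterConfig(transforms=["middle-out"])  # "middle-out" is an example value

config = OpenrouterConfig.get_config()
print(config)  # the extra_body default plus the transforms/models/route set above
```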
@@ -52,6 +52,7 @@ from .llms import (
     cohere,
     petals,
     oobabooga,
+    openrouter,
     palm,
     vertex_ai,
     maritalk)
@@ -260,8 +261,8 @@ def completion(
     model: str,
     # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
     messages: List = [],
-    functions: List = [],
-    function_call: str = "", # optional params
+    functions: Optional[List] = None,
+    function_call: Optional[str] = None,
     timeout: Optional[Union[float, int]] = None,
     temperature: Optional[float] = None,
     top_p: Optional[float] = None,
@@ -1026,14 +1027,23 @@ def completion(
                 }
             )
 
+            ## Load Config
+            config = openrouter.OpenrouterConfig.get_config()
+            for k, v in config.items():
+                if k == "extra_body":
+                    # we use openai 'extra_body' to pass openrouter specific params - transforms, route, models
+                    if "extra_body" in optional_params:
+                        optional_params[k].update(v)
+                    else:
+                        optional_params[k] = v
+                elif k not in optional_params:
+                    optional_params[k] = v
+
             data = {
                 "model": model,
                 "messages": messages,
                 **optional_params
             }
-            ## LOGGING
-            logging.pre_call(input=messages, api_key=openai.api_key, additional_args={"complete_input_dict": data, "headers": headers})
-            ## COMPLETION CALL
 
             ## COMPLETION CALL
             response = openai_chat_completions.completion(
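The `## Load Config` block merges `OpenrouterConfig` values into `optional_params`, with one special case: a config-level `extra_body` is merged into an `extra_body` the caller already passed rather than replacing it. A standalone sketch of that merge rule with illustrative values:

```python
config = {"extra_body": {"transforms": []}}
optional_params = {"extra_body": {"route": "fallback"}, "max_tokens": 10}

for k, v in config.items():
    if k == "extra_body":
        # merge the config extra_body into the caller's extra_body
        if "extra_body" in optional_params:
            optional_params[k].update(v)
        else:
            optional_params[k] = v
    elif k not in optional_params:
        optional_params[k] = v

print(optional_params)
# {'extra_body': {'route': 'fallback', 'transforms': []}, 'max_tokens': 10}
```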
@@ -1510,8 +1520,8 @@ def batch_completion(
     model: str,
     # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
     messages: List = [],
-    functions: List = [],
-    function_call: str = "", # optional params
+    functions: Optional[List] = None,
+    function_call: Optional[str] = None,
     temperature: Optional[float] = None,
     top_p: Optional[float] = None,
     n: Optional[int] = None,
@@ -2193,10 +2203,8 @@ def text_completion(
     if stream == True or kwargs.get("stream", False) == True:
         response = TextCompletionStreamWrapper(completion_stream=response, model=model)
         return response
-
-    if asyncio.iscoroutine(response):
-        response = asyncio.run(response)
-
+    if kwargs.get("acompletion", False) == True:
+        return response
     transformed_logprobs = None
     # only supported for TGI models
     try:
@@ -47,7 +47,7 @@ litellm_settings:
   # setting callback class
   # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
 
-general_settings:
+# general_settings:
 
 environment_variables:
   # otel: True # OpenTelemetry Logger
@@ -84,11 +84,11 @@ class Router:
 
         self.set_verbose = set_verbose
         self.deployment_names: List = [] # names of models under litellm_params. ex. azure/chatgpt-v-2
+        self.deployment_latency_map = {}
         if model_list:
             model_list = copy.deepcopy(model_list)
             self.set_model_list(model_list)
             self.healthy_deployments: List = self.model_list
-            self.deployment_latency_map = {}
             for m in model_list:
                 self.deployment_latency_map[m["litellm_params"]["model"]] = 0
 
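The Router change moves `self.deployment_latency_map = {}` ahead of the model-list handling, so the attribute exists before the per-deployment loop fills it. A minimal sketch of the setup that exercises this path (the deployment entry is illustrative and would normally include credentials):

```python
from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo"},  # api_key etc. omitted here
    }
]

router = Router(model_list=model_list)
print(router.deployment_latency_map)  # {'gpt-3.5-turbo': 0} per the loop above
```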
@@ -169,17 +169,37 @@ def test_text_completion_stream():
 
 # test_text_completion_stream()
 
-async def test_text_completion_async_stream():
-    try:
-        response = await atext_completion(
-            model="text-completion-openai/text-davinci-003",
-            prompt="good morning",
-            stream=True,
-            max_tokens=10,
-        )
-        async for chunk in response:
-            print(f"chunk: {chunk}")
-    except Exception as e:
-        pytest.fail(f"GOT exception for HF In streaming{e}")
-
-asyncio.run(test_text_completion_async_stream())
+# async def test_text_completion_async_stream():
+#     try:
+#         response = await atext_completion(
+#             model="text-completion-openai/text-davinci-003",
+#             prompt="good morning",
+#             stream=True,
+#             max_tokens=10,
+#         )
+#         async for chunk in response:
+#             print(f"chunk: {chunk}")
+#     except Exception as e:
+#         pytest.fail(f"GOT exception for HF In streaming{e}")
+
+# asyncio.run(test_text_completion_async_stream())
+
+def test_async_text_completion():
+    litellm.set_verbose = True
+    print('test_async_text_completion')
+    async def test_get_response():
+        try:
+            response = await litellm.atext_completion(
+                model="gpt-3.5-turbo-instruct",
+                prompt="good morning",
+                stream=False,
+                max_tokens=10
+            )
+            print(f"response: {response}")
+        except litellm.Timeout as e:
+            print(e)
+        except Exception as e:
+            print(e)
+
+    asyncio.run(test_get_response())
+test_async_text_completion()
@@ -2336,8 +2336,8 @@ def get_optional_params_embeddings(
 
 def get_optional_params( # use the openai defaults
     # 12 optional params
-    functions=[],
-    function_call="",
+    functions=None,
+    function_call=None,
     temperature=None,
     top_p=None,
     n=None,
@@ -2363,8 +2363,8 @@ def get_optional_params( # use the openai defaults
     for k, v in special_params.items():
         passed_params[k] = v
     default_params = {
-        "functions":[],
-        "function_call":"",
+        "functions": None,
+        "function_call": None,
         "temperature":None,
         "top_p":None,
         "n":None,
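Switching the defaults from `[]` / `""` to `None` lets `get_optional_params` distinguish "not passed" from "passed but empty", so only params the caller actually set are forwarded. A hypothetical helper (`pick_non_default` is not part of litellm) to illustrate the comparison against `default_params`:

```python
def pick_non_default(passed, defaults):
    # keep only the params whose value differs from the declared default
    return {k: v for k, v in passed.items() if v != defaults.get(k)}

defaults = {"functions": None, "function_call": None, "temperature": None}
passed = {"functions": None, "function_call": None, "temperature": 0.2}

print(pick_non_default(passed, defaults))  # {'temperature': 0.2}
```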
@@ -2851,6 +2851,57 @@ def get_optional_params( # use the openai defaults
         if random_seed is not None:
             extra_body["random_seed"] = random_seed
         optional_params["extra_body"] = extra_body # openai client supports `extra_body` param
+    elif custom_llm_provider == "openrouter":
+        supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice", "max_retries"]
+        _check_valid_arg(supported_params=supported_params)
+
+        if functions is not None:
+            optional_params["functions"] = functions
+        if function_call is not None:
+            optional_params["function_call"] = function_call
+        if temperature is not None:
+            optional_params["temperature"] = temperature
+        if top_p is not None:
+            optional_params["top_p"] = top_p
+        if n is not None:
+            optional_params["n"] = n
+        if stream is not None:
+            optional_params["stream"] = stream
+        if stop is not None:
+            optional_params["stop"] = stop
+        if max_tokens is not None:
+            optional_params["max_tokens"] = max_tokens
+        if presence_penalty is not None:
+            optional_params["presence_penalty"] = presence_penalty
+        if frequency_penalty is not None:
+            optional_params["frequency_penalty"] = frequency_penalty
+        if logit_bias is not None:
+            optional_params["logit_bias"] = logit_bias
+        if user is not None:
+            optional_params["user"] = user
+        if response_format is not None:
+            optional_params["response_format"] = response_format
+        if seed is not None:
+            optional_params["seed"] = seed
+        if tools is not None:
+            optional_params["tools"] = tools
+        if tool_choice is not None:
+            optional_params["tool_choice"] = tool_choice
+        if max_retries is not None:
+            optional_params["max_retries"] = max_retries
+
+        # OpenRouter-only parameters
+        extra_body = {}
+        transforms = passed_params.pop("transforms", None)
+        models = passed_params.pop("models", None)
+        route = passed_params.pop("route", None)
+        if transforms is not None:
+            extra_body["transforms"] = transforms
+        if models is not None:
+            extra_body["models"] = models
+        if route is not None:
+            extra_body["route"] = route
+        optional_params["extra_body"] = extra_body # openai client supports `extra_body` param
     else: # assume passing in params for openai/azure openai
         supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice", "max_retries"]
         _check_valid_arg(supported_params=supported_params)
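With the `openrouter` branch above, OpenRouter-only kwargs (`transforms`, `models`, `route`) are popped from the call and sent through the OpenAI client's `extra_body`. A hedged end-to-end sketch; the model id and transform value are illustrative:

```python
import os
from litellm import completion

os.environ["OPENROUTER_API_KEY"] = ""  # your OpenRouter key

response = completion(
    model="openrouter/openai/gpt-3.5-turbo",  # example OpenRouter model id
    messages=[{"role": "user", "content": "hello from litellm"}],
    transforms=["middle-out"],  # ends up in extra_body["transforms"]
)
print(response)
```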
@@ -3962,7 +4013,7 @@ def convert_to_model_response_object(response_object: Optional[dict]=None, model
             raise Exception("Error in response object format")
 
         if model_response_object is None:
-            model_response_object = EmbeddingResponse()
+            model_response_object = ImageResponse()
 
         if "created" in response_object:
             model_response_object.created = response_object["created"]
@@ -3,24 +3,24 @@ anyio==4.2.0 # openai + http req.
 openai>=1.0.0 # openai req.
 fastapi # server dep
 pydantic>=2.5 # openai req.
-appdirs # server dep
-backoff # server dep
-pyyaml # server dep
-uvicorn # server dep
-boto3 # aws bedrock/sagemaker calls
-redis # caching
-prisma # for db
-mangum # for aws lambda functions
-google-generativeai # for vertex ai calls
+appdirs==1.4.4 # server dep
+backoff==2.2.1 # server dep
+pyyaml==6.0 # server dep
+uvicorn==0.22.0 # server dep
+boto3==1.28.58 # aws bedrock/sagemaker calls
+redis==4.6.0 # caching
+prisma==0.11.0 # for db
+mangum==0.17.0 # for aws lambda functions
+google-generativeai==0.1.0 # for vertex ai calls
 traceloop-sdk==0.5.3 # for open telemetry logging
 langfuse==1.14.0 # for langfuse self-hosted logging
 ### LITELLM PACKAGE DEPENDENCIES
 python-dotenv>=0.2.0 # for env
 tiktoken>=0.4.0 # for calculating usage
 importlib-metadata>=6.8.0 # for random utils
-tokenizers # for calculating usage
-click # for proxy cli
+tokenizers==0.14.0 # for calculating usage
+click==8.1.7 # for proxy cli
 jinja2==3.1.2 # for prompt templates
 certifi>=2023.7.22 # [TODO] clean up
-aiohttp # for network calls
+aiohttp==3.8.4 # for network calls
 ####