LiteLLM Minor Fixes & Improvements (09/27/2024) (#5938)

* fix(langfuse.py): prevent double logging requester metadata Fixes https://github.com/BerriAI/litellm/issues/5935 * build(model_prices_and_context_window.json): add mistral pixtral cost tracking Closes https://github.com/BerriAI/litellm/issues/5837 * handle streaming for azure ai studio error * [Perf Proxy] parallel request limiter - use one cache update call (#5932) * fix parallel request limiter - use one cache update call * ci/cd run again * run ci/cd again * use docker username password * fix config.yml * fix config * fix config * fix config.yml * ci/cd run again * use correct typing for batch set cache * fix async_set_cache_pipeline * fix only check user id tpm / rpm limits when limits set * fix test_openai_azure_embedding_with_oidc_and_cf * fix(groq/chat/transformation.py): Fixes https://github.com/BerriAI/litellm/issues/5839 * feat(anthropic/chat.py): return 'retry-after' headers from anthropic Fixes https://github.com/BerriAI/litellm/issues/4387 * feat: raise validation error if message has tool calls without passing `tools` param for anthropic/bedrock Closes https://github.com/BerriAI/litellm/issues/5747 * [Feature]#5940, add max_workers parameter for the batch_completion (#5947) * handle streaming for azure ai studio error * bump: version 1.48.2 → 1.48.3 * docs(data_security.md): add legal/compliance faq's Make it easier for companies to use litellm * docs: resolve imports * [Feature]#5940, add max_workers parameter for the batch_completion method --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com> Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local> * fix(converse_transformation.py): fix default message value * fix(utils.py): fix get_model_info to handle finetuned models Fixes issue for standard logging payloads, where model_map_value was null for finetuned openai models * fix(litellm_pre_call_utils.py): add debug statement for data sent after updating with team/key callbacks * fix: fix linting errors * fix(anthropic/chat/handler.py): fix cache creation input tokens * fix(exception_mapping_utils.py): fix missing imports * fix(anthropic/chat/handler.py): fix usage block translation * test: fix test * test: fix tests * style(types/utils.py): trigger new build * test: fix test --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Jose Alberto Arango Sanchez <jose.arangos@udea.edu.co> Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
2025-04-25 10:44:24 +00:00 · 2024-09-27 22:52:57 -07:00 · 2024-09-27 22:52:57 -07:00 · 0b30e212da
commit 0b30e212da
parent 754981a78f
35 changed files with 3657 additions and 2820 deletions
--- a/litellm/main.py
+++ b/litellm/main.py
@ -96,6 +96,7 @@ from .llms.cohere import completion as cohere_completion  # type: ignore
 from .llms.cohere import embed as cohere_embed
 from .llms.custom_llm import CustomLLM, custom_chat_llm_router
 from .llms.databricks.chat import DatabricksChatCompletion
+from .llms.groq.chat.handler import GroqChatCompletion
 from .llms.huggingface_restapi import Huggingface
 from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
 from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion
@ -168,6 +169,7 @@ openai_text_completions = OpenAITextCompletion()
 openai_o1_chat_completions = OpenAIO1ChatCompletion()
 openai_audio_transcriptions = OpenAIAudioTranscription()
 databricks_chat_completions = DatabricksChatCompletion()
+groq_chat_completions = GroqChatCompletion()
 azure_ai_chat_completions = AzureAIChatCompletion()
 azure_ai_embedding = AzureAIEmbedding()
 anthropic_chat_completions = AnthropicChatCompletion()
@ -958,6 +960,7 @@ def completion(
            extra_headers=extra_headers,
            api_version=api_version,
            parallel_tool_calls=parallel_tool_calls,
+            messages=messages,
            **non_default_params,
        )

@ -1318,13 +1321,56 @@ def completion(
                    additional_args={"headers": headers},
                )
            response = _response
+        elif custom_llm_provider == "groq":
+            api_base = (
+                api_base  # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
+                or litellm.api_base
+                or get_secret("GROQ_API_BASE")
+                or "https://api.groq.com/openai/v1"
+            )

+            # set API KEY
+            api_key = (
+                api_key
+                or litellm.api_key  # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
+                or litellm.groq_key
+                or get_secret("GROQ_API_KEY")
+            )
+
+            headers = headers or litellm.headers
+
+            ## LOAD CONFIG - if set
+            config = litellm.GroqChatConfig.get_config()
+            for k, v in config.items():
+                if (
+                    k not in optional_params
+                ):  # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
+                    optional_params[k] = v
+
+            response = groq_chat_completions.completion(
+                model=model,
+                messages=messages,
+                headers=headers,
+                model_response=model_response,
+                print_verbose=print_verbose,
+                api_key=api_key,
+                api_base=api_base,
+                acompletion=acompletion,
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                logger_fn=logger_fn,
+                timeout=timeout,  # type: ignore
+                custom_prompt_dict=custom_prompt_dict,
+                client=client,  # pass AsyncOpenAI, OpenAI client
+                organization=organization,
+                custom_llm_provider=custom_llm_provider,
+            )
        elif (
            model in litellm.open_ai_chat_completion_models
            or custom_llm_provider == "custom_openai"
            or custom_llm_provider == "deepinfra"
            or custom_llm_provider == "perplexity"
-            or custom_llm_provider == "groq"
            or custom_llm_provider == "nvidia_nim"
            or custom_llm_provider == "cerebras"
            or custom_llm_provider == "sambanova"
@ -1431,6 +1477,7 @@ def completion(
                    original_response=response,
                    additional_args={"headers": headers},
                )
+
        elif (
            "replicate" in model
            or custom_llm_provider == "replicate"
@ -2933,6 +2980,7 @@ def batch_completion(
    deployment_id=None,
    request_timeout: Optional[int] = None,
    timeout: Optional[int] = 600,
+    max_workers:Optional[int]= 100,
    # Optional liteLLM function params
    **kwargs,
 ):
@ -2956,6 +3004,7 @@ def batch_completion(
        user (str, optional): The user string for generating completions. Defaults to "".
        deployment_id (optional): The deployment ID for generating completions. Defaults to None.
        request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
+        max_workers (int,optional): The maximum number of threads to use for parallel processing.

    Returns:
        list: A list of completion results.
@ -3001,7 +3050,7 @@ def batch_completion(
            for i in range(0, len(lst), n):
                yield lst[i : i + n]

-        with ThreadPoolExecutor(max_workers=100) as executor:
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            for sub_batch in chunks(batch_messages, 100):
                for message_list in sub_batch:
                    kwargs_modified = args.copy()