Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
LiteLLM Minor Fixes & Improvements (09/27/2024) (#5938)
* fix(langfuse.py): prevent double logging requester metadata. Fixes https://github.com/BerriAI/litellm/issues/5935
* build(model_prices_and_context_window.json): add mistral pixtral cost tracking. Closes https://github.com/BerriAI/litellm/issues/5837
* handle streaming for azure ai studio error
* [Perf Proxy] parallel request limiter - use one cache update call (#5932)
* fix parallel request limiter - use one cache update call
* ci/cd run again
* run ci/cd again
* use docker username password
* fix config.yml
* fix config
* fix config
* fix config.yml
* ci/cd run again
* use correct typing for batch set cache
* fix async_set_cache_pipeline
* fix only check user id tpm / rpm limits when limits set
* fix test_openai_azure_embedding_with_oidc_and_cf
* fix(groq/chat/transformation.py): Fixes https://github.com/BerriAI/litellm/issues/5839
* feat(anthropic/chat.py): return 'retry-after' headers from anthropic. Fixes https://github.com/BerriAI/litellm/issues/4387
* feat: raise validation error if message has tool calls without passing `tools` param for anthropic/bedrock. Closes https://github.com/BerriAI/litellm/issues/5747
* [Feature]#5940, add max_workers parameter for the batch_completion (#5947)
* handle streaming for azure ai studio error
* bump: version 1.48.2 → 1.48.3
* docs(data_security.md): add legal/compliance FAQs to make it easier for companies to use litellm
* docs: resolve imports
* [Feature]#5940, add max_workers parameter for the batch_completion method

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>

* fix(converse_transformation.py): fix default message value
* fix(utils.py): fix get_model_info to handle finetuned models. Fixes issue for standard logging payloads, where model_map_value was null for finetuned openai models
* fix(litellm_pre_call_utils.py): add debug statement for data sent after updating with team/key callbacks
* fix: fix linting errors
* fix(anthropic/chat/handler.py): fix cache creation input tokens
* fix(exception_mapping_utils.py): fix missing imports
* fix(anthropic/chat/handler.py): fix usage block translation
* test: fix test
* test: fix tests
* style(types/utils.py): trigger new build
* test: fix test

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Co-authored-by: Jose Alberto Arango Sanchez <jose.arangos@udea.edu.co>
Co-authored-by: josearangos <josearangos@Joses-MacBook-Pro.local>
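As a usage sketch for the batch_completion change (#5940 / #5947): the model name and prompts below are illustrative, and when max_workers is not passed the pool size stays at the previous default of 100.

import litellm

# Each inner list is one independent chat request; batch_completion fans the
# requests out across a thread pool whose size is now set by max_workers.
responses = litellm.batch_completion(
    model="gpt-3.5-turbo",  # illustrative model name
    messages=[
        [{"role": "user", "content": "good morning?"}],
        [{"role": "user", "content": "what's the weather like today?"}],
    ],
    max_workers=10,  # new parameter; defaults to 100, matching the old hardcoded pool size
)
print(len(responses))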
Parent: 754981a78f
Commit: 0b30e212da
35 changed files with 3657 additions and 2820 deletions
@@ -96,6 +96,7 @@ from .llms.cohere import completion as cohere_completion  # type: ignore
from .llms.cohere import embed as cohere_embed
from .llms.custom_llm import CustomLLM, custom_chat_llm_router
from .llms.databricks.chat import DatabricksChatCompletion
from .llms.groq.chat.handler import GroqChatCompletion
from .llms.huggingface_restapi import Huggingface
from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription
from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion
@@ -168,6 +169,7 @@ openai_text_completions = OpenAITextCompletion()
openai_o1_chat_completions = OpenAIO1ChatCompletion()
openai_audio_transcriptions = OpenAIAudioTranscription()
databricks_chat_completions = DatabricksChatCompletion()
groq_chat_completions = GroqChatCompletion()
azure_ai_chat_completions = AzureAIChatCompletion()
azure_ai_embedding = AzureAIEmbedding()
anthropic_chat_completions = AnthropicChatCompletion()
@@ -958,6 +960,7 @@ def completion(
    extra_headers=extra_headers,
    api_version=api_version,
    parallel_tool_calls=parallel_tool_calls,
    messages=messages,
    **non_default_params,
)
@@ -1318,13 +1321,56 @@
        additional_args={"headers": headers},
    )
    response = _response
elif custom_llm_provider == "groq":
    api_base = (
        api_base  # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there
        or litellm.api_base
        or get_secret("GROQ_API_BASE")
        or "https://api.groq.com/openai/v1"
    )

    # set API KEY
    api_key = (
        api_key
        or litellm.api_key  # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there
        or litellm.groq_key
        or get_secret("GROQ_API_KEY")
    )

    headers = headers or litellm.headers

    ## LOAD CONFIG - if set
    config = litellm.GroqChatConfig.get_config()
    for k, v in config.items():
        if (
            k not in optional_params
        ):  # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in
            optional_params[k] = v

    response = groq_chat_completions.completion(
        model=model,
        messages=messages,
        headers=headers,
        model_response=model_response,
        print_verbose=print_verbose,
        api_key=api_key,
        api_base=api_base,
        acompletion=acompletion,
        logging_obj=logging,
        optional_params=optional_params,
        litellm_params=litellm_params,
        logger_fn=logger_fn,
        timeout=timeout,  # type: ignore
        custom_prompt_dict=custom_prompt_dict,
        client=client,  # pass AsyncOpenAI, OpenAI client
        organization=organization,
        custom_llm_provider=custom_llm_provider,
    )
elif (
    model in litellm.open_ai_chat_completion_models
    or custom_llm_provider == "custom_openai"
    or custom_llm_provider == "deepinfra"
    or custom_llm_provider == "perplexity"
    or custom_llm_provider == "groq"
    or custom_llm_provider == "nvidia_nim"
    or custom_llm_provider == "cerebras"
    or custom_llm_provider == "sambanova"
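For orientation, a minimal sketch of a call that would take this new groq branch; the model name is illustrative, and the key and base URL fall back through the chain shown above (GROQ_API_KEY, then the default https://api.groq.com/openai/v1).

import os
import litellm

# Assumes a Groq API key; picked up via get_secret("GROQ_API_KEY") per the chain above.
os.environ["GROQ_API_KEY"] = "gsk-..."

response = litellm.completion(
    model="groq/llama3-8b-8192",  # the "groq/" prefix resolves custom_llm_provider to "groq" in get_llm_provider
    messages=[{"role": "user", "content": "Say hello in one word."}],
)
print(response.choices[0].message.content)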
@@ -1431,6 +1477,7 @@
        original_response=response,
        additional_args={"headers": headers},
    )

elif (
    "replicate" in model
    or custom_llm_provider == "replicate"
@@ -2933,6 +2980,7 @@ def batch_completion(
    deployment_id=None,
    request_timeout: Optional[int] = None,
    timeout: Optional[int] = 600,
    max_workers:Optional[int]= 100,
    # Optional liteLLM function params
    **kwargs,
):
@@ -2956,6 +3004,7 @@ def batch_completion(
        user (str, optional): The user string for generating completions. Defaults to "".
        deployment_id (optional): The deployment ID for generating completions. Defaults to None.
        request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
        max_workers (int,optional): The maximum number of threads to use for parallel processing.

    Returns:
        list: A list of completion results.
@@ -3001,7 +3050,7 @@ def batch_completion(
        for i in range(0, len(lst), n):
            yield lst[i : i + n]

-   with ThreadPoolExecutor(max_workers=100) as executor:
+   with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for sub_batch in chunks(batch_messages, 100):
            for message_list in sub_batch:
                kwargs_modified = args.copy()
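The change above only parameterizes an existing pattern: the message lists are chunked and each request is submitted to a thread pool. A simplified, self-contained sketch of that pattern (generic code, not LiteLLM's actual helpers) shows how max_workers bounds concurrency independently of batch size.

from concurrent.futures import ThreadPoolExecutor

def run_batched(items, worker, max_workers=100, chunk_size=100):
    """Submit items to a thread pool in chunks, mirroring the structure above."""
    def chunks(lst, n):
        # Yield successive n-sized slices of lst.
        for i in range(0, len(lst), n):
            yield lst[i : i + n]

    futures = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for sub_batch in chunks(items, chunk_size):
            for item in sub_batch:
                futures.append(executor.submit(worker, item))
    # Results are collected in submission order once all workers finish.
    return [f.result() for f in futures]

# Example: ten items processed by at most four concurrent workers.
print(run_batched(list(range(10)), worker=lambda x: x * x, max_workers=4))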