forked from phoenix/litellm-mirror
(feat) caching + stream - bedrock
This commit is contained in:
parent
4b26c550c5
commit
ee3c9d19a2
2 changed files with 14 additions and 4 deletions
|
@ -223,6 +223,13 @@ class Cache:
|
||||||
str: The cache key generated from the arguments, or None if no cache key could be generated.
|
str: The cache key generated from the arguments, or None if no cache key could be generated.
|
||||||
"""
|
"""
|
||||||
cache_key = ""
|
cache_key = ""
|
||||||
|
print_verbose(f"\nGetting Cache key. Kwargs: {kwargs}")
|
||||||
|
|
||||||
|
# for streaming, we use preset_cache_key. It's created in wrapper(), we do this because optional params like max_tokens, get transformed for bedrock -> max_new_tokens
|
||||||
|
if kwargs.get("litellm_params", {}).get("preset_cache_key", None) is not None:
|
||||||
|
print_verbose(f"\nReturning preset cache key: {cache_key}")
|
||||||
|
return kwargs.get("litellm_params", {}).get("preset_cache_key", None)
|
||||||
|
|
||||||
# sort kwargs by keys, since model: [gpt-4, temperature: 0.2, max_tokens: 200] == [temperature: 0.2, max_tokens: 200, model: gpt-4]
|
# sort kwargs by keys, since model: [gpt-4, temperature: 0.2, max_tokens: 200] == [temperature: 0.2, max_tokens: 200, model: gpt-4]
|
||||||
completion_kwargs = ["model", "messages", "temperature", "top_p", "n", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice"]
|
completion_kwargs = ["model", "messages", "temperature", "top_p", "n", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice"]
|
||||||
for param in completion_kwargs:
|
for param in completion_kwargs:
|
||||||
|
@ -245,6 +252,7 @@ class Cache:
|
||||||
continue # ignore None params
|
continue # ignore None params
|
||||||
param_value = kwargs[param]
|
param_value = kwargs[param]
|
||||||
cache_key+= f"{str(param)}: {str(param_value)}"
|
cache_key+= f"{str(param)}: {str(param_value)}"
|
||||||
|
print_verbose(f"\nCreated cache key: {cache_key}")
|
||||||
return cache_key
|
return cache_key
|
||||||
|
|
||||||
def generate_streaming_content(self, content):
|
def generate_streaming_content(self, content):
|
||||||
|
|
|
@ -344,13 +344,14 @@ def completion(
|
||||||
final_prompt_value = kwargs.get("final_prompt_value", None)
|
final_prompt_value = kwargs.get("final_prompt_value", None)
|
||||||
bos_token = kwargs.get("bos_token", None)
|
bos_token = kwargs.get("bos_token", None)
|
||||||
eos_token = kwargs.get("eos_token", None)
|
eos_token = kwargs.get("eos_token", None)
|
||||||
|
preset_cache_key = kwargs.get("preset_cache_key", None)
|
||||||
hf_model_name = kwargs.get("hf_model_name", None)
|
hf_model_name = kwargs.get("hf_model_name", None)
|
||||||
### ASYNC CALLS ###
|
### ASYNC CALLS ###
|
||||||
acompletion = kwargs.get("acompletion", False)
|
acompletion = kwargs.get("acompletion", False)
|
||||||
client = kwargs.get("client", None)
|
client = kwargs.get("client", None)
|
||||||
######## end of unpacking kwargs ###########
|
######## end of unpacking kwargs ###########
|
||||||
openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key", "deployment_id", "organization", "base_url", "default_headers", "timeout", "response_format", "seed", "tools", "tool_choice", "max_retries"]
|
openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key", "deployment_id", "organization", "base_url", "default_headers", "timeout", "response_format", "seed", "tools", "tool_choice", "max_retries"]
|
||||||
litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self", "client", "rpm", "tpm", "input_cost_per_token", "output_cost_per_token", "hf_model_name", "model_info", "proxy_server_request"]
|
litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self", "client", "rpm", "tpm", "input_cost_per_token", "output_cost_per_token", "hf_model_name", "model_info", "proxy_server_request", "preset_cache_key"]
|
||||||
default_params = openai_params + litellm_params
|
default_params = openai_params + litellm_params
|
||||||
non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
|
non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
|
||||||
if mock_response:
|
if mock_response:
|
||||||
|
@ -460,7 +461,8 @@ def completion(
|
||||||
completion_call_id=id,
|
completion_call_id=id,
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
model_info=model_info,
|
model_info=model_info,
|
||||||
proxy_server_request=proxy_server_request
|
proxy_server_request=proxy_server_request,
|
||||||
|
preset_cache_key=preset_cache_key
|
||||||
)
|
)
|
||||||
logging.update_environment_variables(model=model, user=user, optional_params=optional_params, litellm_params=litellm_params)
|
logging.update_environment_variables(model=model, user=user, optional_params=optional_params, litellm_params=litellm_params)
|
||||||
if custom_llm_provider == "azure":
|
if custom_llm_provider == "azure":
|
||||||
|
@ -1784,7 +1786,7 @@ def embedding(
|
||||||
proxy_server_request = kwargs.get("proxy_server_request", None)
|
proxy_server_request = kwargs.get("proxy_server_request", None)
|
||||||
aembedding = kwargs.pop("aembedding", None)
|
aembedding = kwargs.pop("aembedding", None)
|
||||||
openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key", "deployment_id", "organization", "base_url", "default_headers", "timeout", "response_format", "seed", "tools", "tool_choice", "max_retries", "encoding_format"]
|
openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key", "deployment_id", "organization", "base_url", "default_headers", "timeout", "response_format", "seed", "tools", "tool_choice", "max_retries", "encoding_format"]
|
||||||
litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self", "client", "rpm", "tpm", "input_cost_per_token", "output_cost_per_token", "hf_model_name", "proxy_server_request", "model_info"]
|
litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self", "client", "rpm", "tpm", "input_cost_per_token", "output_cost_per_token", "hf_model_name", "proxy_server_request", "model_info", "preset_cache_key"]
|
||||||
default_params = openai_params + litellm_params
|
default_params = openai_params + litellm_params
|
||||||
non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
|
non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
|
||||||
optional_params = {}
|
optional_params = {}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue