From 136693cac4fd86d8be791837f88f07d795759fea Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Thu, 7 Nov 2024 04:17:05 +0530
Subject: [PATCH] LiteLLM Minor Fixes & Improvements (11/05/2024) (#6590)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(pattern_matching_router.py): update model name using correct function

* fix(langfuse.py): metadata deepcopy can cause unhandled error (#6563)

Co-authored-by: seva

* fix(stream_chunk_builder_utils.py): correctly set prompt tokens + log correct streaming usage

Closes https://github.com/BerriAI/litellm/issues/6488

* build(deps): bump cookie and express in /docs/my-website (#6566)

Bumps [cookie](https://github.com/jshttp/cookie) and [express](https://github.com/expressjs/express). These dependencies needed to be updated together.

Updates `cookie` from 0.6.0 to 0.7.1
- [Release notes](https://github.com/jshttp/cookie/releases)
- [Commits](https://github.com/jshttp/cookie/compare/v0.6.0...v0.7.1)

Updates `express` from 4.20.0 to 4.21.1
- [Release notes](https://github.com/expressjs/express/releases)
- [Changelog](https://github.com/expressjs/express/blob/4.21.1/History.md)
- [Commits](https://github.com/expressjs/express/compare/4.20.0...4.21.1)

---
updated-dependencies:
- dependency-name: cookie
  dependency-type: indirect
- dependency-name: express
  dependency-type: indirect
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* docs(virtual_keys.md): update Dockerfile reference (#6554)

Signed-off-by: Emmanuel Ferdman

* (proxy fix) - call connect on prisma client when running setup (#6534)
* critical fix - call connect on prisma client when running setup
* fix test_proxy_server_prisma_setup
* fix test_proxy_server_prisma_setup
* Add 3.5 haiku (#6588)
* feat: add claude-3-5-haiku-20241022 entries
* feat: add claude-3-5-haiku-20241022 and vertex_ai/claude-3-5-haiku@20241022 models
* add missing entries, remove vision
* remove image token costs
* Litellm perf improvements 3 (#6573)
* perf: move writing key to cache, to background task
* perf(litellm_pre_call_utils.py): add otel tracing for pre-call utils

adds 200ms on calls with pgdb connected

* fix(litellm_pre_call_utils.py'): rename call_type to actual call used
* perf(proxy_server.py): remove db logic from _get_config_from_file

was causing db calls to occur on every llm request, if team_id was set on key

* fix(auth_checks.py): add check for reducing db calls if user/team id does not exist in db

reduces latency/call by ~100ms

* fix(proxy_server.py): minor fix on existing_settings not incl alerting
* fix(exception_mapping_utils.py): map databricks exception string
* fix(auth_checks.py): fix auth check logic
* test: correctly mark flaky test
* fix(utils.py): handle auth token error for tokenizers.from_pretrained
* build: fix map
* build: fix map
* build: fix json for model map
* fix ImageObject conversion (#6584)
* (fix) litellm.text_completion raises a non-blocking error on simple usage (#6546)
* unit test test_huggingface_text_completion_logprobs
* fix return TextCompletionHandler convert_chat_to_text_completion
* fix hf rest api
* fix test_huggingface_text_completion_logprobs
* fix linting errors
* fix importLiteLLMResponseObjectHandler
* fix test for LiteLLMResponseObjectHandler
* fix test text completion
* fix allow using 15 seconds for premium license check
* testing fix bedrock deprecated cohere.command-text-v14
* (feat) add `Predicted Outputs` for OpenAI (#6594)
* bump openai to openai==1.54.0
* add 'prediction' param
* testing fix bedrock deprecated cohere.command-text-v14
* test test_openai_prediction_param.py
* test_openai_prediction_param_with_caching
* doc Predicted Outputs
* doc Predicted Output
* (fix) Vertex Improve Performance when using `image_url` (#6593)
* fix transformation vertex
* test test_process_gemini_image
* test_image_completion_request
* testing fix - bedrock has deprecated cohere.command-text-v14
* fix vertex pdf
* bump: version 1.51.5 → 1.52.0
* fix(lowest_tpm_rpm_routing.py): fix parallel rate limit check (#6577)
* fix(lowest_tpm_rpm_routing.py): fix parallel rate limit check
* fix(lowest_tpm_rpm_v2.py): return headers in correct format
* test: update test
* build(deps): bump cookie and express in /docs/my-website (#6566)

Bumps [cookie](https://github.com/jshttp/cookie) and [express](https://github.com/expressjs/express). These dependencies needed to be updated together.

Updates `cookie` from 0.6.0 to 0.7.1
- [Release notes](https://github.com/jshttp/cookie/releases)
- [Commits](https://github.com/jshttp/cookie/compare/v0.6.0...v0.7.1)

Updates `express` from 4.20.0 to 4.21.1
- [Release notes](https://github.com/expressjs/express/releases)
- [Changelog](https://github.com/expressjs/express/blob/4.21.1/History.md)
- [Commits](https://github.com/expressjs/express/compare/4.20.0...4.21.1)

---
updated-dependencies:
- dependency-name: cookie
  dependency-type: indirect
- dependency-name: express
  dependency-type: indirect
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* docs(virtual_keys.md): update Dockerfile reference (#6554)

Signed-off-by: Emmanuel Ferdman

* (proxy fix) - call connect on prisma client when running setup (#6534)
* critical fix - call connect on prisma client when running setup
* fix test_proxy_server_prisma_setup
* fix test_proxy_server_prisma_setup
* Add 3.5 haiku (#6588)
* feat: add claude-3-5-haiku-20241022 entries
* feat: add claude-3-5-haiku-20241022 and vertex_ai/claude-3-5-haiku@20241022 models
* add missing entries, remove vision
* remove image token costs
* Litellm perf improvements 3 (#6573)
* perf: move writing key to cache, to background task
* perf(litellm_pre_call_utils.py): add otel tracing for pre-call utils

adds 200ms on calls with pgdb connected

* fix(litellm_pre_call_utils.py'): rename call_type to actual call used
* perf(proxy_server.py): remove db logic from _get_config_from_file

was causing db calls to occur on every llm request, if team_id was set on key

* fix(auth_checks.py): add check for reducing db calls if user/team id does not exist in db

reduces latency/call by ~100ms

* fix(proxy_server.py): minor fix on existing_settings not incl alerting
* fix(exception_mapping_utils.py): map databricks exception string
* fix(auth_checks.py): fix auth check logic
* test: correctly mark flaky test
* fix(utils.py): handle auth token error for tokenizers.from_pretrained
* build: fix map
* build: fix map
* build: fix json for model map
* test: remove eol model
* fix(proxy_server.py): fix db config loading logic
* fix(proxy_server.py): fix order of config / db updates, to ensure fields not overwritten
* test: skip test if required env var is missing
* test: fix test

---------

Signed-off-by: dependabot[bot]
Signed-off-by: Emmanuel Ferdman
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Emmanuel Ferdman
Co-authored-by: Ishaan Jaff
Co-authored-by: paul-gauthier <69695708+paul-gauthier@users.noreply.github.com>

* test: mark flaky test
* test: handle anthropic api instability
* test(test_proxy_utils.py): add testing for db config update logic
* Update setuptools in docker and fastapi to latest version, in order to upgrade starlette version (#6597)
* build(deps): bump cookie and express in /docs/my-website (#6566)

Bumps [cookie](https://github.com/jshttp/cookie) and [express](https://github.com/expressjs/express). These dependencies needed to be updated together.

Updates `cookie` from 0.6.0 to 0.7.1
- [Release notes](https://github.com/jshttp/cookie/releases)
- [Commits](https://github.com/jshttp/cookie/compare/v0.6.0...v0.7.1)

Updates `express` from 4.20.0 to 4.21.1
- [Release notes](https://github.com/expressjs/express/releases)
- [Changelog](https://github.com/expressjs/express/blob/4.21.1/History.md)
- [Commits](https://github.com/expressjs/express/compare/4.20.0...4.21.1)

---
updated-dependencies:
- dependency-name: cookie
  dependency-type: indirect
- dependency-name: express
  dependency-type: indirect
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* docs(virtual_keys.md): update Dockerfile reference (#6554)

Signed-off-by: Emmanuel Ferdman

* (proxy fix) - call connect on prisma client when running setup (#6534)
* critical fix - call connect on prisma client when running setup
* fix test_proxy_server_prisma_setup
* fix test_proxy_server_prisma_setup
* Add 3.5 haiku (#6588)
* feat: add claude-3-5-haiku-20241022 entries
* feat: add claude-3-5-haiku-20241022 and vertex_ai/claude-3-5-haiku@20241022 models
* add missing entries, remove vision
* remove image token costs
* Litellm perf improvements 3 (#6573)
* perf: move writing key to cache, to background task
* perf(litellm_pre_call_utils.py): add otel tracing for pre-call utils

adds 200ms on calls with pgdb connected

* fix(litellm_pre_call_utils.py'): rename call_type to actual call used
* perf(proxy_server.py): remove db logic from _get_config_from_file

was causing db calls to occur on every llm request, if team_id was set on key

* fix(auth_checks.py): add check for reducing db calls if user/team id does not exist in db

reduces latency/call by ~100ms

* fix(proxy_server.py): minor fix on existing_settings not incl alerting
* fix(exception_mapping_utils.py): map databricks exception string
* fix(auth_checks.py): fix auth check logic
* test: correctly mark flaky test
* fix(utils.py): handle auth token error for tokenizers.from_pretrained
* build: fix map
* build: fix map
* build: fix json for model map
* fix ImageObject conversion (#6584)
* (fix) litellm.text_completion raises a non-blocking error on simple usage (#6546)
* unit test test_huggingface_text_completion_logprobs
* fix return TextCompletionHandler convert_chat_to_text_completion
* fix hf rest api
* fix test_huggingface_text_completion_logprobs
* fix linting errors
* fix importLiteLLMResponseObjectHandler
* fix test for LiteLLMResponseObjectHandler
* fix test text completion
* fix allow using 15 seconds for premium license check
* testing fix bedrock deprecated cohere.command-text-v14
* (feat) add `Predicted Outputs` for OpenAI (#6594)
* bump openai to openai==1.54.0
* add 'prediction' param
* testing fix bedrock deprecated cohere.command-text-v14
* test test_openai_prediction_param.py
* test_openai_prediction_param_with_caching
* doc Predicted Outputs
* doc Predicted Output
* (fix) Vertex Improve Performance when using `image_url` (#6593)
* fix transformation vertex
* test test_process_gemini_image
* test_image_completion_request
* testing fix - bedrock has deprecated cohere.command-text-v14
* fix vertex pdf
* bump: version 1.51.5 → 1.52.0
* Update setuptools in docker and fastapi to latest version, in order to upgrade starlette version

---------

Signed-off-by: dependabot[bot]
Signed-off-by: Emmanuel Ferdman
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Emmanuel Ferdman
Co-authored-by: Ishaan Jaff
Co-authored-by: paul-gauthier <69695708+paul-gauthier@users.noreply.github.com>
Co-authored-by: Krish Dholakia
Co-authored-by: Jacob Hagstedt

* fix(langfuse.py): fix linting errors
* fix: fix linting errors
* fix: fix casting error
* fix: fix typing error
* fix: add more tests
* fix(utils.py): fix return_processed_chunk_logic
* Revert "Update setuptools in docker and fastapi to latest verison, in order t…" (#6615)

This reverts commit 1a7f7bdfb75df0efbc930b7f2e39febc80e97d5a.

* docs fix clarify team_id on team based logging
* doc fix team based logging with langfuse
* fix flake8 checks
* test: bump sleep time
* refactor: replace claude-instant-1.2 with haiku in testing
* fix(proxy_server.py): move to using sl payload in track_cost_callback
* fix(proxy_server.py): fix linting errors
* fix(proxy_server.py): fallback to kwargs(response_cost) if given
* test: remove claude-instant-1 from tests
* test: fix claude test
* docs fix clarify team_id on team based logging
* doc fix team based logging with langfuse
* build: remove lint.yml

---------

Signed-off-by: dependabot[bot]
Signed-off-by: Emmanuel Ferdman
Co-authored-by: Vsevolod Karvetskiy <56288164+karvetskiy@users.noreply.github.com>
Co-authored-by: seva
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Emmanuel Ferdman
Co-authored-by: Ishaan Jaff
Co-authored-by: paul-gauthier <69695708+paul-gauthier@users.noreply.github.com>
Co-authored-by: Jacob Hagstedt P Suorra
Co-authored-by: Jacob Hagstedt
---
 litellm/integrations/langfuse/langfuse.py | 84 ++--
 .../streaming_chunk_builder_utils.py | 96 ++--
 litellm/llms/anthropic/chat/handler.py | 1 +
 litellm/proxy/_new_secret_config.yaml | 13 +-
 litellm/proxy/proxy_server.py | 26 +-
 .../router_utils/pattern_match_deployments.py | 33 +-
 litellm/types/utils.py | 57 ++-
 litellm/utils.py | 419 ++++++++----------
 .../test_add_function_to_prompt.py | 36 +-
 tests/local_testing/test_alangfuse.py | 22 -
 tests/local_testing/test_batch_completions.py | 2 +-
 tests/local_testing/test_completion.py | 2 +-
 .../test_custom_callback_input.py | 62 +++
 tests/local_testing/test_exceptions.py | 4 +-
 tests/local_testing/test_langsmith.py | 65 ---
 tests/local_testing/test_logging.py | 8 +-
 .../test_model_response_typing/test.py | 2 +-
 tests/local_testing/test_prometheus.py | 59 +--
 .../test_promptlayer_integration.py | 2 +-
 .../test_provider_specific_config.py | 6 +-
 tests/local_testing/test_proxy_utils.py | 47 +-
 tests/local_testing/test_router.py | 2 +-
 tests/local_testing/test_router_fallbacks.py | 8 +-
 .../test_router_pattern_matching.py | 33 ++
 tests/local_testing/test_router_timeout.py | 6 +-
 tests/local_testing/test_streaming.py | 4 +-
 tests/local_testing/test_token_counter.py | 13 +-
 tests/local_testing/test_traceloop.py | 2 +-
 tests/local_testing/test_wandb.py | 2 +-
 .../test_langfuse_unit_tests.py | 32 +-
 .../test_router_helper_utils.py | 13 +
 tests/test_keys.py | 6 +-
 32 files changed, 634 insertions(+), 533 deletions(-)

diff --git a/litellm/integrations/langfuse/langfuse.py
b/litellm/integrations/langfuse/langfuse.py index 182c886376..18892871e4 100644 --- a/litellm/integrations/langfuse/langfuse.py +++ b/litellm/integrations/langfuse/langfuse.py @@ -1,9 +1,9 @@ #### What this does #### # On success, logs events to Langfuse import copy -import inspect import os import traceback +from collections.abc import MutableMapping, MutableSequence, MutableSet from typing import TYPE_CHECKING, Any, Dict, Optional from packaging.version import Version @@ -14,7 +14,7 @@ from litellm._logging import verbose_logger from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info from litellm.secret_managers.main import str_to_bool from litellm.types.integrations.langfuse import * -from litellm.types.utils import StandardCallbackDynamicParams, StandardLoggingPayload +from litellm.types.utils import StandardLoggingPayload if TYPE_CHECKING: from litellm.litellm_core_utils.litellm_logging import DynamicLoggingCache @@ -355,6 +355,47 @@ class LangFuseLogger: ) ) + def _prepare_metadata(self, metadata) -> Any: + try: + return copy.deepcopy(metadata) # Avoid modifying the original metadata + except (TypeError, copy.Error) as e: + verbose_logger.warning(f"Langfuse Layer Error - {e}") + + new_metadata: Dict[str, Any] = {} + + # if metadata is not a MutableMapping, return an empty dict since we can't call items() on it + if not isinstance(metadata, MutableMapping): + verbose_logger.warning( + "Langfuse Layer Logging - metadata is not a MutableMapping, returning empty dict" + ) + return new_metadata + + for key, value in metadata.items(): + try: + if isinstance(value, MutableMapping): + new_metadata[key] = self._prepare_metadata(value) + elif isinstance(value, (MutableSequence, MutableSet)): + new_metadata[key] = type(value)( + *( + ( + self._prepare_metadata(v) + if isinstance(v, MutableMapping) + else copy.deepcopy(v) + ) + for v in value + ) + ) + elif isinstance(value, BaseModel): + new_metadata[key] = value.model_dump() + else: + new_metadata[key] = copy.deepcopy(value) + except (TypeError, copy.Error): + verbose_logger.warning( + f"Langfuse Layer Error - Couldn't copy metadata key: {key} - {traceback.format_exc()}" + ) + + return new_metadata + def _log_langfuse_v2( # noqa: PLR0915 self, user_id, @@ -373,40 +414,19 @@ class LangFuseLogger: ) -> tuple: import langfuse + print_verbose("Langfuse Layer Logging - logging to langfuse v2") + try: - tags = [] - try: - optional_params.pop("metadata") - metadata = copy.deepcopy( - metadata - ) # Avoid modifying the original metadata - except Exception: - new_metadata = {} - for key, value in metadata.items(): - if ( - isinstance(value, list) - or isinstance(value, dict) - or isinstance(value, str) - or isinstance(value, int) - or isinstance(value, float) - ): - new_metadata[key] = copy.deepcopy(value) - elif isinstance(value, BaseModel): - new_metadata[key] = value.model_dump() - metadata = new_metadata + metadata = self._prepare_metadata(metadata) - supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3") - supports_prompt = Version(langfuse.version.__version__) >= Version("2.7.3") - supports_costs = Version(langfuse.version.__version__) >= Version("2.7.3") - supports_completion_start_time = Version( - langfuse.version.__version__ - ) >= Version("2.7.3") + langfuse_version = Version(langfuse.version.__version__) - print_verbose("Langfuse Layer Logging - logging to langfuse v2 ") + supports_tags = langfuse_version >= Version("2.6.3") + supports_prompt = langfuse_version >= Version("2.7.3") + supports_costs 
= langfuse_version >= Version("2.7.3") + supports_completion_start_time = langfuse_version >= Version("2.7.3") - if supports_tags: - metadata_tags = metadata.pop("tags", []) - tags = metadata_tags + tags = metadata.pop("tags", []) if supports_tags else [] # Clean Metadata before logging - never log raw metadata # the raw metadata can contain circular references which leads to infinite recursion diff --git a/litellm/litellm_core_utils/streaming_chunk_builder_utils.py b/litellm/litellm_core_utils/streaming_chunk_builder_utils.py index 1ca6a6fd6d..a198a90f73 100644 --- a/litellm/litellm_core_utils/streaming_chunk_builder_utils.py +++ b/litellm/litellm_core_utils/streaming_chunk_builder_utils.py @@ -243,6 +243,49 @@ class ChunkProcessor: id=id, ) + def _usage_chunk_calculation_helper(self, usage_chunk: Usage) -> dict: + prompt_tokens = 0 + completion_tokens = 0 + ## anthropic prompt caching information ## + cache_creation_input_tokens: Optional[int] = None + cache_read_input_tokens: Optional[int] = None + completion_tokens_details: Optional[CompletionTokensDetails] = None + prompt_tokens_details: Optional[PromptTokensDetails] = None + + if "prompt_tokens" in usage_chunk: + prompt_tokens = usage_chunk.get("prompt_tokens", 0) or 0 + if "completion_tokens" in usage_chunk: + completion_tokens = usage_chunk.get("completion_tokens", 0) or 0 + if "cache_creation_input_tokens" in usage_chunk: + cache_creation_input_tokens = usage_chunk.get("cache_creation_input_tokens") + if "cache_read_input_tokens" in usage_chunk: + cache_read_input_tokens = usage_chunk.get("cache_read_input_tokens") + if hasattr(usage_chunk, "completion_tokens_details"): + if isinstance(usage_chunk.completion_tokens_details, dict): + completion_tokens_details = CompletionTokensDetails( + **usage_chunk.completion_tokens_details + ) + elif isinstance( + usage_chunk.completion_tokens_details, CompletionTokensDetails + ): + completion_tokens_details = usage_chunk.completion_tokens_details + if hasattr(usage_chunk, "prompt_tokens_details"): + if isinstance(usage_chunk.prompt_tokens_details, dict): + prompt_tokens_details = PromptTokensDetails( + **usage_chunk.prompt_tokens_details + ) + elif isinstance(usage_chunk.prompt_tokens_details, PromptTokensDetails): + prompt_tokens_details = usage_chunk.prompt_tokens_details + + return { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "cache_creation_input_tokens": cache_creation_input_tokens, + "cache_read_input_tokens": cache_read_input_tokens, + "completion_tokens_details": completion_tokens_details, + "prompt_tokens_details": prompt_tokens_details, + } + def calculate_usage( self, chunks: List[Union[Dict[str, Any], ModelResponse]], @@ -269,37 +312,30 @@ class ChunkProcessor: elif isinstance(chunk, ModelResponse) and hasattr(chunk, "_hidden_params"): usage_chunk = chunk._hidden_params.get("usage", None) if usage_chunk is not None: - if "prompt_tokens" in usage_chunk: - prompt_tokens = usage_chunk.get("prompt_tokens", 0) or 0 - if "completion_tokens" in usage_chunk: - completion_tokens = usage_chunk.get("completion_tokens", 0) or 0 - if "cache_creation_input_tokens" in usage_chunk: - cache_creation_input_tokens = usage_chunk.get( + usage_chunk_dict = self._usage_chunk_calculation_helper(usage_chunk) + if ( + usage_chunk_dict["prompt_tokens"] is not None + and usage_chunk_dict["prompt_tokens"] > 0 + ): + prompt_tokens = usage_chunk_dict["prompt_tokens"] + if ( + usage_chunk_dict["completion_tokens"] is not None + and usage_chunk_dict["completion_tokens"] > 0 + ): 
+ completion_tokens = usage_chunk_dict["completion_tokens"] + if usage_chunk_dict["cache_creation_input_tokens"] is not None: + cache_creation_input_tokens = usage_chunk_dict[ "cache_creation_input_tokens" - ) - if "cache_read_input_tokens" in usage_chunk: - cache_read_input_tokens = usage_chunk.get("cache_read_input_tokens") - if hasattr(usage_chunk, "completion_tokens_details"): - if isinstance(usage_chunk.completion_tokens_details, dict): - completion_tokens_details = CompletionTokensDetails( - **usage_chunk.completion_tokens_details - ) - elif isinstance( - usage_chunk.completion_tokens_details, CompletionTokensDetails - ): - completion_tokens_details = ( - usage_chunk.completion_tokens_details - ) - if hasattr(usage_chunk, "prompt_tokens_details"): - if isinstance(usage_chunk.prompt_tokens_details, dict): - prompt_tokens_details = PromptTokensDetails( - **usage_chunk.prompt_tokens_details - ) - elif isinstance( - usage_chunk.prompt_tokens_details, PromptTokensDetails - ): - prompt_tokens_details = usage_chunk.prompt_tokens_details - + ] + if usage_chunk_dict["cache_read_input_tokens"] is not None: + cache_read_input_tokens = usage_chunk_dict[ + "cache_read_input_tokens" + ] + if usage_chunk_dict["completion_tokens_details"] is not None: + completion_tokens_details = usage_chunk_dict[ + "completion_tokens_details" + ] + prompt_tokens_details = usage_chunk_dict["prompt_tokens_details"] try: returned_usage.prompt_tokens = prompt_tokens or token_counter( model=model, messages=messages diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py index a30cd65709..da95ac075f 100644 --- a/litellm/llms/anthropic/chat/handler.py +++ b/litellm/llms/anthropic/chat/handler.py @@ -769,6 +769,7 @@ class ModelResponseIterator: message=message, status_code=500, # it looks like Anthropic API does not return a status code in the chunk error - default to 500 ) + returned_chunk = GenericStreamingChunk( text=text, tool_use=tool_use, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index d81c96df5a..5cf2938647 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -24,13 +24,20 @@ model_list: api_key: my-fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - + - model_name: gpt-4 + litellm_params: + model: azure/chatgpt-v-2 + api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ + api_version: "2023-05-15" + api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. 
See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault + rpm: 480 + timeout: 300 + stream_timeout: 60 # litellm_settings: # fallbacks: [{ "claude-3-5-sonnet-20240620": ["claude-3-5-sonnet-aihubmix"] }] # callbacks: ["otel", "prometheus"] # default_redis_batch_cache_expiry: 10 - # litellm_settings: # cache: True # cache_params: @@ -74,4 +81,4 @@ model_list: # # # see https://docs.litellm.ai/docs/proxy/caching#advanced---user-api-key-cache-ttl # # # our api keys rarely change -# # user_api_key_cache_ttl: 3600 \ No newline at end of file +# # user_api_key_cache_ttl: 3600 diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 9f65792427..94a5bb5e92 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -770,8 +770,16 @@ async def _PROXY_track_cost_callback( org_id = metadata.get("user_api_key_org_id", None) key_alias = metadata.get("user_api_key_alias", None) end_user_max_budget = metadata.get("user_api_end_user_max_budget", None) - if kwargs.get("response_cost", None) is not None: - response_cost = kwargs["response_cost"] + sl_object: Optional[StandardLoggingPayload] = kwargs.get( + "standard_logging_object", None + ) + response_cost = ( + sl_object.get("response_cost", None) + if sl_object is not None + else kwargs.get("response_cost", None) + ) + + if response_cost is not None: user_api_key = metadata.get("user_api_key", None) if kwargs.get("cache_hit", False) is True: response_cost = 0.0 @@ -824,9 +832,15 @@ async def _PROXY_track_cost_callback( if kwargs["stream"] is not True or ( kwargs["stream"] is True and "complete_streaming_response" in kwargs ): - cost_tracking_failure_debug_info = kwargs.get( - "response_cost_failure_debug_information" - ) + if sl_object is not None: + cost_tracking_failure_debug_info: Union[dict, str] = ( + sl_object["response_cost_failure_debug_info"] # type: ignore + or "response_cost_failure_debug_info is None in standard_logging_object" + ) + else: + cost_tracking_failure_debug_info = ( + "standard_logging_object not found" + ) model = kwargs.get("model") raise Exception( f"Cost tracking failed for model={model}.\nDebug info - {cost_tracking_failure_debug_info}\nAdd custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing" @@ -842,7 +856,7 @@ async def _PROXY_track_cost_callback( failing_model=model, ) ) - verbose_proxy_logger.debug("error in tracking cost callback - %s", e) + verbose_proxy_logger.debug(error_msg) def error_tracking(): diff --git a/litellm/router_utils/pattern_match_deployments.py b/litellm/router_utils/pattern_match_deployments.py index a0d631bf71..039af635cf 100644 --- a/litellm/router_utils/pattern_match_deployments.py +++ b/litellm/router_utils/pattern_match_deployments.py @@ -61,6 +61,24 @@ class PatternMatchRouter: # return f"^{regex}$" return re.escape(pattern).replace(r"\*", "(.*)") + def _return_pattern_matched_deployments( + self, matched_pattern: Match, deployments: List[Dict] + ) -> List[Dict]: + new_deployments = [] + for deployment in deployments: + new_deployment = copy.deepcopy(deployment) + new_deployment["litellm_params"]["model"] = ( + PatternMatchRouter.set_deployment_model_name( + matched_pattern=matched_pattern, + litellm_deployment_litellm_model=deployment["litellm_params"][ + "model" + ], + ) + ) + new_deployments.append(new_deployment) + + return new_deployments + def route(self, request: Optional[str]) -> Optional[List[Dict]]: """ Route a requested model to the corresponding llm deployments based on the regex pattern @@ -79,8 +97,11 @@ 
class PatternMatchRouter: if request is None: return None for pattern, llm_deployments in self.patterns.items(): - if re.match(pattern, request): - return llm_deployments + pattern_match = re.match(pattern, request) + if pattern_match: + return self._return_pattern_matched_deployments( + matched_pattern=pattern_match, deployments=llm_deployments + ) except Exception as e: verbose_router_logger.debug(f"Error in PatternMatchRouter.route: {str(e)}") @@ -102,6 +123,7 @@ class PatternMatchRouter: if model_name = "llmengine/foo" -> model = "openai/foo" """ + ## BASE CASE: if the deployment model name does not contain a wildcard, return the deployment model name if "*" not in litellm_deployment_litellm_model: return litellm_deployment_litellm_model @@ -165,12 +187,7 @@ class PatternMatchRouter: """ pattern_match = self.get_pattern(model, custom_llm_provider) if pattern_match: - provider_deployments = [] - for deployment in pattern_match: - dep = copy.deepcopy(deployment) - dep["litellm_params"]["model"] = model - provider_deployments.append(dep) - return provider_deployments + return pattern_match return [] diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 942750416b..c0a9764e85 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -745,13 +745,13 @@ class StreamingChatCompletionChunk(OpenAIChatCompletionChunk): super().__init__(**kwargs) -class ModelResponse(OpenAIObject): +from openai.types.chat import ChatCompletionChunk + + +class ModelResponseBase(OpenAIObject): id: str """A unique identifier for the completion.""" - choices: List[Union[Choices, StreamingChoices]] - """The list of completion choices the model generated for the input prompt.""" - created: int """The Unix timestamp (in seconds) of when the completion was created.""" @@ -772,6 +772,55 @@ class ModelResponse(OpenAIObject): _response_headers: Optional[dict] = None + +class ModelResponseStream(ModelResponseBase): + choices: List[StreamingChoices] + + def __init__( + self, + choices: Optional[List[Union[StreamingChoices, dict, BaseModel]]] = None, + **kwargs, + ): + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + _new_choice = None + if isinstance(choice, StreamingChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = StreamingChoices(**choice) + elif isinstance(choice, BaseModel): + _new_choice = StreamingChoices(**choice.model_dump()) + new_choices.append(_new_choice) + kwargs["choices"] = new_choices + else: + kwargs["choices"] = [StreamingChoices()] + super().__init__(**kwargs) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class ModelResponse(ModelResponseBase): + choices: List[Union[Choices, StreamingChoices]] + """The list of completion choices the model generated for the input prompt.""" + def __init__( self, id=None, diff --git a/litellm/utils.py b/litellm/utils.py index d8c435552b..6dd0a5009f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -114,6 +114,7 @@ from litellm.types.utils import ( Message, ModelInfo, 
ModelResponse, + ModelResponseStream, ProviderField, StreamingChoices, TextChoices, @@ -5642,6 +5643,9 @@ class CustomStreamWrapper: ) self.messages = getattr(logging_obj, "messages", None) self.sent_stream_usage = False + self.send_stream_usage = ( + True if self.check_send_stream_usage(self.stream_options) else False + ) self.tool_call = False self.chunks: List = ( [] @@ -5654,6 +5658,12 @@ class CustomStreamWrapper: def __aiter__(self): return self + def check_send_stream_usage(self, stream_options: Optional[dict]): + return ( + stream_options is not None + and stream_options.get("include_usage", False) is True + ) + def check_is_function_call(self, logging_obj) -> bool: if hasattr(logging_obj, "optional_params") and isinstance( logging_obj.optional_params, dict @@ -6506,9 +6516,148 @@ class CustomStreamWrapper: is_empty = False return is_empty + def return_processed_chunk_logic( # noqa + self, + completion_obj: dict, + model_response: ModelResponseStream, + response_obj: dict, + ): + + print_verbose( + f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}" + ) + if ( + "content" in completion_obj + and ( + isinstance(completion_obj["content"], str) + and len(completion_obj["content"]) > 0 + ) + or ( + "tool_calls" in completion_obj + and completion_obj["tool_calls"] is not None + and len(completion_obj["tool_calls"]) > 0 + ) + or ( + "function_call" in completion_obj + and completion_obj["function_call"] is not None + ) + ): # cannot set content of an OpenAI Object to be an empty string + self.safety_checker() + hold, model_response_str = self.check_special_tokens( + chunk=completion_obj["content"], + finish_reason=model_response.choices[0].finish_reason, + ) # filter out bos/eos tokens from openai-compatible hf endpoints + print_verbose(f"hold - {hold}, model_response_str - {model_response_str}") + if hold is False: + ## check if openai/azure chunk + original_chunk = response_obj.get("original_chunk", None) + if original_chunk: + model_response.id = original_chunk.id + self.response_id = original_chunk.id + if len(original_chunk.choices) > 0: + choices = [] + for choice in original_chunk.choices: + try: + if isinstance(choice, BaseModel): + choice_json = choice.model_dump() + choice_json.pop( + "finish_reason", None + ) # for mistral etc. which return a value in their last chunk (not-openai compatible). 
+ print_verbose(f"choice_json: {choice_json}") + choices.append(StreamingChoices(**choice_json)) + except Exception: + choices.append(StreamingChoices()) + print_verbose(f"choices in streaming: {choices}") + setattr(model_response, "choices", choices) + else: + return + model_response.system_fingerprint = ( + original_chunk.system_fingerprint + ) + setattr( + model_response, + "citations", + getattr(original_chunk, "citations", None), + ) + print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}") + if self.sent_first_chunk is False: + model_response.choices[0].delta["role"] = "assistant" + self.sent_first_chunk = True + elif self.sent_first_chunk is True and hasattr( + model_response.choices[0].delta, "role" + ): + _initial_delta = model_response.choices[0].delta.model_dump() + _initial_delta.pop("role", None) + model_response.choices[0].delta = Delta(**_initial_delta) + print_verbose( + f"model_response.choices[0].delta: {model_response.choices[0].delta}" + ) + else: + ## else + completion_obj["content"] = model_response_str + if self.sent_first_chunk is False: + completion_obj["role"] = "assistant" + self.sent_first_chunk = True + + model_response.choices[0].delta = Delta(**completion_obj) + _index: Optional[int] = completion_obj.get("index") + if _index is not None: + model_response.choices[0].index = _index + print_verbose(f"returning model_response: {model_response}") + return model_response + else: + return + elif self.received_finish_reason is not None: + if self.sent_last_chunk is True: + # Bedrock returns the guardrail trace in the last chunk - we want to return this here + if self.custom_llm_provider == "bedrock" and "trace" in model_response: + return model_response + + # Default - return StopIteration + raise StopIteration + # flush any remaining holding chunk + if len(self.holding_chunk) > 0: + if model_response.choices[0].delta.content is None: + model_response.choices[0].delta.content = self.holding_chunk + else: + model_response.choices[0].delta.content = ( + self.holding_chunk + model_response.choices[0].delta.content + ) + self.holding_chunk = "" + # if delta is None + _is_delta_empty = self.is_delta_empty(delta=model_response.choices[0].delta) + + if _is_delta_empty: + # get any function call arguments + model_response.choices[0].finish_reason = map_finish_reason( + finish_reason=self.received_finish_reason + ) # ensure consistent output to openai + + self.sent_last_chunk = True + + return model_response + elif ( + model_response.choices[0].delta.tool_calls is not None + or model_response.choices[0].delta.function_call is not None + ): + if self.sent_first_chunk is False: + model_response.choices[0].delta["role"] = "assistant" + self.sent_first_chunk = True + return model_response + elif ( + len(model_response.choices) > 0 + and hasattr(model_response.choices[0].delta, "audio") + and model_response.choices[0].delta.audio is not None + ): + return model_response + else: + if hasattr(model_response, "usage"): + self.chunks.append(model_response) + return + def chunk_creator(self, chunk): # type: ignore # noqa: PLR0915 model_response = self.model_response_creator() - response_obj = {} + response_obj: dict = {} try: # return this for all models completion_obj = {"content": ""} @@ -6559,6 +6708,7 @@ class CustomStreamWrapper: "provider_specific_fields" ].items(): setattr(model_response, key, value) + response_obj = anthropic_response_obj elif ( self.custom_llm_provider @@ -6626,7 +6776,7 @@ class CustomStreamWrapper: if self.sent_first_chunk is False: raise 
Exception("An unknown error occurred with the stream") self.received_finish_reason = "stop" - elif self.custom_llm_provider and (self.custom_llm_provider == "vertex_ai"): + elif self.custom_llm_provider == "vertex_ai": import proto # type: ignore if self.model.startswith("claude-3"): @@ -7009,145 +7159,12 @@ class CustomStreamWrapper: self.tool_call = True ## RETURN ARG - if ( - "content" in completion_obj - and ( - isinstance(completion_obj["content"], str) - and len(completion_obj["content"]) > 0 - ) - or ( - "tool_calls" in completion_obj - and completion_obj["tool_calls"] is not None - and len(completion_obj["tool_calls"]) > 0 - ) - or ( - "function_call" in completion_obj - and completion_obj["function_call"] is not None - ) - ): # cannot set content of an OpenAI Object to be an empty string - self.safety_checker() - hold, model_response_str = self.check_special_tokens( - chunk=completion_obj["content"], - finish_reason=model_response.choices[0].finish_reason, - ) # filter out bos/eos tokens from openai-compatible hf endpoints - print_verbose( - f"hold - {hold}, model_response_str - {model_response_str}" - ) - if hold is False: - ## check if openai/azure chunk - original_chunk = response_obj.get("original_chunk", None) - if original_chunk: - model_response.id = original_chunk.id - self.response_id = original_chunk.id - if len(original_chunk.choices) > 0: - choices = [] - for idx, choice in enumerate(original_chunk.choices): - try: - if isinstance(choice, BaseModel): - try: - choice_json = choice.model_dump() - except Exception: - choice_json = choice.dict() - choice_json.pop( - "finish_reason", None - ) # for mistral etc. which return a value in their last chunk (not-openai compatible). - print_verbose(f"choice_json: {choice_json}") - choices.append(StreamingChoices(**choice_json)) - except Exception: - choices.append(StreamingChoices()) - print_verbose(f"choices in streaming: {choices}") - model_response.choices = choices - else: - return - model_response.system_fingerprint = ( - original_chunk.system_fingerprint - ) - model_response.citations = getattr( - original_chunk, "citations", None - ) - print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}") - if self.sent_first_chunk is False: - model_response.choices[0].delta["role"] = "assistant" - self.sent_first_chunk = True - elif self.sent_first_chunk is True and hasattr( - model_response.choices[0].delta, "role" - ): - _initial_delta = model_response.choices[ - 0 - ].delta.model_dump() - _initial_delta.pop("role", None) - model_response.choices[0].delta = Delta(**_initial_delta) - print_verbose( - f"model_response.choices[0].delta: {model_response.choices[0].delta}" - ) - else: - ## else - completion_obj["content"] = model_response_str - if self.sent_first_chunk is False: - completion_obj["role"] = "assistant" - self.sent_first_chunk = True + return self.return_processed_chunk_logic( + completion_obj=completion_obj, + model_response=model_response, # type: ignore + response_obj=response_obj, + ) - model_response.choices[0].delta = Delta(**completion_obj) - if completion_obj.get("index") is not None: - model_response.choices[0].index = completion_obj.get( - "index" - ) - print_verbose(f"returning model_response: {model_response}") - return model_response - else: - return - elif self.received_finish_reason is not None: - if self.sent_last_chunk is True: - # Bedrock returns the guardrail trace in the last chunk - we want to return this here - if ( - self.custom_llm_provider == "bedrock" - and "trace" in model_response - ): - 
return model_response - - # Default - return StopIteration - raise StopIteration - # flush any remaining holding chunk - if len(self.holding_chunk) > 0: - if model_response.choices[0].delta.content is None: - model_response.choices[0].delta.content = self.holding_chunk - else: - model_response.choices[0].delta.content = ( - self.holding_chunk + model_response.choices[0].delta.content - ) - self.holding_chunk = "" - # if delta is None - _is_delta_empty = self.is_delta_empty( - delta=model_response.choices[0].delta - ) - - if _is_delta_empty: - # get any function call arguments - model_response.choices[0].finish_reason = map_finish_reason( - finish_reason=self.received_finish_reason - ) # ensure consistent output to openai - - self.sent_last_chunk = True - - return model_response - elif ( - model_response.choices[0].delta.tool_calls is not None - or model_response.choices[0].delta.function_call is not None - ): - if self.sent_first_chunk is False: - model_response.choices[0].delta["role"] = "assistant" - self.sent_first_chunk = True - return model_response - elif ( - len(model_response.choices) > 0 - and hasattr(model_response.choices[0].delta, "audio") - and model_response.choices[0].delta.audio is not None - ): - return model_response - else: - if hasattr(model_response, "usage"): - self.chunks.append(model_response) - return except StopIteration: raise StopIteration except Exception as e: @@ -7293,27 +7310,24 @@ class CustomStreamWrapper: except StopIteration: if self.sent_last_chunk is True: - if ( - self.sent_stream_usage is False - and self.stream_options is not None - and self.stream_options.get("include_usage", False) is True - ): - # send the final chunk with stream options - complete_streaming_response = litellm.stream_chunk_builder( - chunks=self.chunks, messages=self.messages + complete_streaming_response = litellm.stream_chunk_builder( + chunks=self.chunks, messages=self.messages + ) + response = self.model_response_creator() + if complete_streaming_response is not None: + setattr( + response, + "usage", + getattr(complete_streaming_response, "usage"), ) - response = self.model_response_creator() - if complete_streaming_response is not None: - setattr( - response, - "usage", - getattr(complete_streaming_response, "usage"), - ) - ## LOGGING - threading.Thread( - target=self.logging_obj.success_handler, - args=(response, None, None, cache_hit), - ).start() # log response + + ## LOGGING + threading.Thread( + target=self.logging_obj.success_handler, + args=(response, None, None, cache_hit), + ).start() # log response + + if self.sent_stream_usage is False and self.send_stream_usage is True: self.sent_stream_usage = True return response raise # Re-raise StopIteration @@ -7401,7 +7415,6 @@ class CustomStreamWrapper: or self.custom_llm_provider in litellm._custom_providers ): async for chunk in self.completion_stream: - print_verbose(f"value of async chunk: {chunk}") if chunk == "None" or chunk is None: raise Exception elif ( @@ -7431,10 +7444,7 @@ class CustomStreamWrapper: end_time=None, cache_hit=cache_hit, ) - # threading.Thread( - # target=self.logging_obj.success_handler, - # args=(processed_chunk, None, None, cache_hit), - # ).start() # log response + asyncio.create_task( self.logging_obj.async_success_handler( processed_chunk, cache_hit=cache_hit @@ -7515,82 +7525,33 @@ class CustomStreamWrapper: # RETURN RESULT self.chunks.append(processed_chunk) return processed_chunk - except StopAsyncIteration: + except (StopAsyncIteration, StopIteration): if self.sent_last_chunk is True: - 
if ( - self.sent_stream_usage is False - and self.stream_options is not None - and self.stream_options.get("include_usage", False) is True - ): - # send the final chunk with stream options - complete_streaming_response = litellm.stream_chunk_builder( - chunks=self.chunks, messages=self.messages + # log the final chunk with accurate streaming values + complete_streaming_response = litellm.stream_chunk_builder( + chunks=self.chunks, messages=self.messages + ) + response = self.model_response_creator() + if complete_streaming_response is not None: + setattr( + response, + "usage", + getattr(complete_streaming_response, "usage"), ) - response = self.model_response_creator() - if complete_streaming_response is not None: - setattr( - response, - "usage", - getattr(complete_streaming_response, "usage"), - ) - ## LOGGING - threading.Thread( - target=self.logging_obj.success_handler, - args=(response, None, None, cache_hit), - ).start() # log response - asyncio.create_task( - self.logging_obj.async_success_handler( - response, cache_hit=cache_hit - ) - ) - self.sent_stream_usage = True - return response - raise # Re-raise StopIteration - else: - self.sent_last_chunk = True - processed_chunk = self.finish_reason_handler() ## LOGGING threading.Thread( target=self.logging_obj.success_handler, - args=(processed_chunk, None, None, cache_hit), + args=(response, None, None, cache_hit), ).start() # log response asyncio.create_task( self.logging_obj.async_success_handler( - processed_chunk, cache_hit=cache_hit + response, cache_hit=cache_hit ) ) - return processed_chunk - except StopIteration: - if self.sent_last_chunk is True: - if ( - self.sent_stream_usage is False - and self.stream_options is not None - and self.stream_options.get("include_usage", False) is True - ): - # send the final chunk with stream options - complete_streaming_response = litellm.stream_chunk_builder( - chunks=self.chunks, messages=self.messages - ) - response = self.model_response_creator() - if complete_streaming_response is not None: - setattr( - response, - "usage", - getattr(complete_streaming_response, "usage"), - ) - ## LOGGING - threading.Thread( - target=self.logging_obj.success_handler, - args=(response, None, None, cache_hit), - ).start() # log response - asyncio.create_task( - self.logging_obj.async_success_handler( - response, cache_hit=cache_hit - ) - ) + if self.sent_stream_usage is False and self.send_stream_usage is True: self.sent_stream_usage = True return response - raise StopAsyncIteration + raise StopAsyncIteration # Re-raise StopIteration else: self.sent_last_chunk = True processed_chunk = self.finish_reason_handler() diff --git a/tests/local_testing/test_add_function_to_prompt.py b/tests/local_testing/test_add_function_to_prompt.py index d703ce849e..43ee3dd41a 100644 --- a/tests/local_testing/test_add_function_to_prompt.py +++ b/tests/local_testing/test_add_function_to_prompt.py @@ -13,7 +13,7 @@ import litellm ## case 1: set_function_to_prompt not set def test_function_call_non_openai_model(): try: - model = "claude-instant-1" + model = "claude-3-5-haiku-20241022" messages = [{"role": "user", "content": "what's the weather in sf?"}] functions = [ { @@ -43,38 +43,4 @@ def test_function_call_non_openai_model(): # test_function_call_non_openai_model() - -## case 2: add_function_to_prompt set -@pytest.mark.skip(reason="Anthropic now supports tool calling") -def test_function_call_non_openai_model_litellm_mod_set(): - litellm.add_function_to_prompt = True - litellm.set_verbose = True - try: - model = 
"claude-instant-1.2" - messages = [{"role": "user", "content": "what's the weather in sf?"}] - functions = [ - { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, - }, - "required": ["location"], - }, - } - ] - response = litellm.completion( - model=model, messages=messages, functions=functions - ) - print(f"response: {response}") - except Exception as e: - pytest.fail(f"An error occurred {e}") - - # test_function_call_non_openai_model_litellm_mod_set() diff --git a/tests/local_testing/test_alangfuse.py b/tests/local_testing/test_alangfuse.py index da83e38298..8c69f567b5 100644 --- a/tests/local_testing/test_alangfuse.py +++ b/tests/local_testing/test_alangfuse.py @@ -480,28 +480,6 @@ async def test_aaalangfuse_logging_metadata(langfuse_client): print("generation_from_langfuse", generation) -@pytest.mark.skip(reason="beta test - checking langfuse output") -def test_langfuse_logging(): - try: - pre_langfuse_setup() - litellm.set_verbose = True - response = completion( - model="claude-instant-1.2", - messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}], - max_tokens=10, - temperature=0.2, - ) - print(response) - # time.sleep(5) - # # check langfuse.log to see if there was a failed response - # search_logs("langfuse.log") - - except litellm.Timeout as e: - pass - except Exception as e: - pytest.fail(f"An exception occurred - {e}") - - # test_langfuse_logging() diff --git a/tests/local_testing/test_batch_completions.py b/tests/local_testing/test_batch_completions.py index cb1f16a9fc..87cb88e44d 100644 --- a/tests/local_testing/test_batch_completions.py +++ b/tests/local_testing/test_batch_completions.py @@ -69,7 +69,7 @@ def test_batch_completions_models(): def test_batch_completion_models_all_responses(): try: responses = batch_completion_models_all_responses( - models=["j2-light", "claude-instant-1.2"], + models=["j2-light", "claude-3-haiku-20240307"], messages=[{"role": "user", "content": "write a poem"}], max_tokens=10, ) diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py index 8f28de7b49..6ee6a45b2c 100644 --- a/tests/local_testing/test_completion.py +++ b/tests/local_testing/test_completion.py @@ -343,7 +343,7 @@ def test_completion_claude(): try: # test without max tokens response = completion( - model="claude-instant-1", messages=messages, request_timeout=10 + model="claude-3-5-haiku-20241022", messages=messages, request_timeout=10 ) # Add any assertions here to check response args print(response) diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py index 3ce3a618c4..1744d38910 100644 --- a/tests/local_testing/test_custom_callback_input.py +++ b/tests/local_testing/test_custom_callback_input.py @@ -1562,3 +1562,65 @@ def test_logging_key_masking_gemini(): trimmed_key = key.split("key=")[1] trimmed_key = trimmed_key.replace("*", "") assert "PART" == trimmed_key + + +@pytest.mark.parametrize("sync_mode", [True, False]) +@pytest.mark.asyncio +async def test_standard_logging_payload_stream_usage(sync_mode): + """ + Even if stream_options is not provided, correct usage should be logged + """ + from litellm.types.utils import StandardLoggingPayload + from litellm.main import stream_chunk_builder + + stream = True + 
try:
+        # sync completion
+        customHandler = CompletionCustomHandler()
+        litellm.callbacks = [customHandler]
+
+        if sync_mode:
+            patch_event = "log_success_event"
+            return_val = MagicMock()
+        else:
+            patch_event = "async_log_success_event"
+            return_val = AsyncMock()
+
+        with patch.object(customHandler, patch_event, new=return_val) as mock_client:
+            if sync_mode:
+                resp = litellm.completion(
+                    model="anthropic/claude-3-5-sonnet-20240620",
+                    messages=[{"role": "user", "content": "Hey, how's it going?"}],
+                    stream=stream,
+                )
+
+                chunks = []
+                for chunk in resp:
+                    chunks.append(chunk)
+                time.sleep(2)
+            else:
+                resp = await litellm.acompletion(
+                    model="anthropic/claude-3-5-sonnet-20240620",
+                    messages=[{"role": "user", "content": "Hey, how's it going?"}],
+                    stream=stream,
+                )
+
+                chunks = []
+                async for chunk in resp:
+                    chunks.append(chunk)
+                await asyncio.sleep(2)
+
+            mock_client.assert_called_once()
+
+            standard_logging_object: StandardLoggingPayload = (
+                mock_client.call_args.kwargs["kwargs"]["standard_logging_object"]
+            )
+
+            built_response = stream_chunk_builder(chunks=chunks)
+            assert (
+                built_response.usage.total_tokens
+                != standard_logging_object["total_tokens"]
+            )
+            print(f"standard_logging_object usage: {built_response.usage}")
+    except litellm.InternalServerError:
+        pass
diff --git a/tests/local_testing/test_exceptions.py b/tests/local_testing/test_exceptions.py
index e1ae1a84f9..d5f67cecf6 100644
--- a/tests/local_testing/test_exceptions.py
+++ b/tests/local_testing/test_exceptions.py
@@ -163,7 +163,7 @@ def invalid_auth(model):  # set the model key to an invalid key, depending on th
     elif model == "azure/chatgpt-v-2":
         temporary_key = os.environ["AZURE_API_KEY"]
         os.environ["AZURE_API_KEY"] = "bad-key"
-    elif model == "claude-instant-1":
+    elif model == "claude-3-5-haiku-20241022":
         temporary_key = os.environ["ANTHROPIC_API_KEY"]
         os.environ["ANTHROPIC_API_KEY"] = "bad-key"
     elif model == "command-nightly":
@@ -213,7 +213,7 @@ def invalid_auth(model):  # set the model key to an invalid key, depending on th
     elif model == "chatgpt-test":
         os.environ["AZURE_API_KEY"] = temporary_key
         azure = True
-    elif model == "claude-instant-1":
+    elif model == "claude-3-5-haiku-20241022":
         os.environ["ANTHROPIC_API_KEY"] = temporary_key
     elif model == "command-nightly":
         os.environ["COHERE_API_KEY"] = temporary_key
diff --git a/tests/local_testing/test_langsmith.py b/tests/local_testing/test_langsmith.py
index 3470445929..6a98f244dc 100644
--- a/tests/local_testing/test_langsmith.py
+++ b/tests/local_testing/test_langsmith.py
@@ -77,71 +77,6 @@ async def test_langsmith_queue_logging():
         pytest.fail(f"Error occurred: {e}")
 
 
-@pytest.mark.skip(reason="Flaky test. covered by unit tests on custom logger.")
-@pytest.mark.asyncio()
-async def test_async_langsmith_logging():
-    try:
-        test_langsmith_logger = LangsmithLogger()
-        run_id = str(uuid.uuid4())
-        litellm.set_verbose = True
-        litellm.callbacks = ["langsmith"]
-        response = await litellm.acompletion(
-            model="claude-instant-1.2",
-            messages=[{"role": "user", "content": "what llm are u"}],
-            max_tokens=10,
-            temperature=0.2,
-            metadata={
-                "id": run_id,
-                "tags": ["tag1", "tag2"],
-                "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
-                "user_api_key_alias": "ishaans-langmsith-key",
-                "user_api_end_user_max_budget": None,
-                "litellm_api_version": "1.40.19",
-                "global_max_parallel_requests": None,
-                "user_api_key_user_id": "admin",
-                "user_api_key_org_id": None,
-                "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
-                "user_api_key_team_alias": "testing-team",
-            },
-        )
-        print(response)
-        await asyncio.sleep(3)
-
-        print("run_id", run_id)
-        logged_run_on_langsmith = test_langsmith_logger.get_run_by_id(run_id=run_id)
-
-        print("logged_run_on_langsmith", logged_run_on_langsmith)
-
-        print("fields in logged_run_on_langsmith", logged_run_on_langsmith.keys())
-
-        input_fields_on_langsmith = logged_run_on_langsmith.get("inputs")
-        extra_fields_on_langsmith = logged_run_on_langsmith.get("extra").get(
-            "invocation_params"
-        )
-
-        print("\nLogged INPUT ON LANGSMITH", input_fields_on_langsmith)
-
-        print("\nextra fields on langsmith", extra_fields_on_langsmith)
-
-        assert isinstance(input_fields_on_langsmith, dict)
-        assert "api_key" not in input_fields_on_langsmith
-        assert "api_key" not in extra_fields_on_langsmith
-
-        # assert user_api_key in extra_fields_on_langsmith
-        assert "user_api_key" in extra_fields_on_langsmith
-        assert "user_api_key_user_id" in extra_fields_on_langsmith
-        assert "user_api_key_team_alias" in extra_fields_on_langsmith
-
-        for cb in litellm.callbacks:
-            if isinstance(cb, LangsmithLogger):
-                await cb.async_httpx_client.client.aclose()
-            # test_langsmith_logger.async_httpx_client.close()
-
-    except Exception as e:
-        print(e)
-        pytest.fail(f"Error occurred: {e}")
-
-
 # test_langsmith_logging()
diff --git a/tests/local_testing/test_logging.py b/tests/local_testing/test_logging.py
index 1a35d8454e..0140cbd565 100644
--- a/tests/local_testing/test_logging.py
+++ b/tests/local_testing/test_logging.py
@@ -72,7 +72,7 @@
 # # old_stdout = sys.stdout
 # # sys.stdout = new_stdout = io.StringIO()
-# # response = completion(model="claude-instant-1", messages=messages)
+# # response = completion(model="claude-3-5-haiku-20241022", messages=messages)
 # # # Restore stdout
 # # sys.stdout = old_stdout
@@ -154,7 +154,7 @@
 # old_stdout = sys.stdout
 # sys.stdout = new_stdout = io.StringIO()
-# response = completion(model="claude-instant-1", messages=messages, stream=True)
+# response = completion(model="claude-3-5-haiku-20241022", messages=messages, stream=True)
 # for idx, chunk in enumerate(response):
 # pass
@@ -255,7 +255,7 @@
 # # sys.stdout = new_stdout = io.StringIO()
 # # try:
-# # response = completion(model="claude-instant-1", messages=messages)
+# # response = completion(model="claude-3-5-haiku-20241022", messages=messages)
 # # except AuthenticationError:
 # # pass
@@ -327,7 +327,7 @@
 # # sys.stdout = new_stdout = io.StringIO()
 # # try:
-# # response = completion(model="claude-instant-1", messages=messages)
+# # response = completion(model="claude-3-5-haiku-20241022", messages=messages)
 # # except AuthenticationError:
 # # pass
diff --git a/tests/local_testing/test_model_response_typing/test.py b/tests/local_testing/test_model_response_typing/test.py
index 95d4048098..46bf5fbb44 100644
--- a/tests/local_testing/test_model_response_typing/test.py
+++ b/tests/local_testing/test_model_response_typing/test.py
@@ -3,7 +3,7 @@
 # BASE_URL = 'http://localhost:8080'
 
 # def test_hello_route():
-# data = {"model": "claude-instant-1", "messages": [{"role": "user", "content": "hey, how's it going?"}]}
+# data = {"model": "claude-3-5-haiku-20241022", "messages": [{"role": "user", "content": "hey, how's it going?"}]}
 # headers = {'Content-Type': 'application/json'}
 # response = requests.get(BASE_URL, headers=headers, data=json.dumps(data))
 # print(response.text)
diff --git a/tests/local_testing/test_prometheus.py b/tests/local_testing/test_prometheus.py
index 164d94553a..2abdeea98e 100644
--- a/tests/local_testing/test_prometheus.py
+++ b/tests/local_testing/test_prometheus.py
@@ -31,63 +31,6 @@ litellm.set_verbose = True
 import time
 
-@pytest.mark.skip(reason="duplicate test of logging with callbacks")
-@pytest.mark.asyncio()
-async def test_async_prometheus_success_logging():
-    from litellm.integrations.prometheus import PrometheusLogger
-
-    pl = PrometheusLogger()
-    run_id = str(uuid.uuid4())
-
-    litellm.set_verbose = True
-    litellm.callbacks = [pl]
-
-    response = await litellm.acompletion(
-        model="claude-instant-1.2",
-        messages=[{"role": "user", "content": "what llm are u"}],
-        max_tokens=10,
-        mock_response="hi",
-        temperature=0.2,
-        metadata={
-            "id": run_id,
-            "tags": ["tag1", "tag2"],
-            "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
-            "user_api_key_alias": "ishaans-prometheus-key",
-            "user_api_end_user_max_budget": None,
-            "litellm_api_version": "1.40.19",
-            "global_max_parallel_requests": None,
-            "user_api_key_user_id": "admin",
-            "user_api_key_org_id": None,
-            "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
-            "user_api_key_team_alias": "testing-team",
-        },
-    )
-    print(response)
-    await asyncio.sleep(3)
-
-    # get prometheus logger
-    test_prometheus_logger = pl
-    print("done with success request")
-
-    print(
-        "vars of test_prometheus_logger",
-        vars(test_prometheus_logger.litellm_requests_metric),
-    )
-
-    # Get the metrics
-    metrics = {}
-    for metric in REGISTRY.collect():
-        for sample in metric.samples:
-            metrics[sample.name] = sample.value
-
-    print("metrics from prometheus", metrics)
-    assert metrics["litellm_requests_metric_total"] == 1.0
-    assert metrics["litellm_total_tokens_total"] == 30.0
-    assert metrics["litellm_deployment_success_responses_total"] == 1.0
-    assert metrics["litellm_deployment_total_requests_total"] == 1.0
-    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
-
-
 @pytest.mark.asyncio()
 async def test_async_prometheus_success_logging_with_callbacks():
@@ -107,7 +50,7 @@ async def test_async_prometheus_success_logging_with_callbacks():
         initial_metrics[sample.name] = sample.value
 
     response = await litellm.acompletion(
-        model="claude-instant-1.2",
+        model="claude-3-haiku-20240307",
         messages=[{"role": "user", "content": "what llm are u"}],
         max_tokens=10,
         mock_response="hi",
diff --git a/tests/local_testing/test_promptlayer_integration.py b/tests/local_testing/test_promptlayer_integration.py
index f557657576..d2e2268e61 100644
--- a/tests/local_testing/test_promptlayer_integration.py
+++ b/tests/local_testing/test_promptlayer_integration.py
@@ -18,7 +18,7 @@ import time
 
 # sys.stdout = new_stdout = io.StringIO()
 
-# response = completion(model="claude-instant-1.2",
+# response = completion(model="claude-3-5-haiku-20241022",
 # messages=[{
 # "role": "user",
 # "content": "Hi 👋 - i'm claude"
diff --git a/tests/local_testing/test_provider_specific_config.py b/tests/local_testing/test_provider_specific_config.py
index 3ff709854d..1f1ccaef88 100644
--- a/tests/local_testing/test_provider_specific_config.py
+++ b/tests/local_testing/test_provider_specific_config.py
@@ -56,7 +56,7 @@ def claude_test_completion():
     try:
         # OVERRIDE WITH DYNAMIC MAX TOKENS
         response_1 = litellm.completion(
-            model="claude-instant-1.2",
+            model="claude-3-haiku-20240307",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
             max_tokens=10,
         )
@@ -66,7 +66,7 @@ def claude_test_completion():
 
         # USE CONFIG TOKENS
         response_2 = litellm.completion(
-            model="claude-instant-1.2",
+            model="claude-3-haiku-20240307",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
         )
         # Add any assertions here to check the response
@@ -77,7 +77,7 @@ def claude_test_completion():
 
     try:
         response_3 = litellm.completion(
-            model="claude-instant-1.2",
+            model="claude-3-5-haiku-20241022",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
             n=2,
         )
diff --git a/tests/local_testing/test_proxy_utils.py b/tests/local_testing/test_proxy_utils.py
index 74ef75392a..f3f33bad6e 100644
--- a/tests/local_testing/test_proxy_utils.py
+++ b/tests/local_testing/test_proxy_utils.py
@@ -10,7 +10,7 @@
 sys.path.insert(
     0, os.path.abspath("../..")
 ) # Adds the parent directory to the system path
 import litellm
-
+from unittest.mock import MagicMock, patch, AsyncMock
 from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth
 from litellm.proxy.auth.auth_utils import is_request_body_safe
@@ -465,3 +465,48 @@ def test_update_internal_user_params():
         updated_data_json["budget_duration"]
         == litellm.default_internal_user_params["budget_duration"]
     )
+
+
+@pytest.mark.asyncio
+async def test_proxy_config_update_from_db():
+    from litellm.proxy.proxy_server import ProxyConfig
+    from pydantic import BaseModel
+
+    proxy_config = ProxyConfig()
+
+    pc = AsyncMock()
+
+    test_config = {
+        "litellm_settings": {
+            "callbacks": ["prometheus", "otel"],
+        }
+    }
+
+    class ReturnValue(BaseModel):
+        param_name: str
+        param_value: dict
+
+    with patch.object(
+        pc,
+        "get_generic_data",
+        new=AsyncMock(
+            return_value=ReturnValue(
+                param_name="litellm_settings",
+                param_value={
+                    "success_callback": "langfuse",
+                },
+            )
+        ),
+    ):
+        new_config = await proxy_config._update_config_from_db(
+            prisma_client=pc,
+            config=test_config,
+            store_model_in_db=True,
+        )
+
+        assert new_config == {
+            "litellm_settings": {
+                "callbacks": ["prometheus", "otel"],
+                "success_callback": "langfuse",
+            }
+        }
diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py
index 5ffdbc7ac6..8884f4c3ae 100644
--- a/tests/local_testing/test_router.py
+++ b/tests/local_testing/test_router.py
@@ -1807,7 +1807,7 @@ def test_router_anthropic_key_dynamic():
         {
             "model_name": "anthropic-claude",
             "litellm_params": {
-                "model": "claude-instant-1.2",
+                "model": "claude-3-5-haiku-20241022",
                 "api_key": anthropic_api_key,
             },
         }
diff --git a/tests/local_testing/test_router_fallbacks.py b/tests/local_testing/test_router_fallbacks.py
index 96983003a9..3e91cd79ab 100644
--- a/tests/local_testing/test_router_fallbacks.py
+++ b/tests/local_testing/test_router_fallbacks.py
@@ -824,8 +824,8 @@ def test_ausage_based_routing_fallbacks():
             "rpm": OPENAI_RPM,
         },
         {
-            "model_name": "anthropic-claude-instant-1.2",
-            "litellm_params": get_anthropic_params("claude-instant-1.2"),
+            "model_name": "anthropic-claude-3-5-haiku-20241022",
+            "litellm_params": get_anthropic_params("claude-3-5-haiku-20241022"),
             "model_info": {"id": 4},
             "rpm": ANTHROPIC_RPM,
         },
@@ -834,7 +834,7 @@ def test_ausage_based_routing_fallbacks():
     fallbacks_list = [
         {"azure/gpt-4-fast": ["azure/gpt-4-basic"]},
         {"azure/gpt-4-basic": ["openai-gpt-4"]},
-        {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
+        {"openai-gpt-4": ["anthropic-claude-3-5-haiku-20241022"]},
     ]
 
     router = Router(
@@ -864,7 +864,7 @@ def test_ausage_based_routing_fallbacks():
     assert response._hidden_params["model_id"] == "1"
 
     for i in range(10):
-        # now make 100 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
+        # now make 100 mock requests to OpenAI - expect it to fallback to anthropic-claude-3-5-haiku-20241022
         response = router.completion(
             model="azure/gpt-4-fast",
             messages=messages,
diff --git a/tests/local_testing/test_router_pattern_matching.py b/tests/local_testing/test_router_pattern_matching.py
index 701a62e412..9d8c4db0da 100644
--- a/tests/local_testing/test_router_pattern_matching.py
+++ b/tests/local_testing/test_router_pattern_matching.py
@@ -17,6 +17,7 @@
 from litellm.router import Deployment, LiteLLM_Params, ModelInfo
 from concurrent.futures import ThreadPoolExecutor
 from collections import defaultdict
 from dotenv import load_dotenv
+from unittest.mock import patch, MagicMock, AsyncMock
 
 load_dotenv()
@@ -155,3 +156,35 @@ def test_route_with_exception():
     result = router.route("openai/gpt-3.5-turbo")
 
     assert result is None
+
+
+def test_router_pattern_match_e2e():
+    """
+    Tests the end to end flow of the router
+    """
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    client = HTTPHandler()
+    router = Router(
+        model_list=[
+            {
+                "model_name": "llmengine/*",
+                "litellm_params": {"model": "anthropic/*", "api_key": "test"},
+            }
+        ]
+    )
+
+    with patch.object(client, "post", new=MagicMock()) as mock_post:
+
+        router.completion(
+            model="llmengine/my-custom-model",
+            messages=[{"role": "user", "content": "Hello, how are you?"}],
+            client=client,
+            api_key="test",
+        )
+        mock_post.assert_called_once()
+        print(mock_post.call_args.kwargs["data"])
+        mock_post.call_args.kwargs["data"] == {
+            "model": "gpt-4o",
+            "messages": [{"role": "user", "content": "Hello, how are you?"}],
+        }
diff --git a/tests/local_testing/test_router_timeout.py b/tests/local_testing/test_router_timeout.py
index c13bc2deb1..21e74e099c 100644
--- a/tests/local_testing/test_router_timeout.py
+++ b/tests/local_testing/test_router_timeout.py
@@ -38,9 +38,9 @@ def test_router_timeouts():
             "tpm": 80000,
         },
        {
-            "model_name": "anthropic-claude-instant-1.2",
+            "model_name": "anthropic-claude-3-5-haiku-20241022",
             "litellm_params": {
-                "model": "claude-instant-1.2",
+                "model": "claude-3-5-haiku-20241022",
                 "api_key": "os.environ/ANTHROPIC_API_KEY",
                 "mock_response": "hello world",
             },
@@ -49,7 +49,7 @@
     ]
 
     fallbacks_list = [
-        {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
+        {"openai-gpt-4": ["anthropic-claude-3-5-haiku-20241022"]},
     ]
 
     # Configure router
diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py
index 3e2145c811..827a2495b0 100644
--- a/tests/local_testing/test_streaming.py
+++ b/tests/local_testing/test_streaming.py
@@ -681,7 +681,7 @@ def test_completion_ollama_hosted_stream():
 @pytest.mark.parametrize(
     "model",
     [
-        # "claude-instant-1.2",
+        # "claude-3-5-haiku-20241022",
        # "claude-2",
         # "mistral/mistral-medium",
         "openrouter/openai/gpt-4o-mini",
@@ -1112,7 +1112,7 @@ def test_completion_claude_stream_bad_key():
         },
     ]
     response = completion(
-        model="claude-instant-1",
+        model="claude-3-5-haiku-20241022",
         messages=messages,
         stream=True,
         max_tokens=50,
diff --git a/tests/local_testing/test_token_counter.py b/tests/local_testing/test_token_counter.py
index 3ad73f2d8b..7234ef38e0 100644
--- a/tests/local_testing/test_token_counter.py
+++ b/tests/local_testing/test_token_counter.py
@@ -1,6 +1,6 @@
 #### What this tests ####
 # This tests litellm.token_counter() function
-
+import traceback
 import os
 import sys
 import time
@@ -116,7 +116,9 @@ def test_tokenizers():
         openai_tokens = token_counter(model="gpt-3.5-turbo", text=sample_text)
 
         # claude tokenizer
-        claude_tokens = token_counter(model="claude-instant-1", text=sample_text)
+        claude_tokens = token_counter(
+            model="claude-3-5-haiku-20241022", text=sample_text
+        )
 
         # cohere tokenizer
         cohere_tokens = token_counter(model="command-nightly", text=sample_text)
@@ -167,8 +169,9 @@ def test_encoding_and_decoding():
         assert openai_text == sample_text
 
         # claude encoding + decoding
-        claude_tokens = encode(model="claude-instant-1", text=sample_text)
-        claude_text = decode(model="claude-instant-1", tokens=claude_tokens.ids)
+        claude_tokens = encode(model="claude-3-5-haiku-20241022", text=sample_text)
+
+        claude_text = decode(model="claude-3-5-haiku-20241022", tokens=claude_tokens)
 
         assert claude_text == sample_text
 
@@ -186,7 +189,7 @@ def test_encoding_and_decoding():
         assert llama2_text == sample_text
 
     except Exception as e:
-        pytest.fail(f"An exception occured: {e}")
+        pytest.fail(f"An exception occured: {e}\n{traceback.format_exc()}")
 
 
 # test_encoding_and_decoding()
diff --git a/tests/local_testing/test_traceloop.py b/tests/local_testing/test_traceloop.py
index 74d58228ef..5cab8dd59c 100644
--- a/tests/local_testing/test_traceloop.py
+++ b/tests/local_testing/test_traceloop.py
@@ -26,7 +26,7 @@ def exporter():
     return exporter
 
 
-@pytest.mark.parametrize("model", ["claude-instant-1.2", "gpt-3.5-turbo"])
+@pytest.mark.parametrize("model", ["claude-3-5-haiku-20241022", "gpt-3.5-turbo"])
 def test_traceloop_logging(exporter, model):
     litellm.completion(
         model=model,
diff --git a/tests/local_testing/test_wandb.py b/tests/local_testing/test_wandb.py
index d31310fa6f..6cdca40492 100644
--- a/tests/local_testing/test_wandb.py
+++ b/tests/local_testing/test_wandb.py
@@ -57,7 +57,7 @@ test_wandb_logging_async()
 def test_wandb_logging():
     try:
         response = completion(
-            model="claude-instant-1.2",
+            model="claude-3-5-haiku-20241022",
             messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
             max_tokens=10,
             temperature=0.2,
diff --git a/tests/logging_callback_tests/test_langfuse_unit_tests.py b/tests/logging_callback_tests/test_langfuse_unit_tests.py
index 2a6cbe00a9..20b33f81b5 100644
--- a/tests/logging_callback_tests/test_langfuse_unit_tests.py
+++ b/tests/logging_callback_tests/test_langfuse_unit_tests.py
@@ -1,19 +1,13 @@
-import json
 import os
 import sys
+import threading
 from datetime import datetime
 
-from pydantic.main import Model
-
 sys.path.insert(
     0, os.path.abspath("../..")
 ) # Adds the parent directory to the system-path
 import pytest
 
-import litellm
-import asyncio
-import logging
-from litellm._logging import verbose_logger
 from litellm.integrations.langfuse.langfuse import (
     LangFuseLogger,
 )
@@ -217,3 +211,27 @@ def test_get_langfuse_logger_for_request_with_cached_logger():
 
     assert result == cached_logger
     mock_cache.get_cache.assert_called_once()
+
+@pytest.mark.parametrize("metadata", [
+    {'a': 1, 'b': 2, 'c': 3},
+    {'a': {'nested_a': 1}, 'b': {'nested_b': 2}},
+    {'a': [1, 2, 3], 'b': {4, 5, 6}},
+    {'a': (1, 2), 'b': frozenset([3, 4]), 'c': {'d': [5, 6]}},
+    {'lock': threading.Lock()},
+    {'func': lambda x: x + 1},
+    {
+        'int': 42,
+        'str': 'hello',
+        'list': [1, 2, 3],
+        'set': {4, 5},
+        'dict': {'nested': 'value'},
+        'non_copyable': threading.Lock(),
+        'function': print
+    },
+    ['list', 'not', 'a', 'dict'],
+    {'timestamp': datetime.now()},
+    {},
+    None,
+])
+def test_langfuse_logger_prepare_metadata(metadata):
+    global_langfuse_logger._prepare_metadata(metadata)
diff --git a/tests/router_unit_tests/test_router_helper_utils.py b/tests/router_unit_tests/test_router_helper_utils.py
index 0231e199fd..ddd7a502c2 100644
--- a/tests/router_unit_tests/test_router_helper_utils.py
+++ b/tests/router_unit_tests/test_router_helper_utils.py
@@ -986,3 +986,16 @@ def test_pattern_match_deployment_set_model_name(
     print(updated_model)  # Expected output: "openai/fo::hi:static::hello"
 
     assert updated_model == expected_model
+
+    updated_models = pattern_router._return_pattern_matched_deployments(
+        match,
+        deployments=[
+            {
+                "model_name": model_name,
+                "litellm_params": {"model": litellm_model},
+            }
+        ],
+    )
+
+    for model in updated_models:
+        assert model["litellm_params"]["model"] == expected_model
diff --git a/tests/test_keys.py b/tests/test_keys.py
index ab1e97ac2c..554a084c90 100644
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -523,8 +523,8 @@ async def test_key_info_spend_values():
 
 
 @pytest.mark.asyncio
-@pytest.mark.flaky(retries=3, delay=1)
-async def test_key_info_spend_values_streaming():
+@pytest.mark.flaky(retries=6, delay=2)
+async def test_aaaaakey_info_spend_values_streaming():
     """
     Test to ensure spend is correctly calculated.
     - create key
@@ -545,7 +545,7 @@ async def test_key_info_spend_values_streaming():
             completion_tokens=completion_tokens,
         )
         response_cost = prompt_cost + completion_cost
-        await asyncio.sleep(5)  # allow db log to be updated
+        await asyncio.sleep(8)  # allow db log to be updated
         print(f"new_key: {new_key}")
         key_info = await get_key_info(
             session=session, get_key=new_key, call_key=new_key