LiteLLM Minor Fixes & Improvements (10/04/2024) (#6064)

* fix(litellm_logging.py): ensure cache hits are scrubbed if 'turn_off_message_logging' is enabled * fix(sagemaker.py): fix streaming to raise error immediately Fixes https://github.com/BerriAI/litellm/issues/6054 * (fixes) gcs bucket key based logging (#6044) * fixes for gcs bucket logging * fix StandardCallbackDynamicParams * fix - gcs logging when payload is not serializable * add test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket * working success callbacks * linting fixes * fix linting error * add type hints to functions * fixes for dynamic success and failure logging * fix for test_async_chat_openai_stream * fix handle case when key based logging vars are set as os.environ/ vars * fix prometheus track cooldown events on custom logger (#6060) * (docs) add 1k rps load test doc (#6059) * docs 1k rps load test * docs load testing * docs load testing litellm * docs load testing * clean up load test doc * docs prom metrics for load testing * docs using prometheus on load testing * doc load testing with prometheus * (fixes) docs + qa - gcs key based logging (#6061) * fixes for required values for gcs bucket * docs gcs bucket logging * bump: version 1.48.12 → 1.48.13 * ci/cd run again * bump: version 1.48.13 → 1.48.14 * update load test doc * (docs) router settings - on litellm config (#6037) * add yaml with all router settings * add docs for router settings * docs router settings litellm settings * (feat) OpenAI prompt caching models to model cost map (#6063) * add prompt caching for latest models * add cache_read_input_token_cost for prompt caching models * fix(litellm_logging.py): check if param is iterable Fixes https://github.com/BerriAI/litellm/issues/6025#issuecomment-2393929946 * fix(factory.py): support passing an 'assistant_continue_message' to prevent bedrock error Fixes https://github.com/BerriAI/litellm/issues/6053 * fix(databricks/chat): handle streaming responses * fix(factory.py): fix linting error * fix(utils.py): unify anthropic + deepseek prompt caching information to openai format Fixes https://github.com/BerriAI/litellm/issues/6069 * test: fix test * fix(types/utils.py): support all openai roles Fixes https://github.com/BerriAI/litellm/issues/6052 * test: fix test --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
2024-10-04 21:28:53 -04:00 · 2024-10-04 21:28:53 -04:00 · 2e5c46ef6d
commit 2e5c46ef6d
parent fc6e0dd6cb
19 changed files with 1034 additions and 259 deletions
--- a/tests/local_testing/test_bedrock_completion.py
+++ b/tests/local_testing/test_bedrock_completion.py
@ -1633,3 +1633,288 @@ def test_bedrock_completion_test_3():
            }
        }
    ]
+
+
+@pytest.mark.parametrize("modify_params", [True, False])
+def test_bedrock_completion_test_4(modify_params):
+    litellm.set_verbose = True
+    litellm.modify_params = modify_params
+
+    data = {
+        "model": "anthropic.claude-3-opus-20240229-v1:0",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "<task>\nWhat is this file?\n</task>"},
+                    {
+                        "type": "text",
+                        "text": "<environment_details>\n# VSCode Visible Files\ncomputer-vision/hm-open3d/src/main.py\n\n# VSCode Open Tabs\ncomputer-vision/hm-open3d/src/main.py\n\n# Current Working Directory (/Users/hongbo-miao/Clouds/Git/hongbomiao.com) Files\n.ansible-lint\n.clang-format\n.cmakelintrc\n.dockerignore\n.editorconfig\n.gitignore\n.gitmodules\n.hadolint.yaml\n.isort.cfg\n.markdownlint-cli2.jsonc\n.mergify.yml\n.npmrc\n.nvmrc\n.prettierignore\n.rubocop.yml\n.ruby-version\n.ruff.toml\n.shellcheckrc\n.solhint.json\n.solhintignore\n.sqlfluff\n.sqlfluffignore\n.stylelintignore\n.yamllint.yaml\nCODE_OF_CONDUCT.md\ncommitlint.config.js\nGemfile\nGemfile.lock\nLICENSE\nlint-staged.config.js\nMakefile\nmiss_hit.cfg\nmypy.ini\npackage-lock.json\npackage.json\npoetry.lock\npoetry.toml\nprettier.config.js\npyproject.toml\nREADME.md\nrelease.config.js\nrenovate.json\nSECURITY.md\nstylelint.config.js\naerospace/\naerospace/air-defense-system/\naerospace/hm-aerosandbox/\naerospace/hm-openaerostruct/\naerospace/px4/\naerospace/quadcopter-pd-controller/\naerospace/simulate-satellite/\naerospace/simulated-and-actual-flights/\naerospace/toroidal-propeller/\nansible/\nansible/inventory.yaml\nansible/Makefile\nansible/requirements.yml\nansible/hm_macos_group/\nansible/hm_ubuntu_group/\nansible/hm_windows_group/\napi-go/\napi-go/buf.yaml\napi-go/go.mod\napi-go/go.sum\napi-go/Makefile\napi-go/api/\napi-go/build/\napi-go/cmd/\napi-go/config/\napi-go/internal/\napi-node/\napi-node/.env.development\napi-node/.env.development.local.example\napi-node/.env.development.local.example.docker\napi-node/.env.production\napi-node/.env.production.local.example\napi-node/.env.test\napi-node/.eslintignore\napi-node/.eslintrc.js\napi-node/.npmrc\napi-node/.nvmrc\napi-node/babel.config.js\napi-node/docker-compose.cypress.yaml\napi-node/docker-compose.development.yaml\napi-node/Dockerfile\napi-node/Dockerfile.development\napi-node/jest.config.js\napi-node/Makefile\napi-node/package-lock.json\napi-node/package.json\napi-node/Procfile\napi-node/stryker.conf.js\napi-node/tsconfig.json\napi-node/bin/\napi-node/postgres/\napi-node/scripts/\napi-node/src/\napi-python/\napi-python/.flaskenv\napi-python/docker-entrypoint.sh\napi-python/Dockerfile\napi-python/Makefile\napi-python/poetry.lock\napi-python/poetry.toml\napi-python/pyproject.toml\napi-python/flaskr/\nasterios/\nasterios/led-blinker/\nauthorization/\nauthorization/hm-opal-client/\nauthorization/ory-hydra/\nautomobile/\nautomobile/build-map-by-lidar-point-cloud/\nautomobile/detect-lane-by-lidar-point-cloud/\nbin/\nbin/clean.sh\nbin/count_code_lines.sh\nbin/lint_javascript_fix.sh\nbin/lint_javascript.sh\nbin/set_up.sh\nbiology/\nbiology/compare-nucleotide-sequences/\nbusybox/\nbusybox/Makefile\ncaddy/\ncaddy/Caddyfile\ncaddy/Makefile\ncaddy/bin/\ncloud-computing/\ncloud-computing/hm-ray/\ncloud-computing/hm-skypilot/\ncloud-cost/\ncloud-cost/komiser/\ncloud-infrastructure/\ncloud-infrastructure/hm-pulumi/\ncloud-infrastructure/karpenter/\ncloud-infrastructure/terraform/\ncloud-platform/\ncloud-platform/aws/\ncloud-platform/google-cloud/\ncloud-security/\ncloud-security/hm-prowler/\ncomputational-fluid-dynamics/\ncomputational-fluid-dynamics/matlab/\ncomputational-fluid-dynamics/openfoam/\ncomputer-vision/\ncomputer-vision/hm-open3d/\ncomputer-vision/hm-pyvista/\ndata-analytics/\ndata-analytics/hm-geopandas/\ndata-distribution-service/\ndata-distribution-service/dummy_test.py\ndata-distribution-service/hm_message.idl\ndata-distribution-service/hm_message.xml\ndata-distribution-service/Makefile\ndata-distribution-service/poetry.lock\ndata-distribution-service/poetry.toml\ndata-distribution-service/publish.py\ndata-ingestion/\ndata-orchestration/\ndata-processing/\ndata-storage/\ndata-transformation/\ndata-visualization/\ndesktop-qt/\nembedded/\nethereum/\ngit/\ngolang-migrate/\nhardware-in-the-loop/\nhasura-graphql-engine/\nhigh-performance-computing/\nhm-alpine/\nhm-kafka/\nhm-locust/\nhm-rust/\nhm-traefik/\nhm-xxhash/\nkubernetes/\nmachine-learning/\nmatlab/\nmobile/\nnetwork-programmability/\noperating-system/\nparallel-computing/\nphysics/\nquantum-computing/\nrclone/\nrestic/\nreverse-engineering/\nrobotics/\nsubmodules/\ntrino/\nvagrant/\nvalgrind/\nvhdl/\nvim/\nweb/\nweb-cypress/\nwireless-network/\n\n(File list truncated. Use list_files on specific subdirectories if you need to explore further.)\n</environment_details>",
+                    },
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": '<thinking>\nThe user is asking about a specific file: main.py. Based on the environment details provided, this file is located in the computer-vision/hm-open3d/src/ directory and is currently open in a VSCode tab.\n\nTo answer the question of what this file is, the most relevant tool would be the read_file tool. This will allow me to examine the contents of main.py to determine its purpose.\n\nThe read_file tool requires the "path" parameter. I can infer this path based on the environment details:\npath: "computer-vision/hm-open3d/src/main.py"\n\nSince I have the necessary parameter, I can proceed with calling the read_file tool.\n</thinking>',
+                "tool_calls": [
+                    {
+                        "id": "tooluse_qCt-KEyWQlWiyHl26spQVA",
+                        "type": "function",
+                        "function": {
+                            "name": "read_file",
+                            "arguments": '{"path":"computer-vision/hm-open3d/src/main.py"}',
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "tooluse_qCt-KEyWQlWiyHl26spQVA",
+                "content": 'import numpy as np\nimport open3d as o3d\n\n\ndef main():\n    ply_point_cloud = o3d.data.PLYPointCloud()\n    pcd = o3d.io.read_point_cloud(ply_point_cloud.path)\n    print(pcd)\n    print(np.asarray(pcd.points))\n\n    demo_crop_data = o3d.data.DemoCropPointCloud()\n    vol = o3d.visualization.read_selection_polygon_volume(\n        demo_crop_data.cropped_json_path\n    )\n    chair = vol.crop_point_cloud(pcd)\n\n    dists = pcd.compute_point_cloud_distance(chair)\n    dists = np.asarray(dists)\n    idx = np.where(dists > 0.01)[0]\n    pcd_without_chair = pcd.select_by_index(idx)\n\n    axis_aligned_bounding_box = chair.get_axis_aligned_bounding_box()\n    axis_aligned_bounding_box.color = (1, 0, 0)\n\n    oriented_bounding_box = chair.get_oriented_bounding_box()\n    oriented_bounding_box.color = (0, 1, 0)\n\n    o3d.visualization.draw_geometries(\n        [pcd_without_chair, chair, axis_aligned_bounding_box, oriented_bounding_box],\n        zoom=0.3412,\n        front=[0.4, -0.2, -0.9],\n        lookat=[2.6, 2.0, 1.5],\n        up=[-0.10, -1.0, 0.2],\n    )\n\n\nif __name__ == "__main__":\n    main()\n',
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "<environment_details>\n# VSCode Visible Files\ncomputer-vision/hm-open3d/src/main.py\n\n# VSCode Open Tabs\ncomputer-vision/hm-open3d/src/main.py\n</environment_details>",
+                    }
+                ],
+            },
+        ],
+        "temperature": 0.2,
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "execute_command",
+                    "description": "Execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Commands will be executed in the current working directory: /Users/hongbo-miao/Clouds/Git/hongbomiao.com",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "command": {
+                                "type": "string",
+                                "description": "The CLI command to execute. This should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.",
+                            }
+                        },
+                        "required": ["command"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "read_file",
+                    "description": "Read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "path": {
+                                "type": "string",
+                                "description": "The path of the file to read (relative to the current working directory /Users/hongbo-miao/Clouds/Git/hongbomiao.com)",
+                            }
+                        },
+                        "required": ["path"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "write_to_file",
+                    "description": "Write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. Always provide the full intended content of the file, without any truncation. This tool will automatically create any directories needed to write the file.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "path": {
+                                "type": "string",
+                                "description": "The path of the file to write to (relative to the current working directory /Users/hongbo-miao/Clouds/Git/hongbomiao.com)",
+                            },
+                            "content": {
+                                "type": "string",
+                                "description": "The full content to write to the file.",
+                            },
+                        },
+                        "required": ["path", "content"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "search_files",
+                    "description": "Perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "path": {
+                                "type": "string",
+                                "description": "The path of the directory to search in (relative to the current working directory /Users/hongbo-miao/Clouds/Git/hongbomiao.com). This directory will be recursively searched.",
+                            },
+                            "regex": {
+                                "type": "string",
+                                "description": "The regular expression pattern to search for. Uses Rust regex syntax.",
+                            },
+                            "filePattern": {
+                                "type": "string",
+                                "description": "Optional glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*).",
+                            },
+                        },
+                        "required": ["path", "regex"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "list_files",
+                    "description": "List files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "path": {
+                                "type": "string",
+                                "description": "The path of the directory to list contents for (relative to the current working directory /Users/hongbo-miao/Clouds/Git/hongbomiao.com)",
+                            },
+                            "recursive": {
+                                "type": "string",
+                                "enum": ["true", "false"],
+                                "description": "Whether to list files recursively. Use 'true' for recursive listing, 'false' or omit for top-level only.",
+                            },
+                        },
+                        "required": ["path"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "list_code_definition_names",
+                    "description": "Lists definition names (classes, functions, methods, etc.) used in source code files at the top level of the specified directory. This tool provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "path": {
+                                "type": "string",
+                                "description": "The path of the directory (relative to the current working directory /Users/hongbo-miao/Clouds/Git/hongbomiao.com) to list top level source code definitions for",
+                            }
+                        },
+                        "required": ["path"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "inspect_site",
+                    "description": "Captures a screenshot and console logs of the initial state of a website. This tool navigates to the specified URL, takes a screenshot of the entire page as it appears immediately after loading, and collects any console logs or errors that occur during page load. It does not interact with the page or capture any state changes after the initial load.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "url": {
+                                "type": "string",
+                                "description": "The URL of the site to inspect. This should be a valid URL including the protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.)",
+                            }
+                        },
+                        "required": ["url"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "ask_followup_question",
+                    "description": "Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "question": {
+                                "type": "string",
+                                "description": "The question to ask the user. This should be a clear, specific question that addresses the information you need.",
+                            }
+                        },
+                        "required": ["question"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "attempt_completion",
+                    "description": "Once you've completed the task, use this tool to present the result to the user. Optionally you may provide a CLI command to showcase the result of your work, but avoid using commands like 'echo' or 'cat' that merely print text. They may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "command": {
+                                "type": "string",
+                                "description": "A CLI command to execute to show a live demo of the result to the user. For example, use 'open index.html' to display a created website. This command should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.",
+                            },
+                            "result": {
+                                "type": "string",
+                                "description": "The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance.",
+                            },
+                        },
+                        "required": ["result"],
+                    },
+                },
+            },
+        ],
+        "tool_choice": "auto",
+    }
+
+    if modify_params:
+        transformed_messages = _bedrock_converse_messages_pt(
+            messages=data["messages"], model="", llm_provider=""
+        )
+        expected_messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"text": "<task>\nWhat is this file?\n</task>"},
+                    {
+                        "text": "<environment_details>\n# VSCode Visible Files\ncomputer-vision/hm-open3d/src/main.py\n\n# VSCode Open Tabs\ncomputer-vision/hm-open3d/src/main.py\n\n# Current Working Directory (/Users/hongbo-miao/Clouds/Git/hongbomiao.com) Files\n.ansible-lint\n.clang-format\n.cmakelintrc\n.dockerignore\n.editorconfig\n.gitignore\n.gitmodules\n.hadolint.yaml\n.isort.cfg\n.markdownlint-cli2.jsonc\n.mergify.yml\n.npmrc\n.nvmrc\n.prettierignore\n.rubocop.yml\n.ruby-version\n.ruff.toml\n.shellcheckrc\n.solhint.json\n.solhintignore\n.sqlfluff\n.sqlfluffignore\n.stylelintignore\n.yamllint.yaml\nCODE_OF_CONDUCT.md\ncommitlint.config.js\nGemfile\nGemfile.lock\nLICENSE\nlint-staged.config.js\nMakefile\nmiss_hit.cfg\nmypy.ini\npackage-lock.json\npackage.json\npoetry.lock\npoetry.toml\nprettier.config.js\npyproject.toml\nREADME.md\nrelease.config.js\nrenovate.json\nSECURITY.md\nstylelint.config.js\naerospace/\naerospace/air-defense-system/\naerospace/hm-aerosandbox/\naerospace/hm-openaerostruct/\naerospace/px4/\naerospace/quadcopter-pd-controller/\naerospace/simulate-satellite/\naerospace/simulated-and-actual-flights/\naerospace/toroidal-propeller/\nansible/\nansible/inventory.yaml\nansible/Makefile\nansible/requirements.yml\nansible/hm_macos_group/\nansible/hm_ubuntu_group/\nansible/hm_windows_group/\napi-go/\napi-go/buf.yaml\napi-go/go.mod\napi-go/go.sum\napi-go/Makefile\napi-go/api/\napi-go/build/\napi-go/cmd/\napi-go/config/\napi-go/internal/\napi-node/\napi-node/.env.development\napi-node/.env.development.local.example\napi-node/.env.development.local.example.docker\napi-node/.env.production\napi-node/.env.production.local.example\napi-node/.env.test\napi-node/.eslintignore\napi-node/.eslintrc.js\napi-node/.npmrc\napi-node/.nvmrc\napi-node/babel.config.js\napi-node/docker-compose.cypress.yaml\napi-node/docker-compose.development.yaml\napi-node/Dockerfile\napi-node/Dockerfile.development\napi-node/jest.config.js\napi-node/Makefile\napi-node/package-lock.json\napi-node/package.json\napi-node/Procfile\napi-node/stryker.conf.js\napi-node/tsconfig.json\napi-node/bin/\napi-node/postgres/\napi-node/scripts/\napi-node/src/\napi-python/\napi-python/.flaskenv\napi-python/docker-entrypoint.sh\napi-python/Dockerfile\napi-python/Makefile\napi-python/poetry.lock\napi-python/poetry.toml\napi-python/pyproject.toml\napi-python/flaskr/\nasterios/\nasterios/led-blinker/\nauthorization/\nauthorization/hm-opal-client/\nauthorization/ory-hydra/\nautomobile/\nautomobile/build-map-by-lidar-point-cloud/\nautomobile/detect-lane-by-lidar-point-cloud/\nbin/\nbin/clean.sh\nbin/count_code_lines.sh\nbin/lint_javascript_fix.sh\nbin/lint_javascript.sh\nbin/set_up.sh\nbiology/\nbiology/compare-nucleotide-sequences/\nbusybox/\nbusybox/Makefile\ncaddy/\ncaddy/Caddyfile\ncaddy/Makefile\ncaddy/bin/\ncloud-computing/\ncloud-computing/hm-ray/\ncloud-computing/hm-skypilot/\ncloud-cost/\ncloud-cost/komiser/\ncloud-infrastructure/\ncloud-infrastructure/hm-pulumi/\ncloud-infrastructure/karpenter/\ncloud-infrastructure/terraform/\ncloud-platform/\ncloud-platform/aws/\ncloud-platform/google-cloud/\ncloud-security/\ncloud-security/hm-prowler/\ncomputational-fluid-dynamics/\ncomputational-fluid-dynamics/matlab/\ncomputational-fluid-dynamics/openfoam/\ncomputer-vision/\ncomputer-vision/hm-open3d/\ncomputer-vision/hm-pyvista/\ndata-analytics/\ndata-analytics/hm-geopandas/\ndata-distribution-service/\ndata-distribution-service/dummy_test.py\ndata-distribution-service/hm_message.idl\ndata-distribution-service/hm_message.xml\ndata-distribution-service/Makefile\ndata-distribution-service/poetry.lock\ndata-distribution-service/poetry.toml\ndata-distribution-service/publish.py\ndata-ingestion/\ndata-orchestration/\ndata-processing/\ndata-storage/\ndata-transformation/\ndata-visualization/\ndesktop-qt/\nembedded/\nethereum/\ngit/\ngolang-migrate/\nhardware-in-the-loop/\nhasura-graphql-engine/\nhigh-performance-computing/\nhm-alpine/\nhm-kafka/\nhm-locust/\nhm-rust/\nhm-traefik/\nhm-xxhash/\nkubernetes/\nmachine-learning/\nmatlab/\nmobile/\nnetwork-programmability/\noperating-system/\nparallel-computing/\nphysics/\nquantum-computing/\nrclone/\nrestic/\nreverse-engineering/\nrobotics/\nsubmodules/\ntrino/\nvagrant/\nvalgrind/\nvhdl/\nvim/\nweb/\nweb-cypress/\nwireless-network/\n\n(File list truncated. Use list_files on specific subdirectories if you need to explore further.)\n</environment_details>"
+                    },
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": [
+                    {
+                        "toolUse": {
+                            "input": {"path": "computer-vision/hm-open3d/src/main.py"},
+                            "name": "read_file",
+                            "toolUseId": "tooluse_qCt-KEyWQlWiyHl26spQVA",
+                        }
+                    }
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "toolResult": {
+                            "content": [
+                                {
+                                    "text": 'import numpy as np\nimport open3d as o3d\n\n\ndef main():\n    ply_point_cloud = o3d.data.PLYPointCloud()\n    pcd = o3d.io.read_point_cloud(ply_point_cloud.path)\n    print(pcd)\n    print(np.asarray(pcd.points))\n\n    demo_crop_data = o3d.data.DemoCropPointCloud()\n    vol = o3d.visualization.read_selection_polygon_volume(\n        demo_crop_data.cropped_json_path\n    )\n    chair = vol.crop_point_cloud(pcd)\n\n    dists = pcd.compute_point_cloud_distance(chair)\n    dists = np.asarray(dists)\n    idx = np.where(dists > 0.01)[0]\n    pcd_without_chair = pcd.select_by_index(idx)\n\n    axis_aligned_bounding_box = chair.get_axis_aligned_bounding_box()\n    axis_aligned_bounding_box.color = (1, 0, 0)\n\n    oriented_bounding_box = chair.get_oriented_bounding_box()\n    oriented_bounding_box.color = (0, 1, 0)\n\n    o3d.visualization.draw_geometries(\n        [pcd_without_chair, chair, axis_aligned_bounding_box, oriented_bounding_box],\n        zoom=0.3412,\n        front=[0.4, -0.2, -0.9],\n        lookat=[2.6, 2.0, 1.5],\n        up=[-0.10, -1.0, 0.2],\n    )\n\n\nif __name__ == "__main__":\n    main()\n'
+                                }
+                            ],
+                            "toolUseId": "tooluse_qCt-KEyWQlWiyHl26spQVA",
+                        }
+                    }
+                ],
+            },
+            {"role": "assistant", "content": [{"text": "Please continue."}]},
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "text": "<environment_details>\n# VSCode Visible Files\ncomputer-vision/hm-open3d/src/main.py\n\n# VSCode Open Tabs\ncomputer-vision/hm-open3d/src/main.py\n</environment_details>"
+                    }
+                ],
+            },
+        ]
+        assert transformed_messages == expected_messages
+    else:
+        with pytest.raises(Exception) as e:
+            litellm.completion(**data)
+        assert "litellm.modify_params" in str(e.value)
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@ -1077,7 +1077,9 @@ def test_completion_cost_deepseek():
        assert response_2.usage.prompt_cache_hit_tokens is not None
        assert response_2.usage.prompt_cache_miss_tokens is not None
        assert (
-            response_2.usage.prompt_tokens == response_2.usage.prompt_cache_miss_tokens
+            response_2.usage.prompt_tokens
+            == response_2.usage.prompt_cache_miss_tokens
+            + response_2.usage.prompt_cache_hit_tokens
        )
        assert (
            response_2.usage._cache_read_input_tokens
--- a/tests/local_testing/test_custom_callback_input.py
+++ b/tests/local_testing/test_custom_callback_input.py
@ -1258,6 +1258,7 @@ def test_standard_logging_payload(model, turn_off_message_logging):
            "standard_logging_object"
        ]
        if turn_off_message_logging:
+            print("checks redacted-by-litellm")
            assert "redacted-by-litellm" == slobject["messages"][0]["content"]
            assert "redacted-by-litellm" == slobject["response"]

@ -1307,9 +1308,15 @@ def test_aaastandard_logging_payload_cache_hit():
        assert standard_logging_object["saved_cache_cost"] > 0


-def test_logging_async_cache_hit_sync_call():
+@pytest.mark.parametrize(
+    "turn_off_message_logging",
+    [False, True],
+)  # False
+def test_logging_async_cache_hit_sync_call(turn_off_message_logging):
    from litellm.types.utils import StandardLoggingPayload

+    litellm.turn_off_message_logging = turn_off_message_logging
+
    litellm.cache = Cache()

    response = litellm.completion(
@ -1356,6 +1363,14 @@ def test_logging_async_cache_hit_sync_call():
        assert standard_logging_object["response_cost"] == 0
        assert standard_logging_object["saved_cache_cost"] > 0

+        if turn_off_message_logging:
+            print("checks redacted-by-litellm")
+            assert (
+                "redacted-by-litellm"
+                == standard_logging_object["messages"][0]["content"]
+            )
+            assert "redacted-by-litellm" == standard_logging_object["response"]
+

 def test_logging_standard_payload_failure_call():
    from litellm.types.utils import StandardLoggingPayload
--- a/tests/local_testing/test_prompt_caching.py
+++ b/tests/local_testing/test_prompt_caching.py
@ -0,0 +1,81 @@
+"""Asserts that prompt caching information is correctly returned for Anthropic, OpenAI, and Deepseek"""
+
+import io
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+import litellm
+import pytest
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "anthropic/claude-3-5-sonnet-20240620",
+        "openai/gpt-4o",
+        "deepseek/deepseek-chat",
+    ],
+)
+def test_prompt_caching_model(model):
+    for _ in range(2):
+        response = litellm.completion(
+            model=model,
+            messages=[
+                # System Message
+                {
+                    "role": "system",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "Here is the full text of a complex legal agreement"
+                            * 400,
+                            "cache_control": {"type": "ephemeral"},
+                        }
+                    ],
+                },
+                # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "What are the key terms and conditions in this agreement?",
+                            "cache_control": {"type": "ephemeral"},
+                        }
+                    ],
+                },
+                {
+                    "role": "assistant",
+                    "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+                },
+                # The final turn is marked with cache-control, for continuing in followups.
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "What are the key terms and conditions in this agreement?",
+                            "cache_control": {"type": "ephemeral"},
+                        }
+                    ],
+                },
+            ],
+            temperature=0.2,
+            max_tokens=10,
+        )
+
+    print("response=", response)
+    print("response.usage=", response.usage)
+
+    assert "prompt_tokens_details" in response.usage
+    assert response.usage.prompt_tokens_details.cached_tokens > 0
+
+    # assert "cache_read_input_tokens" in response.usage
+    # assert "cache_creation_input_tokens" in response.usage
+
+    # # Assert either a cache entry was created or cache was read - changes depending on the anthropic api ttl
+    # assert (response.usage.cache_read_input_tokens > 0) or (
+    #     response.usage.cache_creation_input_tokens > 0
+    # )
--- a/tests/local_testing/test_sagemaker.py
+++ b/tests/local_testing/test_sagemaker.py
@ -180,6 +180,40 @@ async def test_completion_sagemaker_stream(sync_mode, model):
        pytest.fail(f"Error occurred: {e}")


+@pytest.mark.asyncio()
+@pytest.mark.parametrize("sync_mode", [False, True])
+@pytest.mark.parametrize(
+    "model",
+    [
+        "sagemaker_chat/huggingface-pytorch-tgi-inference-2024-08-23-15-48-59-245",
+        "sagemaker/jumpstart-dft-hf-textgeneration1-mp-20240815-185614",
+    ],
+)
+async def test_completion_sagemaker_streaming_bad_request(sync_mode, model):
+    litellm.set_verbose = True
+    print("testing sagemaker")
+    if sync_mode is True:
+        with pytest.raises(litellm.BadRequestError):
+            response = litellm.completion(
+                model=model,
+                messages=[
+                    {"role": "user", "content": "hi"},
+                ],
+                stream=True,
+                max_tokens=8000000000000000,
+            )
+    else:
+        with pytest.raises(litellm.BadRequestError):
+            response = await litellm.acompletion(
+                model=model,
+                messages=[
+                    {"role": "user", "content": "hi"},
+                ],
+                stream=True,
+                max_tokens=8000000000000000,
+            )
+
+
@pytest.mark.asyncio
 async def test_acompletion_sagemaker_non_stream():
    mock_response = AsyncMock()
--- a/tests/local_testing/test_utils.py
+++ b/tests/local_testing/test_utils.py
@ -517,17 +517,19 @@ def test_redact_msgs_from_logs():
        ]
    )

+    litellm_logging_obj = Logging(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "hi"}],
+        stream=False,
+        call_type="acompletion",
+        litellm_call_id="1234",
+        start_time=datetime.now(),
+        function_id="1234",
+    )
+
    _redacted_response_obj = redact_message_input_output_from_logging(
        result=response_obj,
-        litellm_logging_obj=Logging(
-            model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": "hi"}],
-            stream=False,
-            call_type="acompletion",
-            litellm_call_id="1234",
-            start_time=datetime.now(),
-            function_id="1234",
-        ),
+        model_call_details=litellm_logging_obj.model_call_details,
    )

    # Assert the response_obj content is NOT modified