"""Lint check: flag every ``.replace("data:", ...)`` call in a source tree.

Python files are inspected through their AST; a fixed set of other text-like
files is inspected line by line with a regular expression.  ``main`` raises if
anything is found, pointing at
``litellm.CustomStreamWrapper._strip_sse_data_from_chunk`` as the replacement.
"""

import ast
import os
import re
from pathlib import Path
from typing import Any, Dict, List

# One finding: keys are "file", "line", "col" and "text".
Issue = Dict[str, Any]

# Directory names that are never descended into while scanning.
_EXCLUDED_DIR_NAMES = frozenset({".git", "__pycache__", ".venv", "node_modules"})

# Non-Python files that are scanned with the regex fallback.
_REGEX_SCANNED_SUFFIXES = (".js", ".ts", ".jsx", ".tsx", ".md", ".ipynb")

# Compiled once; matches `.replace("data:"` / `.replace('data:'` on a line.
_DATA_REPLACE_PATTERN = re.compile(r'\.replace\(\s*[\'"]data:[\'"]')


class DataReplaceVisitor(ast.NodeVisitor):
    """AST visitor that finds calls to .replace("data:", ...) in the code.

    Findings accumulate in ``self.issues`` as dicts with the keys ``file``,
    ``line``, ``col`` and ``text``.
    """

    def __init__(self):
        self.issues: List[Issue] = []
        self.current_file = None

    def set_file(self, filename):
        """Record the file name that subsequent findings are attributed to."""
        self.current_file = filename

    @staticmethod
    def _targets_data_prefix(node: ast.Call) -> bool:
        """Return True for ``<expr>.replace(<str literal containing "data:">, ...)``."""
        if not (isinstance(node.func, ast.Attribute) and node.func.attr == "replace"):
            return False
        # str.replace needs (old, new); fewer positional args is some other API.
        if len(node.args) < 2:
            return False
        first = node.args[0]
        return (
            isinstance(first, ast.Constant)
            and isinstance(first.value, str)
            and "data:" in first.value
        )

    def visit_Call(self, node):
        """Record the call if it matches, then keep walking nested nodes."""
        if self._targets_data_prefix(node):
            self.issues.append(
                {
                    "file": self.current_file,
                    "line": node.lineno,
                    "col": node.col_offset,
                    "text": f'Found .replace("data:", ...) at line {node.lineno}',
                }
            )
        # Continue visiting child nodes so nested/chained calls are seen too.
        self.generic_visit(node)


def check_file_with_ast(file_path) -> List[Issue]:
    """Check a Python file for .replace("data:", ...) using AST parsing.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        A list of issue dicts.  A file that cannot be decoded as UTF-8 or
        cannot be parsed yields a single placeholder issue at line 0, col 0,
        so the problem is reported instead of aborting the whole scan.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            source = f.read()
    except UnicodeDecodeError:
        return [
            {
                "file": file_path,
                "line": 0,
                "col": 0,
                "text": "File is not valid UTF-8, could not parse",
            }
        ]

    try:
        tree = ast.parse(source, filename=file_path)
    except (SyntaxError, ValueError):
        # ValueError: older interpreters reject sources containing null bytes
        # with ValueError rather than SyntaxError.
        return [
            {
                "file": file_path,
                "line": 0,
                "col": 0,
                "text": "Syntax error in file, could not parse",
            }
        ]

    visitor = DataReplaceVisitor()
    visitor.set_file(file_path)
    visitor.visit(tree)
    return visitor.issues


def check_file_with_regex(file_path) -> List[Issue]:
    """Check any file for .replace("data:", ...) using regex.

    The file is read as UTF-8 with undecodable bytes ignored and matched one
    line at a time, so a call whose arguments start on a later line than
    ``.replace(`` is not detected.

    Args:
        file_path: Path of the text file to inspect.

    Returns:
        A list of issue dicts; ``line`` is 1-based and ``col`` is the 0-based
        offset of the match within its line.
    """
    issues: List[Issue] = []
    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        for i, line in enumerate(f, 1):
            for match in _DATA_REPLACE_PATTERN.finditer(line):
                issues.append(
                    {
                        "file": file_path,
                        "line": i,
                        "col": match.start(),
                        "text": f'Found .replace("data:", ...) at line {i}',
                    }
                )
    return issues


def scan_directory(base_dir) -> List[Issue]:
    """Scan a directory recursively for files containing .replace("data:", ...).

    Directories named ``.git``, ``__pycache__``, ``.venv`` or ``node_modules``
    are pruned by exact name, so look-alikes such as ``.github`` are still
    scanned and excluded trees are never walked.

    Args:
        base_dir: Root directory of the scan.

    Returns:
        All issues found, in sorted directory/file traversal order.
    """
    all_issues: List[Issue] = []
    for root, dirs, files in os.walk(base_dir):
        # In-place assignment is what tells os.walk not to descend further.
        dirs[:] = sorted(d for d in dirs if d not in _EXCLUDED_DIR_NAMES)

        for file in sorted(files):
            file_path = os.path.join(root, file)

            if file.endswith(".py"):
                # For Python files, use AST for more accurate parsing.
                checker = check_file_with_ast
            elif file.endswith(_REGEX_SCANNED_SUFFIXES):
                # For other files that might contain code, use regex.
                checker = check_file_with_regex
            else:
                continue

            print("checking file: ", file)
            all_issues.extend(checker(file_path))

    return all_issues


def main(base_dir: str = "./litellm"):
    """Run the scan and fail loudly when any offending call is found.

    Args:
        base_dir: Directory to scan; defaults to the project root package.
            For local testing from a nested directory use e.g. "../../litellm".

    Raises:
        Exception: If at least one issue is reported by ``scan_directory``.
    """
    print(f"Scanning for .replace('data:', ...) usage in {base_dir}")
    issues = scan_directory(base_dir)

    if not issues:
        print("✅ No instances of .replace('data:', ...) found.")
        return

    print(f"\n⚠️ Found {len(issues)} instances of .replace('data:', ...):")
    for issue in issues:
        print(f"{issue['file']}:{issue['line']} - {issue['text']}")

    # Fail the test if issues are found
    raise Exception(
        f"Found {len(issues)} instances of .replace('data:', ...) which may be unsafe. Use litellm.CustomStreamWrapper._strip_sse_data_from_chunk instead."
    )


if __name__ == "__main__":
    main()