UI - new API Playground for testing LiteLLM translation (#9073)

* feat: initial commit - enable dev to see translated request

* feat(utils.py): expose new endpoint - `/utils/transform_request` to see the raw request sent by litellm

* feat(transform_request.tsx): allow user to see their transformed request

* refactor(litellm_logging.py): return raw request in 3 parts - api_base, headers, request body

easier to render each part individually in the UI than to extract it from a combined string

* feat: transform_request.tsx

working e2e raw request viewing

* fix(litellm_logging.py): fix transform viewing for bedrock models

* fix(litellm_logging.py): don't return sensitive headers in raw request headers

prevent accidental leak

* feat(transform_request.tsx): style improvements
Krish Dholakia, 2025-03-07 19:39:31 -08:00, committed by GitHub
parent 4ed9db4093, commit 9fc7bd0493
9 changed files with 426 additions and 13 deletions


@@ -55,6 +55,7 @@ from litellm.types.utils import (
     LiteLLMLoggingBaseClass,
     ModelResponse,
     ModelResponseStream,
+    RawRequestTypedDict,
     StandardCallbackDynamicParams,
     StandardLoggingAdditionalHeaders,
     StandardLoggingHiddenParams,
@@ -205,6 +206,7 @@ class Logging(LiteLLMLoggingBaseClass):
         ] = None,
         applied_guardrails: Optional[List[str]] = None,
         kwargs: Optional[Dict] = None,
+        log_raw_request_response: bool = False,
     ):
         _input: Optional[str] = messages  # save original value of messages
         if messages is not None:
@@ -233,6 +235,7 @@ class Logging(LiteLLMLoggingBaseClass):
         self.sync_streaming_chunks: List[Any] = (
             []
         )  # for generating complete stream response
+        self.log_raw_request_response = log_raw_request_response

         # Initialize dynamic callbacks
         self.dynamic_input_callbacks: Optional[
@@ -453,6 +456,18 @@ class Logging(LiteLLMLoggingBaseClass):
         return model, messages, non_default_params

+    def _get_raw_request_body(self, data: Optional[Union[dict, str]]) -> dict:
+        if data is None:
+            return {"error": "Received empty dictionary for raw request body"}
+        if isinstance(data, str):
+            try:
+                return json.loads(data)
+            except Exception:
+                return {
+                    "error": "Unable to parse raw request body. Got - {}".format(data)
+                }
+        return data
+
     def _pre_call(self, input, api_key, model=None, additional_args={}):
         """
         Common helper function across the sync + async pre-call function
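
For reference, the new helper's contract as a self-contained sketch (re-implemented here for illustration; mirrors the hunk above): a dict passes through, a JSON string is parsed, and None or unparseable input comes back as an error dict instead of raising.

    import json
    from typing import Optional, Union

    def get_raw_request_body(data: Optional[Union[dict, str]]) -> dict:
        # standalone re-implementation of Logging._get_raw_request_body
        if data is None:
            return {"error": "Received empty dictionary for raw request body"}
        if isinstance(data, str):
            try:
                return json.loads(data)
            except Exception:
                return {"error": "Unable to parse raw request body. Got - {}".format(data)}
        return data

    assert get_raw_request_body({"model": "gpt-4o"}) == {"model": "gpt-4o"}
    assert get_raw_request_body('{"model": "gpt-4o"}') == {"model": "gpt-4o"}
    assert "error" in get_raw_request_body(None)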
@@ -468,6 +483,7 @@
         self.model_call_details["model"] = model

     def pre_call(self, input, api_key, model=None, additional_args={}):  # noqa: PLR0915
+
         # Log the exact input to the LLM API
         litellm.error_logs["PRE_CALL"] = locals()
         try:
@@ -485,28 +501,54 @@
                 additional_args=additional_args,
             )
             # log raw request to provider (like LangFuse) -- if opted in.
-            if log_raw_request_response is True:
+            if (
+                self.log_raw_request_response is True
+                or log_raw_request_response is True
+            ):
                 _litellm_params = self.model_call_details.get("litellm_params", {})
                 _metadata = _litellm_params.get("metadata", {}) or {}
                 try:
                     # [Non-blocking Extra Debug Information in metadata]
-                    if (
-                        turn_off_message_logging is not None
-                        and turn_off_message_logging is True
-                    ):
+                    if turn_off_message_logging is True:
                         _metadata["raw_request"] = (
                             "redacted by litellm. \
                             'litellm.turn_off_message_logging=True'"
                         )
                     else:
                         curl_command = self._get_request_curl_command(
                             api_base=additional_args.get("api_base", ""),
                             headers=additional_args.get("headers", {}),
                             additional_args=additional_args,
                             data=additional_args.get("complete_input_dict", {}),
                         )
                         _metadata["raw_request"] = str(curl_command)
+                        # split up, so it's easier to parse in the UI
+                        self.model_call_details["raw_request_typed_dict"] = (
+                            RawRequestTypedDict(
+                                raw_request_api_base=str(
+                                    additional_args.get("api_base") or ""
+                                ),
+                                raw_request_body=self._get_raw_request_body(
+                                    additional_args.get("complete_input_dict", {})
+                                ),
+                                raw_request_headers=self._get_masked_headers(
+                                    additional_args.get("headers", {}) or {},
+                                    ignore_sensitive_headers=True,
+                                ),
+                                error=None,
+                            )
+                        )
                 except Exception as e:
+                    self.model_call_details["raw_request_typed_dict"] = (
+                        RawRequestTypedDict(
+                            error=str(e),
+                        )
+                    )
+                    traceback.print_exc()
                     _metadata["raw_request"] = (
                         "Unable to Log \
                         raw request: {}".format(
@@ -639,9 +681,14 @@
             )
             verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")

+    def _get_request_body(self, data: dict) -> str:
+        return str(data)
+
     def _get_request_curl_command(
-        self, api_base: str, headers: dict, additional_args: dict, data: dict
+        self, api_base: str, headers: Optional[dict], additional_args: dict, data: dict
     ) -> str:
+        if headers is None:
+            headers = {}
         curl_command = "\n\nPOST Request Sent from LiteLLM:\n"
         curl_command += "curl -X POST \\\n"
         curl_command += f"{api_base} \\\n"
@@ -649,11 +696,10 @@
         formatted_headers = " ".join(
             [f"-H '{k}: {v}'" for k, v in masked_headers.items()]
         )
-
         curl_command += (
             f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else ""
         )
-        curl_command += f"-d '{str(data)}'\n"
+        curl_command += f"-d '{self._get_request_body(data)}'\n"
         if additional_args.get("request_str", None) is not None:
             # print the sagemaker / bedrock client request
             curl_command = "\nRequest Sent from LiteLLM:\n"
@@ -662,12 +708,20 @@
             curl_command = str(self.model_call_details)
         return curl_command

-    def _get_masked_headers(self, headers: dict):
+    def _get_masked_headers(
+        self, headers: dict, ignore_sensitive_headers: bool = False
+    ) -> dict:
         """
         Internal debugging helper function
         Masks the headers of the request sent from LiteLLM
         """
+        sensitive_keywords = [
+            "authorization",
+            "token",
+            "key",
+            "secret",
+        ]
         return {
             k: (
                 (v[:-44] + "*" * 44)

@@ -675,6 +729,11 @@
                 else "*****"
             )
             for k, v in headers.items()
+            if not ignore_sensitive_headers
+            or not any(
+                sensitive_keyword in k.lower()
+                for sensitive_keyword in sensitive_keywords
+            )
         }

     def post_call(
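
Taken together, the two hunks above mean that with ignore_sensitive_headers=True (the raw-request path), headers whose names contain a sensitive keyword are dropped entirely rather than masked. A standalone sketch of the combined behavior; the unchanged guard between the hunks is assumed to mask long string values:

    def mask_headers(headers: dict, ignore_sensitive_headers: bool = False) -> dict:
        # illustration only; the real method lives on the Logging class
        sensitive_keywords = ["authorization", "token", "key", "secret"]
        return {
            # assumed hidden condition: long string values are partially masked
            k: ((v[:-44] + "*" * 44) if isinstance(v, str) and len(v) > 44 else "*****")
            for k, v in headers.items()
            if not ignore_sensitive_headers
            or not any(word in k.lower() for word in sensitive_keywords)
        }

    headers = {"Authorization": "Bearer " + "x" * 64, "Content-Type": "application/json"}
    print(mask_headers(headers))                                 # both keys kept, values masked
    print(mask_headers(headers, ignore_sensitive_headers=True))  # Authorization dropped entirely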

File diff suppressed because one or more lines are too long


@@ -19,6 +19,7 @@ from litellm.types.integrations.slack_alerting import AlertType
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.router import RouterErrors, UpdateRouterConfig
 from litellm.types.utils import (
+    CallTypes,
     EmbeddingResponse,
     GenericBudgetConfigType,
     ImageResponse,

@@ -2579,3 +2580,8 @@ class PrismaCompatibleUpdateDBModel(TypedDict, total=False):
 class SpecialManagementEndpointEnums(enum.Enum):
     DEFAULT_ORGANIZATION = "default_organization"
+
+
+class TransformRequestBody(BaseModel):
+    call_type: CallTypes
+    request_body: dict
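
This is the body model for the new `/utils/transform_request` endpoint added below. The playground UI sends, for example:

    # example payload; "completion" is the call_type the playground uses
    payload = {
        "call_type": "completion",  # coerced to the CallTypes enum by Pydantic
        "request_body": {
            "model": "openai/gpt-4o",
            "messages": [{"role": "user", "content": "Explain quantum computing in simple terms"}],
            "temperature": 0.7,
            "max_tokens": 500,
            "stream": True,
        },
    }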


@@ -290,7 +290,7 @@ from litellm.types.router import ModelInfo as RouterModelInfo
 from litellm.types.router import RouterGeneralSettings, updateDeployment
 from litellm.types.utils import CustomHuggingfaceTokenizer
 from litellm.types.utils import ModelInfo as ModelMapInfo
-from litellm.types.utils import StandardLoggingPayload
+from litellm.types.utils import RawRequestTypedDict, StandardLoggingPayload
 from litellm.utils import _add_custom_logger_callback_to_specific_event

 try:

@@ -5604,6 +5604,18 @@ async def supported_openai_params(model: str):
     )

+@router.post(
+    "/utils/transform_request",
+    tags=["llm utils"],
+    dependencies=[Depends(user_api_key_auth)],
+    response_model=RawRequestTypedDict,
+)
+async def transform_request(request: TransformRequestBody):
+    from litellm.utils import return_raw_request
+
+    return return_raw_request(endpoint=request.call_type, kwargs=request.request_body)
+
+
 #### [BETA] - This is a beta endpoint, format might change based on user feedback. - https://github.com/BerriAI/litellm/issues/964
 @router.post(
     "/model/new",

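For manual testing outside the UI, a minimal stdlib-only client sketch; the proxy address matches the default the playground component below uses, and the API key is a placeholder:

    import json
    import urllib.request

    req = urllib.request.Request(
        "http://0.0.0.0:4000/utils/transform_request",  # default local proxy address
        data=json.dumps(
            {
                "call_type": "completion",
                "request_body": {
                    "model": "openai/gpt-4o",
                    "messages": [{"role": "user", "content": "hi"}],
                },
            }
        ).encode(),
        headers={
            "Authorization": "Bearer sk-1234",  # placeholder proxy key
            "Content-Type": "application/json",
        },
    )
    with urllib.request.urlopen(req) as resp:
        print(json.dumps(json.load(resp), indent=2))  # a RawRequestTypedDict
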

@@ -1995,3 +1995,10 @@ class LiteLLMBatch(Batch):
         except Exception:
             # if using pydantic v1
             return self.dict()
+
+
+class RawRequestTypedDict(TypedDict, total=False):
+    raw_request_api_base: Optional[str]
+    raw_request_body: Optional[dict]
+    raw_request_headers: Optional[dict]
+    error: Optional[str]
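
A populated response might look like this (illustrative values only; sensitive headers are already stripped server-side):

    example: RawRequestTypedDict = {
        "raw_request_api_base": "https://api.openai.com/v1/chat/completions",
        "raw_request_body": {"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]},
        "raw_request_headers": {"Content-Type": "application/json"},
        "error": None,
    }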


@@ -156,6 +156,7 @@ from litellm.types.utils import (
     ModelResponseStream,
     ProviderField,
     ProviderSpecificModelInfo,
+    RawRequestTypedDict,
     SelectTokenizerResponse,
     StreamingChoices,
     TextChoices,
@@ -6477,3 +6478,48 @@ def add_openai_metadata(metadata: dict) -> dict:
     }
     return visible_metadata.copy()
+
+
+def return_raw_request(endpoint: CallTypes, kwargs: dict) -> RawRequestTypedDict:
+    """
+    Return the json str of the request
+
+    This is currently in BETA, and tested for `/chat/completions` -> `litellm.completion` calls.
+    """
+    from datetime import datetime
+
+    from litellm.litellm_core_utils.litellm_logging import Logging
+
+    litellm_logging_obj = Logging(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "hi"}],
+        stream=False,
+        call_type="acompletion",
+        litellm_call_id="1234",
+        start_time=datetime.now(),
+        function_id="1234",
+        log_raw_request_response=True,
+    )
+
+    llm_api_endpoint = getattr(litellm, endpoint.value)
+
+    received_exception = ""
+
+    try:
+        llm_api_endpoint(
+            **kwargs,
+            litellm_logging_obj=litellm_logging_obj,
+            api_key="my-fake-api-key",  # 👈 ensure the request fails
+        )
+    except Exception as e:
+        received_exception = str(e)
+
+    raw_request_typed_dict = litellm_logging_obj.model_call_details.get(
+        "raw_request_typed_dict"
+    )
+    if raw_request_typed_dict:
+        return cast(RawRequestTypedDict, raw_request_typed_dict)
+    else:
+        return RawRequestTypedDict(
+            error=received_exception,
+        )
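
Calling the helper directly from Python might look like this (a sketch; it assumes CallTypes.completion is the enum member behind the "completion" call_type the UI sends):

    from litellm.types.utils import CallTypes
    from litellm.utils import return_raw_request

    raw = return_raw_request(
        endpoint=CallTypes.completion,  # assumed member, mirrors the UI's "completion"
        kwargs={"model": "openai/gpt-4o", "messages": [{"role": "user", "content": "hi"}]},
    )
    print(raw.get("raw_request_api_base"))
    print(raw.get("raw_request_headers"))  # sensitive headers already removed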


@@ -28,6 +28,7 @@ import CacheDashboard from "@/components/cache_dashboard";
 import { setGlobalLitellmHeaderName } from "@/components/networking";
 import { Organization } from "@/components/networking";
 import GuardrailsPanel from "@/components/guardrails";
+import TransformRequestPanel from "@/components/transform_request";
 import { fetchUserModels } from "@/components/create_key_button";
 import { fetchTeams } from "@/components/common_components/fetch_teams";

 function getCookie(name: string) {

@@ -308,6 +309,8 @@ export default function CreateKeyPage() {
             <BudgetPanel accessToken={accessToken} />
           ) : page == "guardrails" ? (
             <GuardrailsPanel accessToken={accessToken} />
+          ) : page == "transform-request" ? (
+            <TransformRequestPanel accessToken={accessToken} />
           ) : page == "general-settings" ? (
             <GeneralSettings
               userID={userID}


@@ -18,7 +18,8 @@ import {
   LineOutlined,
   LineChartOutlined,
   SafetyOutlined,
-  ExperimentOutlined
+  ExperimentOutlined,
+  ThunderboltOutlined,
 } from '@ant-design/icons';
 import { old_admin_roles, v2_admin_role_names, all_admin_roles, rolesAllowedToSeeUsage } from '../utils/roles';

@@ -53,6 +54,7 @@ const menuItems: MenuItem[] = [
   { key: "14", page: "api_ref", label: "API Reference", icon: <ApiOutlined /> },
   { key: "16", page: "model-hub", label: "Model Hub", icon: <AppstoreOutlined /> },
   { key: "15", page: "logs", label: "Logs", icon: <LineChartOutlined />},
   {

@@ -65,7 +67,7 @@ const menuItems: MenuItem[] = [
       { key: "9", page: "caching", label: "Caching", icon: <DatabaseOutlined />, roles: all_admin_roles },
       { key: "10", page: "budgets", label: "Budgets", icon: <BankOutlined />, roles: all_admin_roles },
       { key: "11", page: "guardrails", label: "Guardrails", icon: <SafetyOutlined />, roles: all_admin_roles },
+      { key: "18", page: "transform-request", label: "Playground", icon: <ThunderboltOutlined />, roles: all_admin_roles },
     ]
   },


@@ -0,0 +1,279 @@
import React, { useState } from 'react';
import { Button, Select, Tabs, message } from 'antd';
import { CopyOutlined } from '@ant-design/icons';
import { Title } from '@tremor/react';
interface TransformRequestPanelProps {
accessToken: string | null;
}
interface TransformResponse {
raw_request_api_base: string;
raw_request_body: Record<string, any>;
raw_request_headers: Record<string, string>;
}
const TransformRequestPanel: React.FC<TransformRequestPanelProps> = ({ accessToken }) => {
const [originalRequestJSON, setOriginalRequestJSON] = useState(`{
"model": "openai/gpt-4o",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Explain quantum computing in simple terms"
}
],
"temperature": 0.7,
"max_tokens": 500,
"stream": true
}`);
const [transformedResponse, setTransformedResponse] = useState('');
const [isLoading, setIsLoading] = useState(false);
// Function to format curl command from API response parts
const formatCurlCommand = (apiBase: string, requestBody: Record<string, any>, requestHeaders: Record<string, string>) => {
// Format the request body as nicely indented JSON with 2 spaces
const formattedBody = JSON.stringify(requestBody, null, 2)
// Add additional indentation for the entire body
.split('\n')
.map(line => ` ${line}`)
.join('\n');
// Build headers string with consistent indentation
const headerString = Object.entries(requestHeaders)
.map(([key, value]) => `-H '${key}: ${value}'`)
.join(' \\\n ');
// Build the curl command with consistent indentation
return `curl -X POST \\
${apiBase} \\
${headerString ? `${headerString} \\\n ` : ''}-H 'Content-Type: application/json' \\
-d '{
${formattedBody}
}'`;
};
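  // Example: formatCurlCommand('https://api.openai.com/v1/chat/completions',
  // { model: 'gpt-4o' }, {}) returns roughly the following. Note that the
  // stringified body keeps its own braces inside the template's literal { }:
  //
  //   curl -X POST \
  //     https://api.openai.com/v1/chat/completions \
  //     -H 'Content-Type: application/json' \
  //     -d '{
  //       {
  //         "model": "gpt-4o"
  //       }
  //     }'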
// Function to handle the transform request
const handleTransform = async () => {
setIsLoading(true);
try {
// Parse the JSON from the textarea
let requestBody;
try {
requestBody = JSON.parse(originalRequestJSON);
} catch (e) {
message.error('Invalid JSON in request body');
setIsLoading(false);
return;
}
// Create the request payload
const payload = {
call_type: "completion",
request_body: requestBody
};
// Make the API call using fetch
const response = await fetch('http://0.0.0.0:4000/utils/transform_request', {
method: 'POST',
headers: {
'Authorization': `Bearer ${accessToken}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(payload)
});
if (!response.ok) {
throw new Error(`HTTP error ${response.status}`);
}
// Parse the response as JSON
const data = await response.json();
console.log("API response:", data);
// Check if the response has the expected fields
if (data.raw_request_api_base && data.raw_request_body) {
// Format the curl command with the separate parts
const formattedCurl = formatCurlCommand(
data.raw_request_api_base,
data.raw_request_body,
data.raw_request_headers || {}
);
// Update state with the formatted curl command
setTransformedResponse(formattedCurl);
message.success('Request transformed successfully');
} else {
// Handle the case where the API returns a different format
// Try to extract the parts from a string response if needed
const rawText = typeof data === 'string' ? data : JSON.stringify(data);
setTransformedResponse(rawText);
message.info('Transformed request received in unexpected format');
}
} catch (err) {
console.error('Error transforming request:', err);
message.error('Failed to transform request');
} finally {
setIsLoading(false);
}
};
// Add this handler function near your other handlers
const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
if ((e.metaKey || e.ctrlKey) && e.key === 'Enter') {
e.preventDefault(); // Prevent default behavior
handleTransform();
}
};
return (
<div className="w-full m-2" style={{ overflow: 'hidden' }}>
<Title>Playground</Title>
<p className="text-sm text-gray-500">See how LiteLLM transforms your request for the specified provider.</p>
<div style={{
display: 'flex',
gap: '16px',
width: '100%',
minWidth: 0,
overflow: 'hidden'
}} className="mt-4">
{/* Original Request Panel */}
<div style={{
flex: '1 1 50%',
display: 'flex',
flexDirection: 'column',
border: '1px solid #e8e8e8',
borderRadius: '8px',
padding: '24px',
overflow: 'hidden',
maxHeight: '600px',
minWidth: 0
}}>
<div style={{ marginBottom: '24px' }}>
<h2 style={{ fontSize: '24px', fontWeight: 'bold', margin: '0 0 4px 0' }}>Original Request</h2>
<p style={{ color: '#666', margin: 0 }}>The request you would send to LiteLLM's `/chat/completions` endpoint.</p>
</div>
<textarea
style={{
flex: '1 1 auto',
width: '100%',
minHeight: '240px',
padding: '16px',
border: '1px solid #e8e8e8',
borderRadius: '6px',
fontFamily: 'monospace',
fontSize: '14px',
resize: 'none',
marginBottom: '24px',
overflow: 'auto'
}}
value={originalRequestJSON}
onChange={(e) => setOriginalRequestJSON(e.target.value)}
onKeyDown={handleKeyDown}
placeholder="Press Cmd/Ctrl + Enter to transform"
/>
<div style={{
display: 'flex',
justifyContent: 'flex-end',
marginTop: 'auto'
}}>
<Button
type="primary"
style={{
backgroundColor: '#000',
display: 'flex',
alignItems: 'center',
gap: '8px'
}}
onClick={handleTransform}
loading={isLoading}
>
<span>Transform</span>
<span></span>
</Button>
</div>
</div>
{/* Transformed Request Panel */}
<div style={{
flex: '1 1 50%',
display: 'flex',
flexDirection: 'column',
border: '1px solid #e8e8e8',
borderRadius: '8px',
padding: '24px',
overflow: 'hidden',
maxHeight: '800px',
minWidth: 0
}}>
<div style={{ marginBottom: '24px' }}>
<h2 style={{ fontSize: '24px', fontWeight: 'bold', margin: '0 0 4px 0' }}>Transformed Request</h2>
<p style={{ color: '#666', margin: 0 }}>How LiteLLM transforms your request for the specified provider.</p>
<br/>
<p style={{ color: '#666', margin: 0 }} className="text-xs">Note: Sensitive headers are not shown.</p>
</div>
<div style={{
position: 'relative',
backgroundColor: '#f5f5f5',
borderRadius: '6px',
flex: '1 1 auto',
display: 'flex',
flexDirection: 'column',
overflow: 'hidden'
}}>
<pre
style={{
padding: '16px',
fontFamily: 'monospace',
fontSize: '14px',
margin: 0,
overflow: 'auto',
flex: '1 1 auto'
}}
>
{transformedResponse || `curl -X POST \\
https://api.openai.com/v1/chat/completions \\
-H 'Authorization: Bearer sk-xxx' \\
-H 'Content-Type: application/json' \\
-d '{
"model": "gpt-4",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
}
],
"temperature": 0.7
}'`}
</pre>
<Button
type="text"
icon={<CopyOutlined />}
style={{
position: 'absolute',
right: '8px',
top: '8px'
}}
size="small"
onClick={() => {
navigator.clipboard.writeText(transformedResponse || '');
message.success('Copied to clipboard');
}}
/>
</div>
</div>
</div>
</div>
);
};
export default TransformRequestPanel;