Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)

Merge branch 'BerriAI:main' into main

Commit e6305b2247

128 changed files with 3858 additions and 744 deletions
@@ -1127,6 +1127,7 @@ jobs:
          name: Install Dependencies
          command: |
            python -m pip install --upgrade pip
            python -m pip install wheel setuptools
            python -m pip install -r requirements.txt
            pip install "pytest==7.3.1"
            pip install "pytest-retry==1.6.3"

@@ -398,6 +398,8 @@ curl http://localhost:4000/v1/chat/completions \
</TabItem>
</Tabs>

You can also use the `enterpriseWebSearch` tool for an [enterprise compliant search](https://cloud.google.com/vertex-ai/generative-ai/docs/grounding/web-grounding-enterprise).
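
A minimal sketch of calling it through the proxy, assuming a Vertex AI Gemini model is exposed under the alias `gemini-pro` and the proxy key below is a placeholder (the tool is passed in the standard `tools` list):

```bash
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "gemini-pro",
    "messages": [{"role": "user", "content": "Who won the most recent World Cup?"}],
    "tools": [{"enterpriseWebSearch": {}}]
  }'
```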

#### **Moving from Vertex AI SDK to LiteLLM (GROUNDING)**

@@ -449,6 +449,7 @@ router_settings:
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure
| MICROSOFT_SERVICE_PRINCIPAL_ID | Service Principal ID for the Microsoft Enterprise Application. (This is an advanced feature, used if you want LiteLLM to auto-assign members to LiteLLM Teams based on their Microsoft Entra ID groups)
| NO_DOCS | Flag to disable documentation generation
| NO_PROXY | List of addresses to bypass proxy
| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval

@@ -26,10 +26,12 @@ model_list:
  - model_name: sagemaker-completion-model
    litellm_params:
      model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
    model_info:
      input_cost_per_second: 0.000420
  - model_name: sagemaker-embedding-model
    litellm_params:
      model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
    model_info:
      input_cost_per_second: 0.000420
```

@@ -55,11 +57,33 @@ model_list:
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE
      api_version: os.environ/AZURE_API_VERSION
    model_info:
      input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
      output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token
```

### Debugging
## Override Model Cost Map

You can override [our model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) with your own custom pricing for a mapped model.

Just add a `model_info` key to your model in the config, and override the desired keys.

Example: Override Anthropic's model cost map for the `prod/claude-3-5-sonnet-20241022` model.

```yaml
model_list:
  - model_name: "prod/claude-3-5-sonnet-20241022"
    litellm_params:
      model: "anthropic/claude-3-5-sonnet-20241022"
      api_key: os.environ/ANTHROPIC_PROD_API_KEY
    model_info:
      input_cost_per_token: 0.000006
      output_cost_per_token: 0.00003
      cache_creation_input_token_cost: 0.0000075
      cache_read_input_token_cost: 0.0000006
```

## Debugging

If your custom pricing is not being used or you're seeing errors, please check the following:
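
One quick first check is to start the proxy with detailed debug logging and confirm which model name LiteLLM resolves for the cost lookup (a sketch; the config path is a placeholder):

```bash
litellm --config /path/to/config.yaml --detailed_debug
```

The verbose output includes the cost-calculation logs, e.g. which model name was selected when looking up pricing, so you can see whether your `model_info` overrides were picked up.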
@@ -161,6 +161,89 @@ Here's the available UI roles for a LiteLLM Internal User:
- `internal_user`: can login, view/create/delete their own keys, view their spend. **Cannot** add new users.
- `internal_user_viewer`: can login, view their own keys, view their own spend. **Cannot** create/delete keys, add new users.

## Auto-add SSO users to teams

This walks through setting up SSO auto-add for **Okta and Google SSO**.

### Okta, Google SSO

1. Specify the JWT field that contains the team IDs that the user belongs to.

```yaml
general_settings:
  master_key: sk-1234
  litellm_jwtauth:
    team_ids_jwt_field: "groups" # 👈 CAN BE ANY FIELD
```

This assumes your SSO token looks like this. **If you need to inspect the JWT fields received from your SSO provider by LiteLLM, follow these instructions [here](#debugging-sso-jwt-fields).**

```
{
  ...,
  "groups": ["team_id_1", "team_id_2"]
}
```

2. Create the teams on LiteLLM

```bash
curl -X POST '<PROXY_BASE_URL>/team/new' \
  -H 'Authorization: Bearer <PROXY_MASTER_KEY>' \
  -H 'Content-Type: application/json' \
  -d '{
    "team_alias": "team_1",
    "team_id": "team_id_1" # 👈 MUST BE THE SAME AS THE SSO GROUP ID
  }'
```

3. Test the SSO flow

Here's a walkthrough of [how it works](https://www.loom.com/share/8959be458edf41fd85937452c29a33f3?sid=7ebd6d37-569a-4023-866e-e0cde67cb23e)

### Microsoft Entra ID SSO group assignment

This walks through setting up SSO auto-add for **Microsoft Entra ID**.

Follow along with this video for a walkthrough of how to set this up with Microsoft Entra ID:

<iframe width="840" height="500" src="https://www.loom.com/embed/ea711323aa9a496d84a01fd7b2a12f54?sid=c53e238c-5bfd-4135-b8fb-b5b1a08632cf" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>

### Debugging SSO JWT fields

If you need to inspect the JWT fields received from your SSO provider by LiteLLM, follow these instructions. This guide walks you through setting up a debug callback to view the JWT data during the SSO process.

<Image img={require('../../img/debug_sso.png')} style={{ width: '500px', height: 'auto' }} />
<br />

1. Add `/sso/debug/callback` as a redirect URL in your SSO provider

In your SSO provider's settings, add the following URL as a new redirect (callback) URL:

```bash showLineNumbers title="Redirect URL"
http://<proxy_base_url>/sso/debug/callback
```

2. Navigate to the debug login page on your browser

Navigate to the following URL on your browser:

```bash showLineNumbers title="URL to navigate to"
https://<proxy_base_url>/sso/debug/login
```

This will initiate the standard SSO flow. You will be redirected to your SSO provider's login screen, and after successful authentication, you will be redirected back to LiteLLM's debug callback route.

3. View the JWT fields

Once redirected, you should see a page called "SSO Debug Information". This page displays the JWT fields received from your SSO provider (as shown in the image above).

## Advanced
### Setting custom logout URLs

@@ -196,40 +279,6 @@ This budget does not apply to keys created under non-default teams.

[**Go Here**](./team_budgets.md)

### Auto-add SSO users to teams

1. Specify the JWT field that contains the team ids, that the user belongs to.

```yaml
general_settings:
  master_key: sk-1234
  litellm_jwtauth:
    team_ids_jwt_field: "groups" # 👈 CAN BE ANY FIELD
```

This is assuming your SSO token looks like this:
```
{
  ...,
  "groups": ["team_id_1", "team_id_2"]
}
```

2. Create the teams on LiteLLM

```bash
curl -X POST '<PROXY_BASE_URL>/team/new' \
  -H 'Authorization: Bearer <PROXY_MASTER_KEY>' \
  -H 'Content-Type: application/json' \
  -D '{
    "team_alias": "team_1",
    "team_id": "team_id_1" # 👈 MUST BE THE SAME AS THE SSO GROUP ID
  }'
```

3. Test the SSO flow

Here's a walkthrough of [how it works](https://www.loom.com/share/8959be458edf41fd85937452c29a33f3?sid=7ebd6d37-569a-4023-866e-e0cde67cb23e)

### Restrict Users from creating personal keys

BIN docs/my-website/img/debug_sso.png (new file, 167 KiB; binary file not shown)

docs/my-website/src/components/TransformRequestPlayground.tsx (new file, 161 lines)

@@ -0,0 +1,161 @@
import React, { useState } from 'react';
|
||||
import styles from './transform_request.module.css';
|
||||
|
||||
const DEFAULT_REQUEST = {
|
||||
"model": "bedrock/gpt-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Explain quantum computing in simple terms"
|
||||
}
|
||||
],
|
||||
"temperature": 0.7,
|
||||
"max_tokens": 500,
|
||||
"stream": true
|
||||
};
|
||||
|
||||
type ViewMode = 'split' | 'request' | 'transformed';
|
||||
|
||||
const TransformRequestPlayground: React.FC = () => {
|
||||
const [request, setRequest] = useState(JSON.stringify(DEFAULT_REQUEST, null, 2));
|
||||
const [transformedRequest, setTransformedRequest] = useState('');
|
||||
const [viewMode, setViewMode] = useState<ViewMode>('split');
|
||||
|
||||
const handleTransform = async () => {
|
||||
try {
|
||||
// Here you would make the actual API call to transform the request
|
||||
// For now, we'll just set a sample response
|
||||
const sampleResponse = `curl -X POST \\
|
||||
https://api.openai.com/v1/chat/completions \\
|
||||
-H 'Authorization: Bearer sk-xxx' \\
|
||||
-H 'Content-Type: application/json' \\
|
||||
-d '{
|
||||
"model": "gpt-4",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant."
|
||||
}
|
||||
],
|
||||
"temperature": 0.7
|
||||
}'`;
|
||||
setTransformedRequest(sampleResponse);
|
||||
} catch (error) {
|
||||
console.error('Error transforming request:', error);
|
||||
}
|
||||
};
|
||||
|
||||
const handleCopy = () => {
|
||||
navigator.clipboard.writeText(transformedRequest);
|
||||
};
|
||||
|
||||
const renderContent = () => {
|
||||
switch (viewMode) {
|
||||
case 'request':
|
||||
return (
|
||||
<div className={styles.panel}>
|
||||
<div className={styles['panel-header']}>
|
||||
<h2>Original Request</h2>
|
||||
<p>The request you would send to LiteLLM /chat/completions endpoint.</p>
|
||||
</div>
|
||||
<textarea
|
||||
className={styles['code-input']}
|
||||
value={request}
|
||||
onChange={(e) => setRequest(e.target.value)}
|
||||
spellCheck={false}
|
||||
/>
|
||||
<div className={styles['panel-footer']}>
|
||||
<button className={styles['transform-button']} onClick={handleTransform}>
|
||||
Transform →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
case 'transformed':
|
||||
return (
|
||||
<div className={styles.panel}>
|
||||
<div className={styles['panel-header']}>
|
||||
<h2>Transformed Request</h2>
|
||||
<p>How LiteLLM transforms your request for the specified provider.</p>
|
||||
<p className={styles.note}>Note: Sensitive headers are not shown.</p>
|
||||
</div>
|
||||
<div className={styles['code-output-container']}>
|
||||
<pre className={styles['code-output']}>{transformedRequest}</pre>
|
||||
<button className={styles['copy-button']} onClick={handleCopy}>
|
||||
Copy
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
default:
|
||||
return (
|
||||
<>
|
||||
<div className={styles.panel}>
|
||||
<div className={styles['panel-header']}>
|
||||
<h2>Original Request</h2>
|
||||
<p>The request you would send to LiteLLM /chat/completions endpoint.</p>
|
||||
</div>
|
||||
<textarea
|
||||
className={styles['code-input']}
|
||||
value={request}
|
||||
onChange={(e) => setRequest(e.target.value)}
|
||||
spellCheck={false}
|
||||
/>
|
||||
<div className={styles['panel-footer']}>
|
||||
<button className={styles['transform-button']} onClick={handleTransform}>
|
||||
Transform →
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div className={styles.panel}>
|
||||
<div className={styles['panel-header']}>
|
||||
<h2>Transformed Request</h2>
|
||||
<p>How LiteLLM transforms your request for the specified provider.</p>
|
||||
<p className={styles.note}>Note: Sensitive headers are not shown.</p>
|
||||
</div>
|
||||
<div className={styles['code-output-container']}>
|
||||
<pre className={styles['code-output']}>{transformedRequest}</pre>
|
||||
<button className={styles['copy-button']} onClick={handleCopy}>
|
||||
Copy
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className={styles['transform-playground']}>
|
||||
<div className={styles['view-toggle']}>
|
||||
<button
|
||||
className={viewMode === 'split' ? styles.active : ''}
|
||||
onClick={() => setViewMode('split')}
|
||||
>
|
||||
Split View
|
||||
</button>
|
||||
<button
|
||||
className={viewMode === 'request' ? styles.active : ''}
|
||||
onClick={() => setViewMode('request')}
|
||||
>
|
||||
Request
|
||||
</button>
|
||||
<button
|
||||
className={viewMode === 'transformed' ? styles.active : ''}
|
||||
onClick={() => setViewMode('transformed')}
|
||||
>
|
||||
Transformed
|
||||
</button>
|
||||
</div>
|
||||
<div className={styles['playground-container']}>
|
||||
{renderContent()}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default TransformRequestPlayground;
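
For reference, a minimal sketch of what the real transform call might look like, assuming the proxy exposes a `POST /utils/transform_request` endpoint that accepts the raw request body (the endpoint path and payload shape are assumptions, not confirmed by this file):

```typescript
const transformRequest = async (rawRequest: string): Promise<string> => {
  // Send the raw /chat/completions payload to the proxy's (assumed) transform
  // endpoint and return the provider-specific request it would produce.
  const response = await fetch('/utils/transform_request', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      call_type: 'completion',
      request_body: JSON.parse(rawRequest),
    }),
  });
  if (!response.ok) {
    throw new Error(`Transform request failed: ${response.status}`);
  }
  const data = await response.json();
  // Assumes the endpoint returns either a ready-to-run string (e.g. a curl
  // command) or a JSON object describing the transformed request.
  return typeof data === 'string' ? data : JSON.stringify(data, null, 2);
};
```

`handleTransform` could then call `setTransformedRequest(await transformRequest(request))` inside its try block instead of using the hard-coded sample response.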
|
|
@ -403,6 +403,7 @@ def _select_model_name_for_cost_calc(
|
|||
base_model: Optional[str] = None,
|
||||
custom_pricing: Optional[bool] = None,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
router_model_id: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
1. If custom pricing is true, return received model name
|
||||
|
@ -417,12 +418,6 @@ def _select_model_name_for_cost_calc(
|
|||
model=model, custom_llm_provider=custom_llm_provider
|
||||
)
|
||||
|
||||
if custom_pricing is True:
|
||||
return_model = model
|
||||
|
||||
if base_model is not None:
|
||||
return_model = base_model
|
||||
|
||||
completion_response_model: Optional[str] = None
|
||||
if completion_response is not None:
|
||||
if isinstance(completion_response, BaseModel):
|
||||
|
@ -430,6 +425,16 @@ def _select_model_name_for_cost_calc(
|
|||
elif isinstance(completion_response, dict):
|
||||
completion_response_model = completion_response.get("model", None)
|
||||
hidden_params: Optional[dict] = getattr(completion_response, "_hidden_params", None)
|
||||
|
||||
if custom_pricing is True:
|
||||
if router_model_id is not None and router_model_id in litellm.model_cost:
|
||||
return_model = router_model_id
|
||||
else:
|
||||
return_model = model
|
||||
|
||||
if base_model is not None:
|
||||
return_model = base_model
|
||||
|
||||
if completion_response_model is None and hidden_params is not None:
|
||||
if (
|
||||
hidden_params.get("model", None) is not None
|
||||
|
@ -559,6 +564,7 @@ def completion_cost( # noqa: PLR0915
|
|||
base_model: Optional[str] = None,
|
||||
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
|
||||
litellm_model_name: Optional[str] = None,
|
||||
router_model_id: Optional[str] = None,
|
||||
) -> float:
|
||||
"""
|
||||
Calculate the cost of a given completion call for GPT-3.5-turbo, llama2, or any LiteLLM-supported LLM.
|
||||
|
@ -617,12 +623,12 @@ def completion_cost( # noqa: PLR0915
|
|||
custom_llm_provider=custom_llm_provider,
|
||||
custom_pricing=custom_pricing,
|
||||
base_model=base_model,
|
||||
router_model_id=router_model_id,
|
||||
)
|
||||
|
||||
potential_model_names = [selected_model]
|
||||
if model is not None:
|
||||
potential_model_names.append(model)
|
||||
|
||||
for idx, model in enumerate(potential_model_names):
|
||||
try:
|
||||
verbose_logger.info(
|
||||
|
@ -943,6 +949,7 @@ def response_cost_calculator(
|
|||
prompt: str = "",
|
||||
standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
|
||||
litellm_model_name: Optional[str] = None,
|
||||
router_model_id: Optional[str] = None,
|
||||
) -> float:
|
||||
"""
|
||||
Returns
|
||||
|
@ -973,6 +980,8 @@ def response_cost_calculator(
|
|||
base_model=base_model,
|
||||
prompt=prompt,
|
||||
standard_built_in_tools_params=standard_built_in_tools_params,
|
||||
litellm_model_name=litellm_model_name,
|
||||
router_model_id=router_model_id,
|
||||
)
|
||||
return response_cost
|
||||
except Exception as e:
|
||||
|
@ -1149,8 +1158,128 @@ def batch_cost_calculator(
|
|||
return total_prompt_cost, total_completion_cost
|
||||
|
||||
|
||||
class RealtimeAPITokenUsageProcessor:
|
||||
@staticmethod
|
||||
def collect_usage_from_realtime_stream_results(
|
||||
results: OpenAIRealtimeStreamList,
|
||||
) -> List[Usage]:
|
||||
"""
|
||||
Collect usage from realtime stream results
|
||||
"""
|
||||
response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
|
||||
List[OpenAIRealtimeStreamResponseBaseObject],
|
||||
[result for result in results if result["type"] == "response.done"],
|
||||
)
|
||||
usage_objects: List[Usage] = []
|
||||
for result in response_done_events:
|
||||
usage_object = (
|
||||
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||
result["response"].get("usage", {})
|
||||
)
|
||||
)
|
||||
usage_objects.append(usage_object)
|
||||
return usage_objects
|
||||
|
||||
@staticmethod
|
||||
def combine_usage_objects(usage_objects: List[Usage]) -> Usage:
|
||||
"""
|
||||
Combine multiple Usage objects into a single Usage object, checking model keys for nested values.
|
||||
"""
|
||||
from litellm.types.utils import (
|
||||
CompletionTokensDetails,
|
||||
PromptTokensDetailsWrapper,
|
||||
Usage,
|
||||
)
|
||||
|
||||
combined = Usage()
|
||||
|
||||
# Sum basic token counts
|
||||
for usage in usage_objects:
|
||||
# Handle direct attributes by checking what exists in the model
|
||||
for attr in dir(usage):
|
||||
if not attr.startswith("_") and not callable(getattr(usage, attr)):
|
||||
current_val = getattr(combined, attr, 0)
|
||||
new_val = getattr(usage, attr, 0)
|
||||
if (
|
||||
new_val is not None
|
||||
and isinstance(new_val, (int, float))
|
||||
and isinstance(current_val, (int, float))
|
||||
):
|
||||
setattr(combined, attr, current_val + new_val)
|
||||
# Handle nested prompt_tokens_details
|
||||
if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
|
||||
if (
|
||||
not hasattr(combined, "prompt_tokens_details")
|
||||
or not combined.prompt_tokens_details
|
||||
):
|
||||
combined.prompt_tokens_details = PromptTokensDetailsWrapper()
|
||||
|
||||
# Check what keys exist in the model's prompt_tokens_details
|
||||
for attr in dir(usage.prompt_tokens_details):
|
||||
if not attr.startswith("_") and not callable(
|
||||
getattr(usage.prompt_tokens_details, attr)
|
||||
):
|
||||
current_val = getattr(combined.prompt_tokens_details, attr, 0)
|
||||
new_val = getattr(usage.prompt_tokens_details, attr, 0)
|
||||
if new_val is not None:
|
||||
setattr(
|
||||
combined.prompt_tokens_details,
|
||||
attr,
|
||||
current_val + new_val,
|
||||
)
|
||||
|
||||
# Handle nested completion_tokens_details
|
||||
if (
|
||||
hasattr(usage, "completion_tokens_details")
|
||||
and usage.completion_tokens_details
|
||||
):
|
||||
if (
|
||||
not hasattr(combined, "completion_tokens_details")
|
||||
or not combined.completion_tokens_details
|
||||
):
|
||||
combined.completion_tokens_details = CompletionTokensDetails()
|
||||
|
||||
# Check what keys exist in the model's completion_tokens_details
|
||||
for attr in dir(usage.completion_tokens_details):
|
||||
if not attr.startswith("_") and not callable(
|
||||
getattr(usage.completion_tokens_details, attr)
|
||||
):
|
||||
current_val = getattr(
|
||||
combined.completion_tokens_details, attr, 0
|
||||
)
|
||||
new_val = getattr(usage.completion_tokens_details, attr, 0)
|
||||
if new_val is not None:
|
||||
setattr(
|
||||
combined.completion_tokens_details,
|
||||
attr,
|
||||
current_val + new_val,
|
||||
)
|
||||
|
||||
return combined
|
||||
|
||||
@staticmethod
|
||||
def collect_and_combine_usage_from_realtime_stream_results(
|
||||
results: OpenAIRealtimeStreamList,
|
||||
) -> Usage:
|
||||
"""
|
||||
Collect and combine usage from realtime stream results
|
||||
"""
|
||||
collected_usage_objects = (
|
||||
RealtimeAPITokenUsageProcessor.collect_usage_from_realtime_stream_results(
|
||||
results
|
||||
)
|
||||
)
|
||||
combined_usage_object = RealtimeAPITokenUsageProcessor.combine_usage_objects(
|
||||
collected_usage_objects
|
||||
)
|
||||
return combined_usage_object
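
A minimal usage sketch for the combiner (token counts are illustrative; the import path follows the `from litellm.cost_calculator import RealtimeAPITokenUsageProcessor` line added later in this diff):

```python
from litellm.cost_calculator import RealtimeAPITokenUsageProcessor
from litellm.types.utils import Usage

combined = RealtimeAPITokenUsageProcessor.combine_usage_objects(
    usage_objects=[
        Usage(prompt_tokens=10, completion_tokens=5, total_tokens=15),
        Usage(prompt_tokens=20, completion_tokens=2, total_tokens=22),
    ]
)
# Numeric fields are summed across the per-"response.done" usage objects:
# prompt_tokens=30, completion_tokens=7, total_tokens=37
print(combined.prompt_tokens, combined.completion_tokens, combined.total_tokens)
```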
|
||||
|
||||
|
||||
def handle_realtime_stream_cost_calculation(
|
||||
results: OpenAIRealtimeStreamList, custom_llm_provider: str, litellm_model_name: str
|
||||
results: OpenAIRealtimeStreamList,
|
||||
combined_usage_object: Usage,
|
||||
custom_llm_provider: str,
|
||||
litellm_model_name: str,
|
||||
) -> float:
|
||||
"""
|
||||
Handles the cost calculation for realtime stream responses.
|
||||
|
@ -1160,10 +1289,6 @@ def handle_realtime_stream_cost_calculation(
|
|||
Args:
|
||||
results: A list of OpenAIRealtimeStreamBaseObject objects
|
||||
"""
|
||||
response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
|
||||
List[OpenAIRealtimeStreamResponseBaseObject],
|
||||
[result for result in results if result["type"] == "response.done"],
|
||||
)
|
||||
received_model = None
|
||||
potential_model_names = []
|
||||
for result in results:
|
||||
|
@ -1176,21 +1301,19 @@ def handle_realtime_stream_cost_calculation(
|
|||
potential_model_names.append(litellm_model_name)
|
||||
input_cost_per_token = 0.0
|
||||
output_cost_per_token = 0.0
|
||||
for result in response_done_events:
|
||||
usage_object = (
|
||||
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
|
||||
result["response"].get("usage", {})
|
||||
)
|
||||
)
|
||||
|
||||
for model_name in potential_model_names:
|
||||
for model_name in potential_model_names:
|
||||
try:
|
||||
_input_cost_per_token, _output_cost_per_token = generic_cost_per_token(
|
||||
model=model_name,
|
||||
usage=usage_object,
|
||||
usage=combined_usage_object,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
input_cost_per_token += _input_cost_per_token
|
||||
output_cost_per_token += _output_cost_per_token
|
||||
except Exception:
|
||||
continue
|
||||
input_cost_per_token += _input_cost_per_token
|
||||
output_cost_per_token += _output_cost_per_token
|
||||
break # exit if we find a valid model
|
||||
total_cost = input_cost_per_token + output_cost_per_token
|
||||
|
||||
return total_cost
|
||||
|
|
|
@ -110,5 +110,8 @@ def get_litellm_params(
|
|||
"azure_password": kwargs.get("azure_password"),
|
||||
"max_retries": max_retries,
|
||||
"timeout": kwargs.get("timeout"),
|
||||
"bucket_name": kwargs.get("bucket_name"),
|
||||
"vertex_credentials": kwargs.get("vertex_credentials"),
|
||||
"vertex_project": kwargs.get("vertex_project"),
|
||||
}
|
||||
return litellm_params
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from typing import Literal, Optional
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.exceptions import BadRequestError
|
||||
from litellm.types.utils import LlmProviders, LlmProvidersSet
|
||||
|
||||
|
@ -43,9 +42,6 @@ def get_supported_openai_params( # noqa: PLR0915
|
|||
provider_config = None
|
||||
|
||||
if provider_config and request_type == "chat_completion":
|
||||
verbose_logger.info(
|
||||
f"using provider_config: {provider_config} for checking supported params"
|
||||
)
|
||||
return provider_config.get_supported_openai_params(model=model)
|
||||
|
||||
if custom_llm_provider == "bedrock":
|
||||
|
|
|
@ -33,6 +33,7 @@ from litellm.constants import (
|
|||
DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT,
|
||||
)
|
||||
from litellm.cost_calculator import (
|
||||
RealtimeAPITokenUsageProcessor,
|
||||
_select_model_name_for_cost_calc,
|
||||
handle_realtime_stream_cost_calculation,
|
||||
)
|
||||
|
@ -621,7 +622,6 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
] = RawRequestTypedDict(
|
||||
error=str(e),
|
||||
)
|
||||
traceback.print_exc()
|
||||
_metadata[
|
||||
"raw_request"
|
||||
] = "Unable to Log \
|
||||
|
@ -905,6 +905,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
],
|
||||
cache_hit: Optional[bool] = None,
|
||||
litellm_model_name: Optional[str] = None,
|
||||
router_model_id: Optional[str] = None,
|
||||
) -> Optional[float]:
|
||||
"""
|
||||
Calculate response cost using result + logging object variables.
|
||||
|
@ -943,6 +944,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
"custom_pricing": custom_pricing,
|
||||
"prompt": prompt,
|
||||
"standard_built_in_tools_params": self.standard_built_in_tools_params,
|
||||
"router_model_id": router_model_id,
|
||||
}
|
||||
except Exception as e: # error creating kwargs for cost calculation
|
||||
debug_info = StandardLoggingModelCostFailureDebugInformation(
|
||||
|
@ -1054,11 +1056,18 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
## else set cost to None
|
||||
|
||||
if self.call_type == CallTypes.arealtime.value and isinstance(result, list):
|
||||
combined_usage_object = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
|
||||
results=result
|
||||
)
|
||||
self.model_call_details[
|
||||
"response_cost"
|
||||
] = handle_realtime_stream_cost_calculation(
|
||||
result, self.custom_llm_provider, self.model
|
||||
results=result,
|
||||
combined_usage_object=combined_usage_object,
|
||||
custom_llm_provider=self.custom_llm_provider,
|
||||
litellm_model_name=self.model,
|
||||
)
|
||||
self.model_call_details["combined_usage_object"] = combined_usage_object
|
||||
if (
|
||||
standard_logging_object is None
|
||||
and result is not None
|
||||
|
@ -3132,6 +3141,7 @@ class StandardLoggingPayloadSetup:
|
|||
prompt_integration: Optional[str] = None,
|
||||
applied_guardrails: Optional[List[str]] = None,
|
||||
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
|
||||
usage_object: Optional[dict] = None,
|
||||
) -> StandardLoggingMetadata:
|
||||
"""
|
||||
Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.
|
||||
|
@ -3179,6 +3189,7 @@ class StandardLoggingPayloadSetup:
|
|||
prompt_management_metadata=prompt_management_metadata,
|
||||
applied_guardrails=applied_guardrails,
|
||||
mcp_tool_call_metadata=mcp_tool_call_metadata,
|
||||
usage_object=usage_object,
|
||||
)
|
||||
if isinstance(metadata, dict):
|
||||
# Filter the metadata dictionary to include only the specified keys
|
||||
|
@ -3204,8 +3215,12 @@ class StandardLoggingPayloadSetup:
|
|||
return clean_metadata
|
||||
|
||||
@staticmethod
|
||||
def get_usage_from_response_obj(response_obj: Optional[dict]) -> Usage:
|
||||
def get_usage_from_response_obj(
|
||||
response_obj: Optional[dict], combined_usage_object: Optional[Usage] = None
|
||||
) -> Usage:
|
||||
## BASE CASE ##
|
||||
if combined_usage_object is not None:
|
||||
return combined_usage_object
|
||||
if response_obj is None:
|
||||
return Usage(
|
||||
prompt_tokens=0,
|
||||
|
@ -3334,6 +3349,7 @@ class StandardLoggingPayloadSetup:
|
|||
litellm_overhead_time_ms=None,
|
||||
batch_models=None,
|
||||
litellm_model_name=None,
|
||||
usage_object=None,
|
||||
)
|
||||
if hidden_params is not None:
|
||||
for key in StandardLoggingHiddenParams.__annotations__.keys():
|
||||
|
@ -3450,6 +3466,7 @@ def get_standard_logging_object_payload(
|
|||
litellm_overhead_time_ms=None,
|
||||
batch_models=None,
|
||||
litellm_model_name=None,
|
||||
usage_object=None,
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -3466,8 +3483,12 @@ def get_standard_logging_object_payload(
|
|||
call_type = kwargs.get("call_type")
|
||||
cache_hit = kwargs.get("cache_hit", False)
|
||||
usage = StandardLoggingPayloadSetup.get_usage_from_response_obj(
|
||||
response_obj=response_obj
|
||||
response_obj=response_obj,
|
||||
combined_usage_object=cast(
|
||||
Optional[Usage], kwargs.get("combined_usage_object")
|
||||
),
|
||||
)
|
||||
|
||||
id = response_obj.get("id", kwargs.get("litellm_call_id"))
|
||||
|
||||
_model_id = metadata.get("model_info", {}).get("id", "")
|
||||
|
@ -3506,6 +3527,7 @@ def get_standard_logging_object_payload(
|
|||
prompt_integration=kwargs.get("prompt_integration", None),
|
||||
applied_guardrails=kwargs.get("applied_guardrails", None),
|
||||
mcp_tool_call_metadata=kwargs.get("mcp_tool_call_metadata", None),
|
||||
usage_object=usage.model_dump(),
|
||||
)
|
||||
|
||||
_request_body = proxy_server_request.get("body", {})
|
||||
|
@ -3646,6 +3668,7 @@ def get_standard_logging_metadata(
|
|||
prompt_management_metadata=None,
|
||||
applied_guardrails=None,
|
||||
mcp_tool_call_metadata=None,
|
||||
usage_object=None,
|
||||
)
|
||||
if isinstance(metadata, dict):
|
||||
# Filter the metadata dictionary to include only the specified keys
|
||||
|
@ -3740,6 +3763,7 @@ def create_dummy_standard_logging_payload() -> StandardLoggingPayload:
|
|||
litellm_overhead_time_ms=None,
|
||||
batch_models=None,
|
||||
litellm_model_name=None,
|
||||
usage_object=None,
|
||||
)
|
||||
|
||||
# Convert numeric values to appropriate types
|
||||
|
|
|
@ -90,35 +90,45 @@ def _generic_cost_per_character(
|
|||
return prompt_cost, completion_cost
|
||||
|
||||
|
||||
def _get_prompt_token_base_cost(model_info: ModelInfo, usage: Usage) -> float:
|
||||
def _get_token_base_cost(model_info: ModelInfo, usage: Usage) -> Tuple[float, float]:
|
||||
"""
|
||||
Return the base prompt and completion token costs for a given model and usage.
|
||||
|
||||
If input_tokens > 128k and `input_cost_per_token_above_128k_tokens` is set, then we use the `input_cost_per_token_above_128k_tokens` field.
|
||||
If input_tokens > threshold and `input_cost_per_token_above_[x]k_tokens` or `input_cost_per_token_above_[x]_tokens` is set,
|
||||
then we use the corresponding threshold cost.
|
||||
"""
|
||||
input_cost_per_token_above_128k_tokens = model_info.get(
|
||||
"input_cost_per_token_above_128k_tokens"
|
||||
)
|
||||
if _is_above_128k(usage.prompt_tokens) and input_cost_per_token_above_128k_tokens:
|
||||
return input_cost_per_token_above_128k_tokens
|
||||
return model_info["input_cost_per_token"]
|
||||
prompt_base_cost = model_info["input_cost_per_token"]
|
||||
completion_base_cost = model_info["output_cost_per_token"]
|
||||
|
||||
## CHECK IF ABOVE THRESHOLD
|
||||
threshold: Optional[float] = None
|
||||
for key, value in sorted(model_info.items(), reverse=True):
|
||||
if key.startswith("input_cost_per_token_above_") and value is not None:
|
||||
try:
|
||||
# Handle both formats: _above_128k_tokens and _above_128_tokens
|
||||
threshold_str = key.split("_above_")[1].split("_tokens")[0]
|
||||
threshold = float(threshold_str.replace("k", "")) * (
|
||||
1000 if "k" in threshold_str else 1
|
||||
)
|
||||
if usage.prompt_tokens > threshold:
|
||||
prompt_base_cost = cast(
|
||||
float,
|
||||
model_info.get(key, prompt_base_cost),
|
||||
)
|
||||
completion_base_cost = cast(
|
||||
float,
|
||||
model_info.get(
|
||||
f"output_cost_per_token_above_{threshold_str}_tokens",
|
||||
completion_base_cost,
|
||||
),
|
||||
)
|
||||
break
|
||||
except (IndexError, ValueError):
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
def _get_completion_token_base_cost(model_info: ModelInfo, usage: Usage) -> float:
|
||||
"""
|
||||
Return prompt cost for a given model and usage.
|
||||
|
||||
If input_tokens > 128k and `input_cost_per_token_above_128k_tokens` is set, then we use the `input_cost_per_token_above_128k_tokens` field.
|
||||
"""
|
||||
output_cost_per_token_above_128k_tokens = model_info.get(
|
||||
"output_cost_per_token_above_128k_tokens"
|
||||
)
|
||||
if (
|
||||
_is_above_128k(usage.completion_tokens)
|
||||
and output_cost_per_token_above_128k_tokens
|
||||
):
|
||||
return output_cost_per_token_above_128k_tokens
|
||||
return model_info["output_cost_per_token"]
|
||||
return prompt_base_cost, completion_base_cost
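
A small illustration of the threshold-key convention this function parses (values are made up; the call is left as a comment since `_get_token_base_cost` is a private helper and its module path is not shown in this hunk):

```python
from litellm.types.utils import Usage

model_info = {
    "input_cost_per_token": 1e-06,
    "output_cost_per_token": 2e-06,
    "input_cost_per_token_above_128k_tokens": 2e-06,
    "output_cost_per_token_above_128k_tokens": 4e-06,
}
usage = Usage(prompt_tokens=200_000, completion_tokens=1_000, total_tokens=201_000)

# _get_token_base_cost(model_info=model_info, usage=usage) would return (2e-06, 4e-06):
# the "128k" threshold is parsed from the key name, 200_000 > 128_000, so the
# above-threshold input and output rates are used as the base per-token costs.
```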
|
||||
|
||||
|
||||
def calculate_cost_component(
|
||||
|
@ -215,7 +225,9 @@ def generic_cost_per_token(
|
|||
if text_tokens == 0:
|
||||
text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens
|
||||
|
||||
prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)
|
||||
prompt_base_cost, completion_base_cost = _get_token_base_cost(
|
||||
model_info=model_info, usage=usage
|
||||
)
|
||||
|
||||
prompt_cost = float(text_tokens) * prompt_base_cost
|
||||
|
||||
|
@ -253,9 +265,6 @@ def generic_cost_per_token(
|
|||
)
|
||||
|
||||
## CALCULATE OUTPUT COST
|
||||
completion_base_cost = _get_completion_token_base_cost(
|
||||
model_info=model_info, usage=usage
|
||||
)
|
||||
text_tokens = usage.completion_tokens
|
||||
audio_tokens = 0
|
||||
if usage.completion_tokens_details is not None:
|
||||
|
|
|
@ -36,11 +36,16 @@ class ResponseMetadata:
|
|||
self, logging_obj: LiteLLMLoggingObject, model: Optional[str], kwargs: dict
|
||||
) -> None:
|
||||
"""Set hidden parameters on the response"""
|
||||
|
||||
## ADD OTHER HIDDEN PARAMS
|
||||
model_id = kwargs.get("model_info", {}).get("id", None)
|
||||
new_params = {
|
||||
"litellm_call_id": getattr(logging_obj, "litellm_call_id", None),
|
||||
"model_id": kwargs.get("model_info", {}).get("id", None),
|
||||
"api_base": get_api_base(model=model or "", optional_params=kwargs),
|
||||
"response_cost": logging_obj._response_cost_calculator(result=self.result),
|
||||
"model_id": model_id,
|
||||
"response_cost": logging_obj._response_cost_calculator(
|
||||
result=self.result, litellm_model_name=model, router_model_id=model_id
|
||||
),
|
||||
"additional_headers": process_response_headers(
|
||||
self._get_value_from_hidden_params("additional_headers") or {}
|
||||
),
|
||||
|
|
|
@ -2,7 +2,10 @@
|
|||
Common utility functions used for translating messages across providers
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Literal, Optional, Union, cast
|
||||
import io
|
||||
import mimetypes
|
||||
from os import PathLike
|
||||
from typing import Dict, List, Literal, Mapping, Optional, Union, cast
|
||||
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
|
@ -10,7 +13,13 @@ from litellm.types.llms.openai import (
|
|||
ChatCompletionFileObject,
|
||||
ChatCompletionUserMessage,
|
||||
)
|
||||
from litellm.types.utils import Choices, ModelResponse, StreamingChoices
|
||||
from litellm.types.utils import (
|
||||
Choices,
|
||||
ExtractedFileData,
|
||||
FileTypes,
|
||||
ModelResponse,
|
||||
StreamingChoices,
|
||||
)
|
||||
|
||||
DEFAULT_USER_CONTINUE_MESSAGE = ChatCompletionUserMessage(
|
||||
content="Please continue.", role="user"
|
||||
|
@ -350,6 +359,68 @@ def update_messages_with_model_file_ids(
|
|||
return messages
|
||||
|
||||
|
||||
def extract_file_data(file_data: FileTypes) -> ExtractedFileData:
|
||||
"""
|
||||
Extracts and processes file data from various input formats.
|
||||
|
||||
Args:
|
||||
file_data: Can be a tuple of (filename, content, [content_type], [headers]) or direct file content
|
||||
|
||||
Returns:
|
||||
ExtractedFileData containing:
|
||||
- filename: Name of the file if provided
|
||||
- content: The file content in bytes
|
||||
- content_type: MIME type of the file
|
||||
- headers: Any additional headers
|
||||
"""
|
||||
# Parse the file_data based on its type
|
||||
filename = None
|
||||
file_content = None
|
||||
content_type = None
|
||||
file_headers: Mapping[str, str] = {}
|
||||
|
||||
if isinstance(file_data, tuple):
|
||||
if len(file_data) == 2:
|
||||
filename, file_content = file_data
|
||||
elif len(file_data) == 3:
|
||||
filename, file_content, content_type = file_data
|
||||
elif len(file_data) == 4:
|
||||
filename, file_content, content_type, file_headers = file_data
|
||||
else:
|
||||
file_content = file_data
|
||||
# Convert content to bytes
|
||||
if isinstance(file_content, (str, PathLike)):
|
||||
# If it's a path, open and read the file
|
||||
with open(file_content, "rb") as f:
|
||||
content = f.read()
|
||||
elif isinstance(file_content, io.IOBase):
|
||||
# If it's a file-like object
|
||||
content = file_content.read()
|
||||
|
||||
if isinstance(content, str):
|
||||
content = content.encode("utf-8")
|
||||
# Reset file pointer to beginning
|
||||
file_content.seek(0)
|
||||
elif isinstance(file_content, bytes):
|
||||
content = file_content
|
||||
else:
|
||||
raise ValueError(f"Unsupported file content type: {type(file_content)}")
|
||||
|
||||
# Use provided content type or guess based on filename
|
||||
if not content_type:
|
||||
content_type = (
|
||||
mimetypes.guess_type(filename)[0]
|
||||
if filename
|
||||
else "application/octet-stream"
|
||||
)
|
||||
|
||||
return ExtractedFileData(
|
||||
filename=filename,
|
||||
content=content,
|
||||
content_type=content_type,
|
||||
headers=file_headers,
|
||||
)
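
A minimal usage sketch (file contents are placeholders; the import path matches the `extract_file_data` import added to the Google AI Studio files handler later in this diff):

```python
from litellm.litellm_core_utils.prompt_templates.common_utils import extract_file_data

# Tuple form: (filename, content, content_type)
extracted = extract_file_data(("notes.txt", b"hello world", "text/plain"))
print(extracted)

# Raw-bytes form: content_type falls back to application/octet-stream
extracted = extract_file_data(b"hello world")
print(extracted)
```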
|
||||
|
||||
def unpack_defs(schema, defs):
|
||||
properties = schema.get("properties", None)
|
||||
if properties is None:
|
||||
|
@ -381,3 +452,4 @@ def unpack_defs(schema, defs):
|
|||
unpack_defs(ref, defs)
|
||||
value["items"] = ref
|
||||
continue
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import copy
|
||||
import json
|
||||
import re
|
||||
import traceback
|
||||
import uuid
|
||||
import xml.etree.ElementTree as ET
|
||||
from enum import Enum
|
||||
|
@ -748,7 +747,6 @@ def convert_to_anthropic_image_obj(
|
|||
data=base64_data,
|
||||
)
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
if "Error: Unable to fetch image from URL" in str(e):
|
||||
raise e
|
||||
raise Exception(
|
||||
|
|
|
@ -50,6 +50,7 @@ class AiohttpOpenAIChatConfig(OpenAILikeChatConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -4,7 +4,7 @@ Calling + translation logic for anthropic's `/v1/messages` endpoint
|
|||
|
||||
import copy
|
||||
import json
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
|
||||
|
||||
import httpx # type: ignore
|
||||
|
||||
|
@ -301,6 +301,7 @@ class AnthropicChatCompletion(BaseLLM):
|
|||
model=model,
|
||||
messages=messages,
|
||||
optional_params={**optional_params, "is_vertex_request": is_vertex_request},
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
config = ProviderConfigManager.get_provider_chat_config(
|
||||
|
@ -491,29 +492,10 @@ class ModelResponseIterator:
|
|||
return False
|
||||
|
||||
def _handle_usage(self, anthropic_usage_chunk: Union[dict, UsageDelta]) -> Usage:
|
||||
usage_block = Usage(
|
||||
prompt_tokens=anthropic_usage_chunk.get("input_tokens", 0),
|
||||
completion_tokens=anthropic_usage_chunk.get("output_tokens", 0),
|
||||
total_tokens=anthropic_usage_chunk.get("input_tokens", 0)
|
||||
+ anthropic_usage_chunk.get("output_tokens", 0),
|
||||
return AnthropicConfig().calculate_usage(
|
||||
usage_object=cast(dict, anthropic_usage_chunk), reasoning_content=None
|
||||
)
|
||||
|
||||
cache_creation_input_tokens = anthropic_usage_chunk.get(
|
||||
"cache_creation_input_tokens"
|
||||
)
|
||||
if cache_creation_input_tokens is not None and isinstance(
|
||||
cache_creation_input_tokens, int
|
||||
):
|
||||
usage_block["cache_creation_input_tokens"] = cache_creation_input_tokens
|
||||
|
||||
cache_read_input_tokens = anthropic_usage_chunk.get("cache_read_input_tokens")
|
||||
if cache_read_input_tokens is not None and isinstance(
|
||||
cache_read_input_tokens, int
|
||||
):
|
||||
usage_block["cache_read_input_tokens"] = cache_read_input_tokens
|
||||
|
||||
return usage_block
|
||||
|
||||
def _content_block_delta_helper(
|
||||
self, chunk: dict
|
||||
) -> Tuple[
|
||||
|
|
|
@ -682,6 +682,45 @@ class AnthropicConfig(BaseConfig):
|
|||
reasoning_content += block["thinking"]
|
||||
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
|
||||
|
||||
def calculate_usage(
|
||||
self, usage_object: dict, reasoning_content: Optional[str]
|
||||
) -> Usage:
|
||||
prompt_tokens = usage_object.get("input_tokens", 0)
|
||||
completion_tokens = usage_object.get("output_tokens", 0)
|
||||
_usage = usage_object
|
||||
cache_creation_input_tokens: int = 0
|
||||
cache_read_input_tokens: int = 0
|
||||
|
||||
if "cache_creation_input_tokens" in _usage:
|
||||
cache_creation_input_tokens = _usage["cache_creation_input_tokens"]
|
||||
if "cache_read_input_tokens" in _usage:
|
||||
cache_read_input_tokens = _usage["cache_read_input_tokens"]
|
||||
prompt_tokens += cache_read_input_tokens
|
||||
|
||||
prompt_tokens_details = PromptTokensDetailsWrapper(
|
||||
cached_tokens=cache_read_input_tokens
|
||||
)
|
||||
completion_token_details = (
|
||||
CompletionTokensDetailsWrapper(
|
||||
reasoning_tokens=token_counter(
|
||||
text=reasoning_content, count_response_tokens=True
|
||||
)
|
||||
)
|
||||
if reasoning_content
|
||||
else None
|
||||
)
|
||||
total_tokens = prompt_tokens + completion_tokens
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=total_tokens,
|
||||
prompt_tokens_details=prompt_tokens_details,
|
||||
cache_creation_input_tokens=cache_creation_input_tokens,
|
||||
cache_read_input_tokens=cache_read_input_tokens,
|
||||
completion_tokens_details=completion_token_details,
|
||||
)
|
||||
return usage
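
A short sketch of how Anthropic usage maps onto LiteLLM's `Usage` object (the `AnthropicConfig` import path is an assumption based on this file's location in the repo):

```python
from litellm.llms.anthropic.chat.transformation import AnthropicConfig

usage = AnthropicConfig().calculate_usage(
    usage_object={
        "input_tokens": 100,
        "output_tokens": 20,
        "cache_read_input_tokens": 40,
    },
    reasoning_content=None,
)
# Cache reads are folded into prompt_tokens (100 + 40 = 140) and also surfaced
# via prompt_tokens_details.cached_tokens.
print(usage.prompt_tokens, usage.completion_tokens)
```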
|
||||
|
||||
def transform_response(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -772,45 +811,14 @@ class AnthropicConfig(BaseConfig):
|
|||
)
|
||||
|
||||
## CALCULATING USAGE
|
||||
prompt_tokens = completion_response["usage"]["input_tokens"]
|
||||
completion_tokens = completion_response["usage"]["output_tokens"]
|
||||
_usage = completion_response["usage"]
|
||||
cache_creation_input_tokens: int = 0
|
||||
cache_read_input_tokens: int = 0
|
||||
usage = self.calculate_usage(
|
||||
usage_object=completion_response["usage"],
|
||||
reasoning_content=reasoning_content,
|
||||
)
|
||||
setattr(model_response, "usage", usage) # type: ignore
|
||||
|
||||
model_response.created = int(time.time())
|
||||
model_response.model = completion_response["model"]
|
||||
if "cache_creation_input_tokens" in _usage:
|
||||
cache_creation_input_tokens = _usage["cache_creation_input_tokens"]
|
||||
prompt_tokens += cache_creation_input_tokens
|
||||
if "cache_read_input_tokens" in _usage:
|
||||
cache_read_input_tokens = _usage["cache_read_input_tokens"]
|
||||
prompt_tokens += cache_read_input_tokens
|
||||
|
||||
prompt_tokens_details = PromptTokensDetailsWrapper(
|
||||
cached_tokens=cache_read_input_tokens
|
||||
)
|
||||
completion_token_details = (
|
||||
CompletionTokensDetailsWrapper(
|
||||
reasoning_tokens=token_counter(
|
||||
text=reasoning_content, count_response_tokens=True
|
||||
)
|
||||
)
|
||||
if reasoning_content
|
||||
else None
|
||||
)
|
||||
total_tokens = prompt_tokens + completion_tokens
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=total_tokens,
|
||||
prompt_tokens_details=prompt_tokens_details,
|
||||
cache_creation_input_tokens=cache_creation_input_tokens,
|
||||
cache_read_input_tokens=cache_read_input_tokens,
|
||||
completion_tokens_details=completion_token_details,
|
||||
)
|
||||
|
||||
setattr(model_response, "usage", usage) # type: ignore
|
||||
|
||||
model_response._hidden_params = _hidden_params
|
||||
return model_response
|
||||
|
@ -868,6 +876,7 @@ class AnthropicConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> Dict:
|
||||
|
|
|
@ -87,6 +87,7 @@ class AnthropicTextConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -293,6 +293,7 @@ class AzureOpenAIConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -39,6 +39,7 @@ class AzureAIStudioConfig(OpenAIConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -262,6 +262,7 @@ class BaseConfig(ABC):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from abc import abstractmethod
|
||||
from typing import TYPE_CHECKING, Any, List, Optional
|
||||
from typing import TYPE_CHECKING, Any, List, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
|
@ -33,23 +33,22 @@ class BaseFilesConfig(BaseConfig):
|
|||
) -> List[OpenAICreateFileRequestOptionalParams]:
|
||||
pass
|
||||
|
||||
def get_complete_url(
|
||||
def get_complete_file_url(
|
||||
self,
|
||||
api_base: Optional[str],
|
||||
api_key: Optional[str],
|
||||
model: str,
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
stream: Optional[bool] = None,
|
||||
) -> str:
|
||||
"""
|
||||
OPTIONAL
|
||||
|
||||
Get the complete url for the request
|
||||
|
||||
Some providers need `model` in `api_base`
|
||||
"""
|
||||
return api_base or ""
|
||||
data: CreateFileRequest,
|
||||
):
|
||||
return self.get_complete_url(
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
model=model,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def transform_create_file_request(
|
||||
|
@ -58,7 +57,7 @@ class BaseFilesConfig(BaseConfig):
|
|||
create_file_data: CreateFileRequest,
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
) -> dict:
|
||||
) -> Union[dict, str, bytes]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
|
|
|
@ -65,6 +65,7 @@ class BaseImageVariationConfig(BaseConfig, ABC):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -653,8 +653,10 @@ class AmazonConverseConfig(BaseConfig):
|
|||
cache_read_input_tokens = usage["cacheReadInputTokens"]
|
||||
input_tokens += cache_read_input_tokens
|
||||
if "cacheWriteInputTokens" in usage:
|
||||
"""
|
||||
Do not increment prompt_tokens with cacheWriteInputTokens
|
||||
"""
|
||||
cache_creation_input_tokens = usage["cacheWriteInputTokens"]
|
||||
input_tokens += cache_creation_input_tokens
|
||||
|
||||
prompt_tokens_details = PromptTokensDetailsWrapper(
|
||||
cached_tokens=cache_read_input_tokens
|
||||
|
@ -831,6 +833,7 @@ class AmazonConverseConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -442,6 +442,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -118,6 +118,7 @@ class ClarifaiConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -60,6 +60,7 @@ class CloudflareChatConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -118,6 +118,7 @@ class CohereChatConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -101,6 +101,7 @@ class CohereTextConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -229,6 +229,7 @@ class BaseLLMAIOHTTPHandler:
|
|||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
api_base=api_base,
|
||||
)
|
||||
|
||||
|
@ -498,6 +499,7 @@ class BaseLLMAIOHTTPHandler:
|
|||
model=model,
|
||||
messages=[{"role": "user", "content": "test"}],
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
api_base=api_base,
|
||||
)
|
||||
|
||||
|
|
|
@ -192,7 +192,7 @@ class AsyncHTTPHandler:
|
|||
async def post(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Union[dict, str]] = None, # type: ignore
|
||||
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
||||
json: Optional[dict] = None,
|
||||
params: Optional[dict] = None,
|
||||
headers: Optional[dict] = None,
|
||||
|
@ -427,7 +427,7 @@ class AsyncHTTPHandler:
|
|||
self,
|
||||
url: str,
|
||||
client: httpx.AsyncClient,
|
||||
data: Optional[Union[dict, str]] = None, # type: ignore
|
||||
data: Optional[Union[dict, str, bytes]] = None, # type: ignore
|
||||
json: Optional[dict] = None,
|
||||
params: Optional[dict] = None,
|
||||
headers: Optional[dict] = None,
|
||||
|
@ -527,7 +527,7 @@ class HTTPHandler:
|
|||
def post(
|
||||
self,
|
||||
url: str,
|
||||
data: Optional[Union[dict, str]] = None,
|
||||
data: Optional[Union[dict, str, bytes]] = None,
|
||||
json: Optional[Union[dict, str, List]] = None,
|
||||
params: Optional[dict] = None,
|
||||
headers: Optional[dict] = None,
|
||||
|
@ -573,7 +573,6 @@ class HTTPHandler:
|
|||
setattr(e, "text", error_text)
|
||||
|
||||
setattr(e, "status_code", e.response.status_code)
|
||||
|
||||
raise e
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
|
|
@ -247,6 +247,7 @@ class BaseLLMHTTPHandler:
|
|||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
api_base=api_base,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
api_base = provider_config.get_complete_url(
|
||||
|
@ -625,6 +626,7 @@ class BaseLLMHTTPHandler:
|
|||
model=model,
|
||||
messages=[],
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
api_base = provider_config.get_complete_url(
|
||||
|
@ -896,6 +898,7 @@ class BaseLLMHTTPHandler:
|
|||
model=model,
|
||||
messages=[],
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
if client is None or not isinstance(client, HTTPHandler):
|
||||
|
@ -1228,15 +1231,19 @@ class BaseLLMHTTPHandler:
|
|||
model="",
|
||||
messages=[],
|
||||
optional_params={},
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
api_base = provider_config.get_complete_url(
|
||||
api_base = provider_config.get_complete_file_url(
|
||||
api_base=api_base,
|
||||
api_key=api_key,
|
||||
model="",
|
||||
optional_params={},
|
||||
litellm_params=litellm_params,
|
||||
data=create_file_data,
|
||||
)
|
||||
if api_base is None:
|
||||
raise ValueError("api_base is required for create_file")
|
||||
|
||||
# Get the transformed request data for both steps
|
||||
transformed_request = provider_config.transform_create_file_request(
|
||||
|
@ -1263,48 +1270,57 @@ class BaseLLMHTTPHandler:
|
|||
else:
|
||||
sync_httpx_client = client
|
||||
|
||||
try:
|
||||
# Step 1: Initial request to get upload URL
|
||||
initial_response = sync_httpx_client.post(
|
||||
url=api_base,
|
||||
headers={
|
||||
**headers,
|
||||
**transformed_request["initial_request"]["headers"],
|
||||
},
|
||||
data=json.dumps(transformed_request["initial_request"]["data"]),
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
# Extract upload URL from response headers
|
||||
upload_url = initial_response.headers.get("X-Goog-Upload-URL")
|
||||
|
||||
if not upload_url:
|
||||
raise ValueError("Failed to get upload URL from initial request")
|
||||
|
||||
# Step 2: Upload the actual file
|
||||
if isinstance(transformed_request, str) or isinstance(
|
||||
transformed_request, bytes
|
||||
):
|
||||
upload_response = sync_httpx_client.post(
|
||||
url=upload_url,
|
||||
headers=transformed_request["upload_request"]["headers"],
|
||||
data=transformed_request["upload_request"]["data"],
|
||||
url=api_base,
|
||||
headers=headers,
|
||||
data=transformed_request,
|
||||
timeout=timeout,
|
||||
)
|
||||
else:
|
||||
try:
|
||||
# Step 1: Initial request to get upload URL
|
||||
initial_response = sync_httpx_client.post(
|
||||
url=api_base,
|
||||
headers={
|
||||
**headers,
|
||||
**transformed_request["initial_request"]["headers"],
|
||||
},
|
||||
data=json.dumps(transformed_request["initial_request"]["data"]),
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
return provider_config.transform_create_file_response(
|
||||
model=None,
|
||||
raw_response=upload_response,
|
||||
logging_obj=logging_obj,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
# Extract upload URL from response headers
|
||||
upload_url = initial_response.headers.get("X-Goog-Upload-URL")
|
||||
|
||||
except Exception as e:
|
||||
raise self._handle_error(
|
||||
e=e,
|
||||
provider_config=provider_config,
|
||||
)
|
||||
if not upload_url:
|
||||
raise ValueError("Failed to get upload URL from initial request")
|
||||
|
||||
# Step 2: Upload the actual file
|
||||
upload_response = sync_httpx_client.post(
|
||||
url=upload_url,
|
||||
headers=transformed_request["upload_request"]["headers"],
|
||||
data=transformed_request["upload_request"]["data"],
|
||||
timeout=timeout,
|
||||
)
|
||||
except Exception as e:
|
||||
raise self._handle_error(
|
||||
e=e,
|
||||
provider_config=provider_config,
|
||||
)
|
||||
|
||||
return provider_config.transform_create_file_response(
|
||||
model=None,
|
||||
raw_response=upload_response,
|
||||
logging_obj=logging_obj,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
async def async_create_file(
|
||||
self,
|
||||
transformed_request: dict,
|
||||
transformed_request: Union[bytes, str, dict],
|
||||
litellm_params: dict,
|
||||
provider_config: BaseFilesConfig,
|
||||
headers: dict,
|
||||
|
@ -1323,45 +1339,54 @@ class BaseLLMHTTPHandler:
|
|||
else:
|
||||
async_httpx_client = client
|
||||
|
||||
try:
|
||||
# Step 1: Initial request to get upload URL
|
||||
initial_response = await async_httpx_client.post(
|
||||
url=api_base,
|
||||
headers={
|
||||
**headers,
|
||||
**transformed_request["initial_request"]["headers"],
|
||||
},
|
||||
data=json.dumps(transformed_request["initial_request"]["data"]),
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
# Extract upload URL from response headers
|
||||
upload_url = initial_response.headers.get("X-Goog-Upload-URL")
|
||||
|
||||
if not upload_url:
|
||||
raise ValueError("Failed to get upload URL from initial request")
|
||||
|
||||
# Step 2: Upload the actual file
|
||||
if isinstance(transformed_request, str) or isinstance(
|
||||
transformed_request, bytes
|
||||
):
|
||||
upload_response = await async_httpx_client.post(
|
||||
url=upload_url,
|
||||
headers=transformed_request["upload_request"]["headers"],
|
||||
data=transformed_request["upload_request"]["data"],
|
||||
url=api_base,
|
||||
headers=headers,
|
||||
data=transformed_request,
|
||||
timeout=timeout,
|
||||
)
|
||||
else:
|
||||
try:
|
||||
# Step 1: Initial request to get upload URL
|
||||
initial_response = await async_httpx_client.post(
|
||||
url=api_base,
|
||||
headers={
|
||||
**headers,
|
||||
**transformed_request["initial_request"]["headers"],
|
||||
},
|
||||
data=json.dumps(transformed_request["initial_request"]["data"]),
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
return provider_config.transform_create_file_response(
|
||||
model=None,
|
||||
raw_response=upload_response,
|
||||
logging_obj=logging_obj,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
# Extract upload URL from response headers
|
||||
upload_url = initial_response.headers.get("X-Goog-Upload-URL")
|
||||
|
||||
except Exception as e:
|
||||
verbose_logger.exception(f"Error creating file: {e}")
|
||||
raise self._handle_error(
|
||||
e=e,
|
||||
provider_config=provider_config,
|
||||
)
|
||||
if not upload_url:
|
||||
raise ValueError("Failed to get upload URL from initial request")
|
||||
|
||||
# Step 2: Upload the actual file
|
||||
upload_response = await async_httpx_client.post(
|
||||
url=upload_url,
|
||||
headers=transformed_request["upload_request"]["headers"],
|
||||
data=transformed_request["upload_request"]["data"],
|
||||
timeout=timeout,
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_logger.exception(f"Error creating file: {e}")
|
||||
raise self._handle_error(
|
||||
e=e,
|
||||
provider_config=provider_config,
|
||||
)
|
||||
|
||||
return provider_config.transform_create_file_response(
|
||||
model=None,
|
||||
raw_response=upload_response,
|
||||
logging_obj=logging_obj,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
def list_files(self):
|
||||
"""
|
||||
|
|
|
@ -116,6 +116,7 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -171,6 +171,7 @@ class DeepgramAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -41,6 +41,7 @@ class FireworksAIMixin:
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -20,6 +20,7 @@ class GeminiModelInfo(BaseLLMModelInfo):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
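Across the provider configs touched in the hunks above and below (Databricks, Deepgram, Fireworks AI, Gemini, NLP Cloud, Ollama, OpenAI, Sagemaker, Triton, Voyage, WatsonX, and others), the change is the same: `litellm_params: dict` is threaded into `validate_environment`. A minimal sketch of the shared shape, using a hypothetical config class; only the signature mirrors this commit, the body is an illustrative assumption:

```python
from typing import List, Optional

from litellm.types.llms.openai import AllMessageValues


class ExampleProviderConfig:
    """Hypothetical config class; only the signature mirrors this commit."""

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,  # newly threaded through in this commit
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        # Typical body: attach auth and content-type headers, then return them.
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        headers.setdefault("Content-Type", "application/json")
        return headers
```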
@ -4,11 +4,12 @@ Supports writing files to Google AI Studio Files API.
|
|||
For vertex ai, check out the vertex_ai/files/handler.py file.
|
||||
"""
|
||||
import time
|
||||
from typing import List, Mapping, Optional
|
||||
from typing import List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.litellm_core_utils.prompt_templates.common_utils import extract_file_data
|
||||
from litellm.llms.base_llm.files.transformation import (
|
||||
BaseFilesConfig,
|
||||
LiteLLMLoggingObj,
|
||||
|
@ -91,66 +92,28 @@ class GoogleAIStudioFilesHandler(GeminiModelInfo, BaseFilesConfig):
|
|||
if file_data is None:
|
||||
raise ValueError("File data is required")
|
||||
|
||||
# Parse the file_data based on its type
|
||||
filename = None
|
||||
file_content = None
|
||||
content_type = None
|
||||
file_headers: Mapping[str, str] = {}
|
||||
|
||||
if isinstance(file_data, tuple):
|
||||
if len(file_data) == 2:
|
||||
filename, file_content = file_data
|
||||
elif len(file_data) == 3:
|
||||
filename, file_content, content_type = file_data
|
||||
elif len(file_data) == 4:
|
||||
filename, file_content, content_type, file_headers = file_data
|
||||
else:
|
||||
file_content = file_data
|
||||
|
||||
# Handle the file content based on its type
|
||||
import io
|
||||
from os import PathLike
|
||||
|
||||
# Convert content to bytes
|
||||
if isinstance(file_content, (str, PathLike)):
|
||||
# If it's a path, open and read the file
|
||||
with open(file_content, "rb") as f:
|
||||
content = f.read()
|
||||
elif isinstance(file_content, io.IOBase):
|
||||
# If it's a file-like object
|
||||
content = file_content.read()
|
||||
if isinstance(content, str):
|
||||
content = content.encode("utf-8")
|
||||
elif isinstance(file_content, bytes):
|
||||
content = file_content
|
||||
else:
|
||||
raise ValueError(f"Unsupported file content type: {type(file_content)}")
|
||||
# Use the common utility function to extract file data
|
||||
extracted_data = extract_file_data(file_data)
|
||||
|
||||
# Get file size
|
||||
file_size = len(content)
|
||||
|
||||
# Use provided content type or guess based on filename
|
||||
if not content_type:
|
||||
import mimetypes
|
||||
|
||||
content_type = (
|
||||
mimetypes.guess_type(filename)[0]
|
||||
if filename
|
||||
else "application/octet-stream"
|
||||
)
|
||||
file_size = len(extracted_data["content"])
|
||||
|
||||
# Step 1: Initial resumable upload request
|
||||
headers = {
|
||||
"X-Goog-Upload-Protocol": "resumable",
|
||||
"X-Goog-Upload-Command": "start",
|
||||
"X-Goog-Upload-Header-Content-Length": str(file_size),
|
||||
"X-Goog-Upload-Header-Content-Type": content_type,
|
||||
"X-Goog-Upload-Header-Content-Type": extracted_data["content_type"],
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
headers.update(file_headers) # Add any custom headers
|
||||
headers.update(extracted_data["headers"]) # Add any custom headers
|
||||
|
||||
# Initial metadata request body
|
||||
initial_data = {"file": {"display_name": filename or str(int(time.time()))}}
|
||||
initial_data = {
|
||||
"file": {
|
||||
"display_name": extracted_data["filename"] or str(int(time.time()))
|
||||
}
|
||||
}
|
||||
|
||||
# Step 2: Actual file upload data
|
||||
upload_headers = {
|
||||
|
@ -161,7 +124,10 @@ class GoogleAIStudioFilesHandler(GeminiModelInfo, BaseFilesConfig):
|
|||
|
||||
return {
|
||||
"initial_request": {"headers": headers, "data": initial_data},
|
||||
"upload_request": {"headers": upload_headers, "data": content},
|
||||
"upload_request": {
|
||||
"headers": upload_headers,
|
||||
"data": extracted_data["content"],
|
||||
},
|
||||
}
|
||||
|
||||
def transform_create_file_response(
|
||||
|
|
|
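Tying the handler and the transformation above together, the Google AI Studio file upload is a two-step resumable flow: an initial JSON request whose response carries an `X-Goog-Upload-URL` header, followed by a second POST of the raw bytes to that URL. A rough standalone sketch with `httpx`; the endpoint, the `?key=` auth style, and the step-2 command/offset headers are assumptions since they sit outside the hunks shown here:

```python
import json

import httpx

# Assumed endpoint and key handling; not taken verbatim from this diff.
API_BASE = "https://generativelanguage.googleapis.com/upload/v1beta/files"
API_KEY = "YOUR_GEMINI_API_KEY"  # placeholder


def upload_file(content: bytes, filename: str, content_type: str) -> dict:
    # Step 1: start a resumable upload session (headers mirror the transform above).
    start_headers = {
        "X-Goog-Upload-Protocol": "resumable",
        "X-Goog-Upload-Command": "start",
        "X-Goog-Upload-Header-Content-Length": str(len(content)),
        "X-Goog-Upload-Header-Content-Type": content_type,
        "Content-Type": "application/json",
    }
    initial = httpx.post(
        f"{API_BASE}?key={API_KEY}",
        headers=start_headers,
        content=json.dumps({"file": {"display_name": filename}}),
    )
    upload_url = initial.headers.get("X-Goog-Upload-URL")
    if not upload_url:
        raise ValueError("Failed to get upload URL from initial request")

    # Step 2: send the bytes to the returned session URL.
    # The "upload, finalize" command and zero offset are assumptions here.
    upload_headers = {
        "X-Goog-Upload-Command": "upload, finalize",
        "X-Goog-Upload-Offset": "0",
    }
    return httpx.post(upload_url, headers=upload_headers, content=content).json()
```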
@ -1,6 +1,6 @@
|
|||
import logging
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Any, List, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
|
@ -18,7 +18,6 @@ from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
|||
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
|
||||
from ..common_utils import HuggingFaceError, _fetch_inference_provider_mapping
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
BASE_URL = "https://router.huggingface.co"
|
||||
|
@ -34,7 +33,8 @@ class HuggingFaceChatConfig(OpenAIGPTConfig):
|
|||
headers: dict,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
optional_params: Dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
@ -51,7 +51,9 @@ class HuggingFaceChatConfig(OpenAIGPTConfig):
|
|||
def get_error_class(
|
||||
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
|
||||
) -> BaseLLMException:
|
||||
return HuggingFaceError(status_code=status_code, message=error_message, headers=headers)
|
||||
return HuggingFaceError(
|
||||
status_code=status_code, message=error_message, headers=headers
|
||||
)
|
||||
|
||||
def get_base_url(self, model: str, base_url: Optional[str]) -> Optional[str]:
|
||||
"""
|
||||
|
@ -82,7 +84,9 @@ class HuggingFaceChatConfig(OpenAIGPTConfig):
|
|||
if api_base is not None:
|
||||
complete_url = api_base
|
||||
elif os.getenv("HF_API_BASE") or os.getenv("HUGGINGFACE_API_BASE"):
|
||||
complete_url = str(os.getenv("HF_API_BASE")) or str(os.getenv("HUGGINGFACE_API_BASE"))
|
||||
complete_url = str(os.getenv("HF_API_BASE")) or str(
|
||||
os.getenv("HUGGINGFACE_API_BASE")
|
||||
)
|
||||
elif model.startswith(("http://", "https://")):
|
||||
complete_url = model
|
||||
# 4. Default construction with provider
|
||||
|
@ -138,4 +142,8 @@ class HuggingFaceChatConfig(OpenAIGPTConfig):
|
|||
)
|
||||
mapped_model = provider_mapping["providerId"]
|
||||
messages = self._transform_messages(messages=messages, model=mapped_model)
|
||||
return dict(ChatCompletionRequest(model=mapped_model, messages=messages, **optional_params))
|
||||
return dict(
|
||||
ChatCompletionRequest(
|
||||
model=mapped_model, messages=messages, **optional_params
|
||||
)
|
||||
)
|
||||
|
|
|
@ -1,15 +1,6 @@
|
|||
import json
|
||||
import os
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
List,
|
||||
Literal,
|
||||
Optional,
|
||||
Union,
|
||||
get_args,
|
||||
)
|
||||
from typing import Any, Callable, Dict, List, Literal, Optional, Union, get_args
|
||||
|
||||
import httpx
|
||||
|
||||
|
@ -35,8 +26,9 @@ hf_tasks_embeddings = Literal[ # pipeline tags + hf tei endpoints - https://hug
|
|||
]
|
||||
|
||||
|
||||
|
||||
def get_hf_task_embedding_for_model(model: str, task_type: Optional[str], api_base: str) -> Optional[str]:
|
||||
def get_hf_task_embedding_for_model(
|
||||
model: str, task_type: Optional[str], api_base: str
|
||||
) -> Optional[str]:
|
||||
if task_type is not None:
|
||||
if task_type in get_args(hf_tasks_embeddings):
|
||||
return task_type
|
||||
|
@ -57,7 +49,9 @@ def get_hf_task_embedding_for_model(model: str, task_type: Optional[str], api_ba
|
|||
return pipeline_tag
|
||||
|
||||
|
||||
async def async_get_hf_task_embedding_for_model(model: str, task_type: Optional[str], api_base: str) -> Optional[str]:
|
||||
async def async_get_hf_task_embedding_for_model(
|
||||
model: str, task_type: Optional[str], api_base: str
|
||||
) -> Optional[str]:
|
||||
if task_type is not None:
|
||||
if task_type in get_args(hf_tasks_embeddings):
|
||||
return task_type
|
||||
|
@ -116,7 +110,9 @@ class HuggingFaceEmbedding(BaseLLM):
|
|||
input: List,
|
||||
optional_params: dict,
|
||||
) -> dict:
|
||||
hf_task = await async_get_hf_task_embedding_for_model(model=model, task_type=task_type, api_base=HF_HUB_URL)
|
||||
hf_task = await async_get_hf_task_embedding_for_model(
|
||||
model=model, task_type=task_type, api_base=HF_HUB_URL
|
||||
)
|
||||
|
||||
data = self._transform_input_on_pipeline_tag(input=input, pipeline_tag=hf_task)
|
||||
|
||||
|
@ -173,7 +169,9 @@ class HuggingFaceEmbedding(BaseLLM):
|
|||
task_type = optional_params.pop("input_type", None)
|
||||
|
||||
if call_type == "sync":
|
||||
hf_task = get_hf_task_embedding_for_model(model=model, task_type=task_type, api_base=HF_HUB_URL)
|
||||
hf_task = get_hf_task_embedding_for_model(
|
||||
model=model, task_type=task_type, api_base=HF_HUB_URL
|
||||
)
|
||||
elif call_type == "async":
|
||||
return self._async_transform_input(
|
||||
model=model, task_type=task_type, embed_url=embed_url, input=input
|
||||
|
@ -325,6 +323,7 @@ class HuggingFaceEmbedding(BaseLLM):
|
|||
input: list,
|
||||
model_response: EmbeddingResponse,
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
encoding: Callable,
|
||||
api_key: Optional[str] = None,
|
||||
|
@ -341,9 +340,12 @@ class HuggingFaceEmbedding(BaseLLM):
|
|||
model=model,
|
||||
optional_params=optional_params,
|
||||
messages=[],
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
task_type = optional_params.pop("input_type", None)
|
||||
task = get_hf_task_embedding_for_model(model=model, task_type=task_type, api_base=HF_HUB_URL)
|
||||
task = get_hf_task_embedding_for_model(
|
||||
model=model, task_type=task_type, api_base=HF_HUB_URL
|
||||
)
|
||||
# print_verbose(f"{model}, {task}")
|
||||
embed_url = ""
|
||||
if "https" in model:
|
||||
|
@ -355,7 +357,9 @@ class HuggingFaceEmbedding(BaseLLM):
|
|||
elif "HUGGINGFACE_API_BASE" in os.environ:
|
||||
embed_url = os.getenv("HUGGINGFACE_API_BASE", "")
|
||||
else:
|
||||
embed_url = f"https://router.huggingface.co/hf-inference/pipeline/{task}/{model}"
|
||||
embed_url = (
|
||||
f"https://router.huggingface.co/hf-inference/pipeline/{task}/{model}"
|
||||
)
|
||||
|
||||
## ROUTING ##
|
||||
if aembedding is True:
|
||||
|
|
|
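For reference, the embedding URL the handler falls back to follows the `hf-inference/pipeline/{task}/{model}` pattern constructed above. A small hedged sketch of calling it directly; the `{"inputs": ...}` payload shape and the `HF_TOKEN` env var are assumptions for the feature-extraction task, not taken from this diff:

```python
import os

import httpx


def hf_embed(model: str, texts: list, task: str = "feature-extraction") -> list:
    # URL pattern mirrors the fallback built in HuggingFaceEmbedding above.
    embed_url = f"https://router.huggingface.co/hf-inference/pipeline/{task}/{model}"
    headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}  # assumed env var
    resp = httpx.post(embed_url, headers=headers, json={"inputs": texts}, timeout=60)
    resp.raise_for_status()
    return resp.json()
```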
@ -355,6 +355,7 @@ class HuggingFaceEmbeddingConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: Dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> Dict:
|
||||
|
|
|
@ -36,6 +36,7 @@ def completion(
|
|||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
## Load Config
|
||||
|
|
|
@ -93,6 +93,7 @@ class NLPCloudConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -353,6 +353,7 @@ class OllamaConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -32,6 +32,7 @@ def completion(
|
|||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
if "https" in model:
|
||||
completion_url = model
|
||||
|
@ -123,6 +124,7 @@ def embedding(
|
|||
model=model,
|
||||
messages=[],
|
||||
optional_params=optional_params,
|
||||
litellm_params={},
|
||||
)
|
||||
response = litellm.module_level_client.post(
|
||||
embeddings_url, headers=headers, json=data
|
||||
|
|
|
@ -88,6 +88,7 @@ class OobaboogaConfig(OpenAIGPTConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -321,6 +321,7 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -286,6 +286,7 @@ class OpenAIConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -53,6 +53,7 @@ class OpenAIWhisperAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -131,6 +131,7 @@ class PetalsConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -228,10 +228,10 @@ class PredibaseChatCompletion:
|
|||
api_key: str,
|
||||
logging_obj,
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
tenant_id: str,
|
||||
timeout: Union[float, httpx.Timeout],
|
||||
acompletion=None,
|
||||
litellm_params=None,
|
||||
logger_fn=None,
|
||||
headers: dict = {},
|
||||
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||||
|
@ -241,6 +241,7 @@ class PredibaseChatCompletion:
|
|||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
model=model,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
completion_url = ""
|
||||
input_text = ""
|
||||
|
|
|
@ -164,6 +164,7 @@ class PredibaseConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -141,6 +141,7 @@ def completion(
|
|||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
# Start a prediction and get the prediction URL
|
||||
version_id = replicate_config.model_to_version_id(model)
|
||||
|
|
|
@ -312,6 +312,7 @@ class ReplicateConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -96,6 +96,7 @@ class SagemakerLLM(BaseAWSLLM):
|
|||
model: str,
|
||||
data: dict,
|
||||
messages: List[AllMessageValues],
|
||||
litellm_params: dict,
|
||||
optional_params: dict,
|
||||
aws_region_name: str,
|
||||
extra_headers: Optional[dict] = None,
|
||||
|
@ -122,6 +123,7 @@ class SagemakerLLM(BaseAWSLLM):
|
|||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
request = AWSRequest(
|
||||
method="POST", url=api_base, data=encoded_data, headers=headers
|
||||
|
@ -198,6 +200,7 @@ class SagemakerLLM(BaseAWSLLM):
|
|||
data=data,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
credentials=credentials,
|
||||
aws_region_name=aws_region_name,
|
||||
)
|
||||
|
@ -274,6 +277,7 @@ class SagemakerLLM(BaseAWSLLM):
|
|||
"model": model,
|
||||
"data": _data,
|
||||
"optional_params": optional_params,
|
||||
"litellm_params": litellm_params,
|
||||
"credentials": credentials,
|
||||
"aws_region_name": aws_region_name,
|
||||
"messages": messages,
|
||||
|
@ -426,6 +430,7 @@ class SagemakerLLM(BaseAWSLLM):
|
|||
"model": model,
|
||||
"data": data,
|
||||
"optional_params": optional_params,
|
||||
"litellm_params": litellm_params,
|
||||
"credentials": credentials,
|
||||
"aws_region_name": aws_region_name,
|
||||
"messages": messages,
|
||||
|
@ -496,6 +501,7 @@ class SagemakerLLM(BaseAWSLLM):
|
|||
"model": model,
|
||||
"data": data,
|
||||
"optional_params": optional_params,
|
||||
"litellm_params": litellm_params,
|
||||
"credentials": credentials,
|
||||
"aws_region_name": aws_region_name,
|
||||
"messages": messages,
|
||||
|
|
|
@ -263,6 +263,7 @@ class SagemakerConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -92,6 +92,7 @@ class SnowflakeConfig(OpenAIGPTConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -37,6 +37,7 @@ class TopazImageVariationConfig(BaseImageVariationConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -48,6 +48,7 @@ class TritonConfig(BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: Dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> Dict:
|
||||
|
|
|
@ -42,6 +42,7 @@ class TritonEmbeddingConfig(BaseEmbeddingConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import asyncio
|
||||
from typing import Any, Coroutine, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
@ -11,9 +12,9 @@ from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
|
|||
from litellm.types.llms.openai import CreateFileRequest, OpenAIFileObject
|
||||
from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
|
||||
|
||||
from .transformation import VertexAIFilesTransformation
|
||||
from .transformation import VertexAIJsonlFilesTransformation
|
||||
|
||||
vertex_ai_files_transformation = VertexAIFilesTransformation()
|
||||
vertex_ai_files_transformation = VertexAIJsonlFilesTransformation()
|
||||
|
||||
|
||||
class VertexAIFilesHandler(GCSBucketBase):
|
||||
|
@ -92,5 +93,15 @@ class VertexAIFilesHandler(GCSBucketBase):
|
|||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
)
|
||||
|
||||
return None # type: ignore
|
||||
else:
|
||||
return asyncio.run(
|
||||
self.async_create_file(
|
||||
create_file_data=create_file_data,
|
||||
api_base=api_base,
|
||||
vertex_credentials=vertex_credentials,
|
||||
vertex_project=vertex_project,
|
||||
vertex_location=vertex_location,
|
||||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
)
|
||||
)
|
||||
|
|
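The sync branch added above simply drives the async implementation with `asyncio.run`. A generic illustration of that pattern (not the actual handler, which also returns the coroutine directly when the caller is already async):

```python
import asyncio


class FilesHandlerSketch:
    async def async_create_file(self, name: str) -> str:
        await asyncio.sleep(0)  # stand-in for the real HTTP call
        return f"uploaded:{name}"

    def create_file(self, name: str, _is_async: bool = False):
        if _is_async:
            # Caller is already inside an event loop: hand back the coroutine.
            return self.async_create_file(name)
        # Plain sync caller: run the coroutine to completion.
        return asyncio.run(self.async_create_file(name))


print(FilesHandlerSketch().create_file("batch.jsonl"))
```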
|
@ -1,7 +1,17 @@
|
|||
import json
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from httpx import Headers, Response
|
||||
|
||||
from litellm.litellm_core_utils.prompt_templates.common_utils import extract_file_data
|
||||
from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
||||
from litellm.llms.base_llm.files.transformation import (
|
||||
BaseFilesConfig,
|
||||
LiteLLMLoggingObj,
|
||||
)
|
||||
from litellm.llms.vertex_ai.common_utils import (
|
||||
_convert_vertex_datetime_to_openai_datetime,
|
||||
)
|
||||
|
@ -10,14 +20,317 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
|
|||
VertexGeminiConfig,
|
||||
)
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
CreateFileRequest,
|
||||
FileTypes,
|
||||
OpenAICreateFileRequestOptionalParams,
|
||||
OpenAIFileObject,
|
||||
PathLike,
|
||||
)
|
||||
from litellm.types.llms.vertex_ai import GcsBucketResponse
|
||||
from litellm.types.utils import ExtractedFileData, LlmProviders
|
||||
|
||||
from ..common_utils import VertexAIError
|
||||
from ..vertex_llm_base import VertexBase
|
||||
|
||||
|
||||
class VertexAIFilesTransformation(VertexGeminiConfig):
|
||||
class VertexAIFilesConfig(VertexBase, BaseFilesConfig):
|
||||
"""
|
||||
Config for VertexAI Files
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.jsonl_transformation = VertexAIJsonlFilesTransformation()
|
||||
super().__init__()
|
||||
|
||||
@property
|
||||
def custom_llm_provider(self) -> LlmProviders:
|
||||
return LlmProviders.VERTEX_AI
|
||||
|
||||
def validate_environment(
|
||||
self,
|
||||
headers: dict,
|
||||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
if not api_key:
|
||||
api_key, _ = self.get_access_token(
|
||||
credentials=litellm_params.get("vertex_credentials"),
|
||||
project_id=litellm_params.get("vertex_project"),
|
||||
)
|
||||
if not api_key:
|
||||
raise ValueError("api_key is required")
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
return headers
|
||||
|
||||
def _get_content_from_openai_file(self, openai_file_content: FileTypes) -> str:
|
||||
"""
|
||||
Helper to extract content from various OpenAI file types and return as string.
|
||||
|
||||
Handles:
|
||||
- Direct content (str, bytes, IO[bytes])
|
||||
- Tuple formats: (filename, content, [content_type], [headers])
|
||||
- PathLike objects
|
||||
"""
|
||||
content: Union[str, bytes] = b""
|
||||
# Extract file content from tuple if necessary
|
||||
if isinstance(openai_file_content, tuple):
|
||||
# Take the second element which is always the file content
|
||||
file_content = openai_file_content[1]
|
||||
else:
|
||||
file_content = openai_file_content
|
||||
|
||||
# Handle different file content types
|
||||
if isinstance(file_content, str):
|
||||
# String content can be used directly
|
||||
content = file_content
|
||||
elif isinstance(file_content, bytes):
|
||||
# Bytes content can be decoded
|
||||
content = file_content
|
||||
elif isinstance(file_content, PathLike): # PathLike
|
||||
with open(str(file_content), "rb") as f:
|
||||
content = f.read()
|
||||
elif hasattr(file_content, "read"): # IO[bytes]
|
||||
# File-like objects need to be read
|
||||
content = file_content.read()
|
||||
|
||||
# Ensure content is string
|
||||
if isinstance(content, bytes):
|
||||
content = content.decode("utf-8")
|
||||
|
||||
return content
|
||||
|
||||
def _get_gcs_object_name_from_batch_jsonl(
|
||||
self,
|
||||
openai_jsonl_content: List[Dict[str, Any]],
|
||||
) -> str:
|
||||
"""
|
||||
Gets a unique GCS object name for the VertexAI batch prediction job
|
||||
|
||||
named as: litellm-vertex-{model}-{uuid}
|
||||
"""
|
||||
_model = openai_jsonl_content[0].get("body", {}).get("model", "")
|
||||
if "publishers/google/models" not in _model:
|
||||
_model = f"publishers/google/models/{_model}"
|
||||
object_name = f"litellm-vertex-files/{_model}/{uuid.uuid4()}"
|
||||
return object_name
|
||||
|
||||
def get_object_name(
|
||||
self, extracted_file_data: ExtractedFileData, purpose: str
|
||||
) -> str:
|
||||
"""
|
||||
Get the object name for the request
|
||||
"""
|
||||
extracted_file_data_content = extracted_file_data.get("content")
|
||||
|
||||
if extracted_file_data_content is None:
|
||||
raise ValueError("file content is required")
|
||||
|
||||
if purpose == "batch":
|
||||
## 1. If jsonl, check if there's a model name
|
||||
file_content = self._get_content_from_openai_file(
|
||||
extracted_file_data_content
|
||||
)
|
||||
|
||||
# Split into lines and parse each line as JSON
|
||||
openai_jsonl_content = [
|
||||
json.loads(line) for line in file_content.splitlines() if line.strip()
|
||||
]
|
||||
if len(openai_jsonl_content) > 0:
|
||||
return self._get_gcs_object_name_from_batch_jsonl(openai_jsonl_content)
|
||||
|
||||
## 2. If not jsonl, return the filename
|
||||
filename = extracted_file_data.get("filename")
|
||||
if filename:
|
||||
return filename
|
||||
## 3. If no file name, return timestamp
|
||||
return str(int(time.time()))
|
||||
|
||||
def get_complete_file_url(
|
||||
self,
|
||||
api_base: Optional[str],
|
||||
api_key: Optional[str],
|
||||
model: str,
|
||||
optional_params: Dict,
|
||||
litellm_params: Dict,
|
||||
data: CreateFileRequest,
|
||||
) -> str:
|
||||
"""
|
||||
Get the complete url for the request
|
||||
"""
|
||||
bucket_name = litellm_params.get("bucket_name") or os.getenv("GCS_BUCKET_NAME")
|
||||
if not bucket_name:
|
||||
raise ValueError("GCS bucket_name is required")
|
||||
file_data = data.get("file")
|
||||
purpose = data.get("purpose")
|
||||
if file_data is None:
|
||||
raise ValueError("file is required")
|
||||
if purpose is None:
|
||||
raise ValueError("purpose is required")
|
||||
extracted_file_data = extract_file_data(file_data)
|
||||
object_name = self.get_object_name(extracted_file_data, purpose)
|
||||
endpoint = (
|
||||
f"upload/storage/v1/b/{bucket_name}/o?uploadType=media&name={object_name}"
|
||||
)
|
||||
api_base = api_base or "https://storage.googleapis.com"
|
||||
if not api_base:
|
||||
raise ValueError("api_base is required")
|
||||
|
||||
return f"{api_base}/{endpoint}"
|
||||
|
||||
def get_supported_openai_params(
|
||||
self, model: str
|
||||
) -> List[OpenAICreateFileRequestOptionalParams]:
|
||||
return []
|
||||
|
||||
def map_openai_params(
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> dict:
|
||||
return optional_params
|
||||
|
||||
def _map_openai_to_vertex_params(
|
||||
self,
|
||||
openai_request_body: Dict[str, Any],
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
wrapper to call VertexGeminiConfig.map_openai_params
|
||||
"""
|
||||
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
|
||||
VertexGeminiConfig,
|
||||
)
|
||||
|
||||
config = VertexGeminiConfig()
|
||||
_model = openai_request_body.get("model", "")
|
||||
vertex_params = config.map_openai_params(
|
||||
model=_model,
|
||||
non_default_params=openai_request_body,
|
||||
optional_params={},
|
||||
drop_params=False,
|
||||
)
|
||||
return vertex_params
|
||||
|
||||
def _transform_openai_jsonl_content_to_vertex_ai_jsonl_content(
    self, openai_jsonl_content: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """
    Transforms OpenAI JSONL content to VertexAI JSONL content

    jsonl body for vertex is {"request": <request_body>}
    Example Vertex jsonl
    {"request":{"contents": [{"role": "user", "parts": [{"text": "What is the relation between the following video and image samples?"}, {"fileData": {"fileUri": "gs://cloud-samples-data/generative-ai/video/animals.mp4", "mimeType": "video/mp4"}}, {"fileData": {"fileUri": "gs://cloud-samples-data/generative-ai/image/cricket.jpeg", "mimeType": "image/jpeg"}}]}]}}
    {"request":{"contents": [{"role": "user", "parts": [{"text": "Describe what is happening in this video."}, {"fileData": {"fileUri": "gs://cloud-samples-data/generative-ai/video/another_video.mov", "mimeType": "video/mov"}}]}]}}
    """

    vertex_jsonl_content = []
    for _openai_jsonl_content in openai_jsonl_content:
        openai_request_body = _openai_jsonl_content.get("body") or {}
        vertex_request_body = _transform_request_body(
            messages=openai_request_body.get("messages", []),
            model=openai_request_body.get("model", ""),
            optional_params=self._map_openai_to_vertex_params(openai_request_body),
            custom_llm_provider="vertex_ai",
            litellm_params={},
            cached_content=None,
        )
        vertex_jsonl_content.append({"request": vertex_request_body})
    return vertex_jsonl_content
|
||||
|
||||
def transform_create_file_request(
|
||||
self,
|
||||
model: str,
|
||||
create_file_data: CreateFileRequest,
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
) -> Union[bytes, str, dict]:
|
||||
"""
|
||||
2 Cases:
|
||||
1. Handle basic file upload
|
||||
2. Handle batch file upload (.jsonl)
|
||||
"""
|
||||
file_data = create_file_data.get("file")
|
||||
if file_data is None:
|
||||
raise ValueError("file is required")
|
||||
extracted_file_data = extract_file_data(file_data)
|
||||
extracted_file_data_content = extracted_file_data.get("content")
|
||||
if (
|
||||
create_file_data.get("purpose") == "batch"
|
||||
and extracted_file_data.get("content_type") == "application/jsonl"
|
||||
and extracted_file_data_content is not None
|
||||
):
|
||||
## 1. If jsonl, check if there's a model name
|
||||
file_content = self._get_content_from_openai_file(
|
||||
extracted_file_data_content
|
||||
)
|
||||
|
||||
# Split into lines and parse each line as JSON
|
||||
openai_jsonl_content = [
|
||||
json.loads(line) for line in file_content.splitlines() if line.strip()
|
||||
]
|
||||
vertex_jsonl_content = (
|
||||
self._transform_openai_jsonl_content_to_vertex_ai_jsonl_content(
|
||||
openai_jsonl_content
|
||||
)
|
||||
)
|
||||
return json.dumps(vertex_jsonl_content)
|
||||
elif isinstance(extracted_file_data_content, bytes):
|
||||
return extracted_file_data_content
|
||||
else:
|
||||
raise ValueError("Unsupported file content type")
|
||||
|
||||
def transform_create_file_response(
|
||||
self,
|
||||
model: Optional[str],
|
||||
raw_response: Response,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
litellm_params: dict,
|
||||
) -> OpenAIFileObject:
|
||||
"""
|
||||
Transform VertexAI File upload response into OpenAI-style FileObject
|
||||
"""
|
||||
response_json = raw_response.json()
|
||||
|
||||
try:
|
||||
response_object = GcsBucketResponse(**response_json) # type: ignore
|
||||
except Exception as e:
|
||||
raise VertexAIError(
|
||||
status_code=raw_response.status_code,
|
||||
message=f"Error reading GCS bucket response: {e}",
|
||||
headers=raw_response.headers,
|
||||
)
|
||||
|
||||
gcs_id = response_object.get("id", "")
|
||||
# Remove the last numeric ID from the path
|
||||
gcs_id = "/".join(gcs_id.split("/")[:-1]) if gcs_id else ""
|
||||
|
||||
return OpenAIFileObject(
|
||||
purpose=response_object.get("purpose", "batch"),
|
||||
id=f"gs://{gcs_id}",
|
||||
filename=response_object.get("name", ""),
|
||||
created_at=_convert_vertex_datetime_to_openai_datetime(
|
||||
vertex_datetime=response_object.get("timeCreated", "")
|
||||
),
|
||||
status="uploaded",
|
||||
bytes=int(response_object.get("size", 0)),
|
||||
object="file",
|
||||
)
|
||||
|
||||
def get_error_class(
|
||||
self, error_message: str, status_code: int, headers: Union[Dict, Headers]
|
||||
) -> BaseLLMException:
|
||||
return VertexAIError(
|
||||
status_code=status_code, message=error_message, headers=headers
|
||||
)
|
||||
|
||||
|
||||
class VertexAIJsonlFilesTransformation(VertexGeminiConfig):
|
||||
"""
|
||||
Transforms OpenAI /v1/files/* requests to VertexAI /v1/files/* requests
|
||||
"""
|
||||
|
|
|
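To make the JSONL reshaping in the file above concrete, here is a simplified before/after for one batch line. The real code delegates the body conversion to `_transform_request_body`; the tiny message-to-`contents` mapping below is a stand-in for that step, not the actual implementation:

```python
import json

# One line from an OpenAI-style batch .jsonl file
openai_line = {
    "custom_id": "req-1",
    "method": "POST",
    "url": "/v1/chat/completions",
    "body": {
        "model": "gemini-2.0-flash-001",
        "messages": [{"role": "user", "content": "Describe what is happening in this video."}],
    },
}


def to_vertex_line(line: dict) -> dict:
    body = line.get("body") or {}
    # Stand-in for _transform_request_body: map chat messages to Gemini "contents".
    contents = [
        {"role": m["role"], "parts": [{"text": m["content"]}]}
        for m in body.get("messages", [])
    ]
    return {"request": {"contents": contents}}


print(json.dumps(to_vertex_line(openai_line)))
# -> {"request": {"contents": [{"role": "user", "parts": [{"text": "Describe ..."}]}]}}
```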
@ -240,6 +240,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
|
|||
gtool_func_declarations = []
|
||||
googleSearch: Optional[dict] = None
|
||||
googleSearchRetrieval: Optional[dict] = None
|
||||
enterpriseWebSearch: Optional[dict] = None
|
||||
code_execution: Optional[dict] = None
|
||||
# remove 'additionalProperties' from tools
|
||||
value = _remove_additional_properties(value)
|
||||
|
@ -273,6 +274,8 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
|
|||
googleSearch = tool["googleSearch"]
|
||||
elif tool.get("googleSearchRetrieval", None) is not None:
|
||||
googleSearchRetrieval = tool["googleSearchRetrieval"]
|
||||
elif tool.get("enterpriseWebSearch", None) is not None:
|
||||
enterpriseWebSearch = tool["enterpriseWebSearch"]
|
||||
elif tool.get("code_execution", None) is not None:
|
||||
code_execution = tool["code_execution"]
|
||||
elif openai_function_object is not None:
|
||||
|
@ -299,6 +302,8 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
|
|||
_tools["googleSearch"] = googleSearch
|
||||
if googleSearchRetrieval is not None:
|
||||
_tools["googleSearchRetrieval"] = googleSearchRetrieval
|
||||
if enterpriseWebSearch is not None:
|
||||
_tools["enterpriseWebSearch"] = enterpriseWebSearch
|
||||
if code_execution is not None:
|
||||
_tools["code_execution"] = code_execution
|
||||
return [_tools]
|
||||
|
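The hunks above add `enterpriseWebSearch` as a pass-through tool alongside `googleSearch` and `googleSearchRetrieval`. A hedged sketch of exercising it from the SDK side; the empty `{}` tool body follows the same convention as the existing search tools, and the model name is a placeholder:

```python
import litellm

# Requires Vertex AI credentials in the environment; model name is illustrative.
response = litellm.completion(
    model="vertex_ai/gemini-2.0-flash-001",
    messages=[{"role": "user", "content": "What is the latest LiteLLM release?"}],
    tools=[{"enterpriseWebSearch": {}}],  # forwarded as-is into the Vertex "tools" block
)
print(response.choices[0].message.content)
```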
@ -900,6 +905,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: Dict,
|
||||
litellm_params: Dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> Dict:
|
||||
|
@ -1017,7 +1023,7 @@ class VertexLLM(VertexBase):
|
|||
logging_obj,
|
||||
stream,
|
||||
optional_params: dict,
|
||||
litellm_params=None,
|
||||
litellm_params: dict,
|
||||
logger_fn=None,
|
||||
api_base: Optional[str] = None,
|
||||
client: Optional[AsyncHTTPHandler] = None,
|
||||
|
@ -1058,6 +1064,7 @@ class VertexLLM(VertexBase):
|
|||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
## LOGGING
|
||||
|
@ -1144,6 +1151,7 @@ class VertexLLM(VertexBase):
|
|||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
request_body = await async_transform_request_body(**data) # type: ignore
|
||||
|
@ -1317,6 +1325,7 @@ class VertexLLM(VertexBase):
|
|||
model=model,
|
||||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
## TRANSFORMATION ##
|
||||
|
|
|
@ -94,6 +94,7 @@ class VertexMultimodalEmbedding(VertexLLM):
|
|||
optional_params=optional_params,
|
||||
api_key=auth_header,
|
||||
api_base=api_base,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
## LOGGING
|
||||
|
|
|
@ -47,6 +47,7 @@ class VertexAIMultimodalEmbeddingConfig(BaseEmbeddingConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Tuple
|
|||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.litellm_core_utils.asyncify import asyncify
|
||||
from litellm.llms.base import BaseLLM
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
|
||||
|
||||
|
@ -22,7 +21,7 @@ else:
|
|||
GoogleCredentialsObject = Any
|
||||
|
||||
|
||||
class VertexBase(BaseLLM):
|
||||
class VertexBase:
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.access_token: Optional[str] = None
|
||||
|
|
|
@ -83,6 +83,7 @@ class VoyageEmbeddingConfig(BaseEmbeddingConfig):
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> dict:
|
||||
|
|
|
@ -49,6 +49,7 @@ class WatsonXChatHandler(OpenAILikeChatHandler):
|
|||
messages=messages,
|
||||
optional_params=optional_params,
|
||||
api_key=api_key,
|
||||
litellm_params=litellm_params,
|
||||
)
|
||||
|
||||
## UPDATE PAYLOAD (optional params)
|
||||
|
|
|
@ -165,6 +165,7 @@ class IBMWatsonXMixin:
|
|||
model: str,
|
||||
messages: List[AllMessageValues],
|
||||
optional_params: Dict,
|
||||
litellm_params: dict,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
) -> Dict:
|
||||
|
|
|
@ -3656,6 +3656,7 @@ def embedding( # noqa: PLR0915
|
|||
optional_params=optional_params,
|
||||
client=client,
|
||||
aembedding=aembedding,
|
||||
litellm_params=litellm_params_dict,
|
||||
)
|
||||
elif custom_llm_provider == "bedrock":
|
||||
if isinstance(input, str):
|
||||
|
|
|
@ -380,6 +380,7 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": false,
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supported_endpoints": ["/v1/responses", "/v1/batch"]
|
||||
},
|
||||
"o1-pro-2025-03-19": {
|
||||
|
@ -401,6 +402,7 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": false,
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supported_endpoints": ["/v1/responses", "/v1/batch"]
|
||||
},
|
||||
"o1": {
|
||||
|
@ -2409,25 +2411,26 @@
|
|||
"max_tokens": 4096,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
|
||||
"source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112"
|
||||
},
|
||||
"azure_ai/Phi-4-multimodal-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"input_cost_per_token": 0.00000008,
|
||||
"input_cost_per_audio_token": 0.000004,
|
||||
"output_cost_per_token": 0.00032,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_audio_input": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
|
||||
"source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112"
|
||||
},
|
||||
"azure_ai/Phi-4": {
|
||||
"max_tokens": 16384,
|
||||
|
@ -4511,20 +4514,10 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.00001,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -4535,6 +4528,9 @@
|
|||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-pro-exp-02-05": {
|
||||
|
@ -4547,20 +4543,10 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.00001,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -4571,6 +4557,9 @@
|
|||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-flash-exp": {
|
||||
|
@ -4604,6 +4593,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4628,6 +4619,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-flash-thinking-exp": {
|
||||
|
@ -4661,6 +4654,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4695,6 +4690,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": false,
|
||||
"supports_audio_output": false,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4720,6 +4717,7 @@
|
|||
"supports_audio_output": true,
|
||||
"supports_audio_input": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
|
@ -4742,6 +4740,32 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"gemini-2.0-flash-lite-001": {
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 50,
|
||||
"input_cost_per_audio_token": 0.000000075,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4807,6 +4831,7 @@
|
|||
"supports_audio_output": true,
|
||||
"supports_audio_input": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
|
@ -4832,6 +4857,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-001": {
|
||||
|
@ -4857,6 +4884,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
"gemini/gemini-2.5-pro-preview-03-25": {
|
||||
|
@ -4871,9 +4900,9 @@
|
|||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 0.0000007,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000025,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.0000010,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000015,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"rpm": 10000,
|
||||
|
@ -4884,6 +4913,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-exp": {
|
||||
|
@ -4919,6 +4950,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4945,6 +4978,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-thinking-exp": {
|
||||
|
@ -4980,6 +5015,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -5016,6 +5053,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
|
|
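As a quick sanity check on the new `gemini-2.0-flash-lite-001` entry above, the per-token rates translate to request cost as plain arithmetic (mirroring how the cost-map fields are read; this is not a call into LiteLLM's own cost helper):

```python
# Rates copied from the gemini-2.0-flash-lite-001 entry added above
input_cost_per_token = 0.000000075
output_cost_per_token = 0.0000003

prompt_tokens, completion_tokens = 10_000, 2_000
cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.6f}")  # 10k in + 2k out -> $0.000750 + $0.000600 = $0.001350
```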
@@ -29,10 +29,14 @@ model_list:
      model: databricks/databricks-claude-3-7-sonnet
      api_key: os.environ/DATABRICKS_API_KEY
      api_base: os.environ/DATABRICKS_API_BASE
  - model_name: "gemini/gemini-2.0-flash"
  - model_name: "llmaas-meta/llama-3.1-8b-instruct"
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY
      model: nvidia_nim/meta/llama-3.3-70b-instruct
      api_key: "invalid"
      api_base: "http://0.0.0.0:8090"
    model_info:
      input_cost_per_token: "100"
      output_cost_per_token: "100"

litellm_settings:
  num_retries: 0

@@ -40,4 +44,7 @@ litellm_settings:

files_settings:
  - custom_llm_provider: gemini
    api_key: os.environ/GEMINI_API_KEY
    api_key: os.environ/GEMINI_API_KEY

general_settings:
  store_prompts_in_spend_logs: true
|
|
@ -28,6 +28,7 @@ from litellm.types.utils import (
|
|||
ProviderField,
|
||||
StandardCallbackDynamicParams,
|
||||
StandardLoggingMCPToolCall,
|
||||
StandardLoggingModelInformation,
|
||||
StandardLoggingPayloadErrorInformation,
|
||||
StandardLoggingPayloadStatus,
|
||||
StandardPassThroughResponseObject,
|
||||
|
@ -1625,6 +1626,7 @@ class LiteLLM_UserTable(LiteLLMPydanticObjectBase):
|
|||
model_max_budget: Optional[Dict] = {}
|
||||
model_spend: Optional[Dict] = {}
|
||||
user_email: Optional[str] = None
|
||||
user_alias: Optional[str] = None
|
||||
models: list = []
|
||||
tpm_limit: Optional[int] = None
|
||||
rpm_limit: Optional[int] = None
|
||||
|
@ -1935,6 +1937,8 @@ class SpendLogsMetadata(TypedDict):
|
|||
proxy_server_request: Optional[str]
|
||||
batch_models: Optional[List[str]]
|
||||
error_information: Optional[StandardLoggingPayloadErrorInformation]
|
||||
usage_object: Optional[dict]
|
||||
model_map_information: Optional[StandardLoggingModelInformation]
|
||||
|
||||
|
||||
class SpendLogsPayload(TypedDict):
|
||||
|
|
|
@ -100,7 +100,6 @@ async def cache_ping():
|
|||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
error_message = {
|
||||
"message": f"Service Unhealthy ({str(e)})",
|
||||
"litellm_cache_params": safe_dumps(litellm_cache_params),
|
||||
|
|
|
@ -51,9 +51,6 @@ def decrypt_value_helper(value: str):
|
|||
# if it's not str - do not decrypt it, return the value
|
||||
return value
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
traceback.print_stack()
|
||||
verbose_proxy_logger.error(
|
||||
f"Error decrypting value, Did your master_key/salt key change recently? \nError: {str(e)}\nSet permanent salt key - https://docs.litellm.ai/docs/proxy/prod#5-set-litellm-salt-key"
|
||||
)
|
||||
|
|
litellm/proxy/common_utils/html_forms/jwt_display_template.py (new file, 284 lines)
|
@ -0,0 +1,284 @@
|
|||
# JWT display template for SSO debug callback
|
||||
jwt_display_template = """
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>LiteLLM SSO Debug - JWT Information</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<style>
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
|
||||
background-color: #f8fafc;
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
min-height: 100vh;
|
||||
color: #333;
|
||||
}
|
||||
|
||||
.container {
|
||||
background-color: #fff;
|
||||
padding: 40px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
|
||||
width: 800px;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
.logo-container {
|
||||
text-align: center;
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
|
||||
.logo {
|
||||
font-size: 24px;
|
||||
font-weight: 600;
|
||||
color: #1e293b;
|
||||
}
|
||||
|
||||
h2 {
|
||||
margin: 0 0 10px;
|
||||
color: #1e293b;
|
||||
font-size: 28px;
|
||||
font-weight: 600;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.subtitle {
|
||||
color: #64748b;
|
||||
margin: 0 0 20px;
|
||||
font-size: 16px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.info-box {
|
||||
background-color: #f1f5f9;
|
||||
border-radius: 6px;
|
||||
padding: 20px;
|
||||
margin-bottom: 30px;
|
||||
border-left: 4px solid #2563eb;
|
||||
}
|
||||
|
||||
.success-box {
|
||||
background-color: #f0fdf4;
|
||||
border-radius: 6px;
|
||||
padding: 20px;
|
||||
margin-bottom: 30px;
|
||||
border-left: 4px solid #16a34a;
|
||||
}
|
||||
|
||||
.info-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
margin-bottom: 12px;
|
||||
color: #1e40af;
|
||||
font-weight: 600;
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
.success-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
margin-bottom: 12px;
|
||||
color: #166534;
|
||||
font-weight: 600;
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
.info-header svg, .success-header svg {
|
||||
margin-right: 8px;
|
||||
}
|
||||
|
||||
.data-container {
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
.data-row {
|
||||
display: flex;
|
||||
border-bottom: 1px solid #e2e8f0;
|
||||
padding: 12px 0;
|
||||
}
|
||||
|
||||
.data-row:last-child {
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
.data-label {
|
||||
font-weight: 500;
|
||||
color: #334155;
|
||||
width: 180px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.data-value {
|
||||
color: #475569;
|
||||
word-break: break-all;
|
||||
}
|
||||
|
||||
.jwt-container {
|
||||
background-color: #f8fafc;
|
||||
border-radius: 6px;
|
||||
padding: 15px;
|
||||
margin-top: 20px;
|
||||
overflow-x: auto;
|
||||
border: 1px solid #e2e8f0;
|
||||
}
|
||||
|
||||
.jwt-text {
|
||||
font-family: monospace;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-all;
|
||||
margin: 0;
|
||||
color: #334155;
|
||||
}
|
||||
|
||||
.back-button {
|
||||
display: inline-block;
|
||||
background-color: #6466E9;
|
||||
color: #fff;
|
||||
text-decoration: none;
|
||||
padding: 10px 16px;
|
||||
border-radius: 6px;
|
||||
font-weight: 500;
|
||||
margin-top: 20px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.back-button:hover {
|
||||
background-color: #4138C2;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.buttons {
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
.copy-button {
|
||||
background-color: #e2e8f0;
|
||||
color: #334155;
|
||||
border: none;
|
||||
padding: 8px 12px;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
font-size: 14px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.copy-button:hover {
|
||||
background-color: #cbd5e1;
|
||||
}
|
||||
|
||||
.copy-button svg {
|
||||
margin-right: 6px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="logo-container">
|
||||
<div class="logo">
|
||||
🚅 LiteLLM
|
||||
</div>
|
||||
</div>
|
||||
<h2>SSO Debug Information</h2>
|
||||
<p class="subtitle">Results from the SSO authentication process.</p>
|
||||
|
||||
<div class="success-box">
|
||||
<div class="success-header">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||||
<path d="M22 11.08V12a10 10 0 1 1-5.93-9.14"></path>
|
||||
<polyline points="22 4 12 14.01 9 11.01"></polyline>
|
||||
</svg>
|
||||
Authentication Successful
|
||||
</div>
|
||||
<p>The SSO authentication completed successfully. Below is the information returned by the provider.</p>
|
||||
</div>
|
||||
|
||||
<div class="data-container" id="userData">
|
||||
<!-- Data will be inserted here by JavaScript -->
|
||||
</div>
|
||||
|
||||
<div class="info-box">
|
||||
<div class="info-header">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||||
<circle cx="12" cy="12" r="10"></circle>
|
||||
<line x1="12" y1="16" x2="12" y2="12"></line>
|
||||
<line x1="12" y1="8" x2="12.01" y2="8"></line>
|
||||
</svg>
|
||||
JSON Representation
|
||||
</div>
|
||||
<div class="jwt-container">
|
||||
<pre class="jwt-text" id="jsonData">Loading...</pre>
|
||||
</div>
|
||||
<div class="buttons">
|
||||
<button class="copy-button" onclick="copyToClipboard('jsonData')">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||||
<rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect>
|
||||
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path>
|
||||
</svg>
|
||||
Copy to Clipboard
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<a href="/sso/debug/login" class="back-button">
|
||||
Try Another SSO Login
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// This will be populated with the actual data from the server
|
||||
const userData = SSO_DATA;
|
||||
|
||||
function renderUserData() {
|
||||
const container = document.getElementById('userData');
|
||||
const jsonDisplay = document.getElementById('jsonData');
|
||||
|
||||
// Format JSON with indentation for display
|
||||
jsonDisplay.textContent = JSON.stringify(userData, null, 2);
|
||||
|
||||
// Clear container
|
||||
container.innerHTML = '';
|
||||
|
||||
// Add each key-value pair to the UI
|
||||
for (const [key, value] of Object.entries(userData)) {
|
||||
if (typeof value !== 'object' || value === null) {
|
||||
const row = document.createElement('div');
|
||||
row.className = 'data-row';
|
||||
|
||||
const label = document.createElement('div');
|
||||
label.className = 'data-label';
|
||||
label.textContent = key;
|
||||
|
||||
const dataValue = document.createElement('div');
|
||||
dataValue.className = 'data-value';
|
||||
dataValue.textContent = value !== null ? value : 'null';
|
||||
|
||||
row.appendChild(label);
|
||||
row.appendChild(dataValue);
|
||||
container.appendChild(row);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function copyToClipboard(elementId) {
|
||||
const text = document.getElementById(elementId).textContent;
|
||||
navigator.clipboard.writeText(text).then(() => {
|
||||
alert('Copied to clipboard!');
|
||||
}).catch(err => {
|
||||
console.error('Could not copy text: ', err);
|
||||
});
|
||||
}
|
||||
|
||||
// Render the data when the page loads
|
||||
document.addEventListener('DOMContentLoaded', renderUserData);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
|
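A minimal sketch of how this new template might be rendered. The template's inline JavaScript reads a `SSO_DATA` placeholder, so the server presumably substitutes it with the JSON-serialized SSO response before returning the page; the `render_sso_debug_page` helper below is an assumption for illustration, not code from this diff.

```python
import json

from litellm.proxy.common_utils.html_forms.jwt_display_template import (
    jwt_display_template,
)


def render_sso_debug_page(sso_response: dict) -> str:
    # Hypothetical helper: replace the SSO_DATA placeholder consumed by the
    # template's inline JavaScript with the provider's raw response.
    return jwt_display_template.replace("SSO_DATA", json.dumps(sso_response))


html = render_sso_debug_page(
    {"mail": "user@example.com", "displayName": "Example User"}
)
```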
@ -1434,7 +1434,7 @@ async def get_user_daily_activity(
|
|||
default=1, description="Page number for pagination", ge=1
|
||||
),
|
||||
page_size: int = fastapi.Query(
|
||||
default=50, description="Items per page", ge=1, le=100
|
||||
default=50, description="Items per page", ge=1, le=1000
|
||||
),
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
) -> SpendAnalyticsPaginatedResponse:
|
||||
|
|
|
@ -816,9 +816,6 @@ async def add_member_to_organization(
|
|||
return user_object, organization_membership
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
raise ValueError(
|
||||
f"Error adding member={member} to organization={organization_id}: {e}"
|
||||
)
|
||||
|
|
File diff suppressed because it is too large
|
@ -4,16 +4,26 @@ import json
|
|||
import uuid
|
||||
from base64 import b64encode
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Union
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
from urllib.parse import parse_qs, urlencode, urlparse
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
|
||||
from fastapi import (
|
||||
APIRouter,
|
||||
Depends,
|
||||
HTTPException,
|
||||
Request,
|
||||
Response,
|
||||
UploadFile,
|
||||
status,
|
||||
)
|
||||
from fastapi.responses import StreamingResponse
|
||||
from starlette.datastructures import UploadFile as StarletteUploadFile
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
|
||||
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
|
||||
from litellm.proxy._types import (
|
||||
ConfigFieldInfo,
|
||||
|
@ -358,6 +368,92 @@ class HttpPassThroughEndpointHelpers:
|
|||
)
|
||||
return response
|
||||
|
||||
@staticmethod
|
||||
async def non_streaming_http_request_handler(
|
||||
request: Request,
|
||||
async_client: httpx.AsyncClient,
|
||||
url: httpx.URL,
|
||||
headers: dict,
|
||||
requested_query_params: Optional[dict] = None,
|
||||
_parsed_body: Optional[dict] = None,
|
||||
) -> httpx.Response:
|
||||
"""
|
||||
Handle non-streaming HTTP requests
|
||||
|
||||
Handles the special cases of GET requests, multipart/form-data requests, and generic httpx requests
|
||||
"""
|
||||
if request.method == "GET":
|
||||
response = await async_client.request(
|
||||
method=request.method,
|
||||
url=url,
|
||||
headers=headers,
|
||||
params=requested_query_params,
|
||||
)
|
||||
elif HttpPassThroughEndpointHelpers.is_multipart(request) is True:
|
||||
return await HttpPassThroughEndpointHelpers.make_multipart_http_request(
|
||||
request=request,
|
||||
async_client=async_client,
|
||||
url=url,
|
||||
headers=headers,
|
||||
requested_query_params=requested_query_params,
|
||||
)
|
||||
else:
|
||||
# Generic httpx method
|
||||
response = await async_client.request(
|
||||
method=request.method,
|
||||
url=url,
|
||||
headers=headers,
|
||||
params=requested_query_params,
|
||||
json=_parsed_body,
|
||||
)
|
||||
return response
|
||||
|
||||
@staticmethod
|
||||
def is_multipart(request: Request) -> bool:
|
||||
"""Check if the request is a multipart/form-data request"""
|
||||
return "multipart/form-data" in request.headers.get("content-type", "")
|
||||
|
||||
@staticmethod
|
||||
async def _build_request_files_from_upload_file(
|
||||
upload_file: Union[UploadFile, StarletteUploadFile],
|
||||
) -> Tuple[Optional[str], bytes, Optional[str]]:
|
||||
"""Build a request files dict from an UploadFile object"""
|
||||
file_content = await upload_file.read()
|
||||
return (upload_file.filename, file_content, upload_file.content_type)
|
||||
|
||||
@staticmethod
|
||||
async def make_multipart_http_request(
|
||||
request: Request,
|
||||
async_client: httpx.AsyncClient,
|
||||
url: httpx.URL,
|
||||
headers: dict,
|
||||
requested_query_params: Optional[dict] = None,
|
||||
) -> httpx.Response:
|
||||
"""Process multipart/form-data requests, handling both files and form fields"""
|
||||
form_data = await request.form()
|
||||
files = {}
|
||||
form_data_dict = {}
|
||||
|
||||
for field_name, field_value in form_data.items():
|
||||
if isinstance(field_value, (StarletteUploadFile, UploadFile)):
|
||||
files[field_name] = (
|
||||
await HttpPassThroughEndpointHelpers._build_request_files_from_upload_file(
|
||||
upload_file=field_value
|
||||
)
|
||||
)
|
||||
else:
|
||||
form_data_dict[field_name] = field_value
|
||||
|
||||
response = await async_client.request(
|
||||
method=request.method,
|
||||
url=url,
|
||||
headers=headers,
|
||||
params=requested_query_params,
|
||||
files=files,
|
||||
data=form_data_dict,
|
||||
)
|
||||
return response
|
||||
|
||||
|
||||
async def pass_through_request( # noqa: PLR0915
|
||||
request: Request,
|
||||
|
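A standalone sketch of the multipart handling introduced above, assuming only `httpx`, FastAPI, and Starlette types: upload parts go into `files` as `(filename, content, content_type)` tuples, plain fields go into `data`, mirroring `make_multipart_http_request`.

```python
import httpx
from fastapi import Request
from starlette.datastructures import UploadFile as StarletteUploadFile


async def forward_multipart(
    request: Request, client: httpx.AsyncClient, url: str, headers: dict
) -> httpx.Response:
    # Split the incoming form into file parts and plain fields.
    form = await request.form()
    files, data = {}, {}
    for name, value in form.items():
        if isinstance(value, StarletteUploadFile):
            content = await value.read()
            files[name] = (value.filename, content, value.content_type)
        else:
            data[name] = value
    return await client.request(
        request.method, url, headers=headers, files=files, data=data
    )
```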
@ -424,7 +520,7 @@ async def pass_through_request( # noqa: PLR0915
|
|||
start_time = datetime.now()
|
||||
logging_obj = Logging(
|
||||
model="unknown",
|
||||
messages=[{"role": "user", "content": json.dumps(_parsed_body)}],
|
||||
messages=[{"role": "user", "content": safe_dumps(_parsed_body)}],
|
||||
stream=False,
|
||||
call_type="pass_through_endpoint",
|
||||
start_time=start_time,
|
||||
|
@ -453,7 +549,6 @@ async def pass_through_request( # noqa: PLR0915
|
|||
logging_obj.model_call_details["litellm_call_id"] = litellm_call_id
|
||||
|
||||
# combine url with query params for logging
|
||||
|
||||
requested_query_params: Optional[dict] = (
|
||||
query_params or request.query_params.__dict__
|
||||
)
|
||||
|
@ -474,7 +569,7 @@ async def pass_through_request( # noqa: PLR0915
|
|||
logging_url = str(url) + "?" + requested_query_params_str
|
||||
|
||||
logging_obj.pre_call(
|
||||
input=[{"role": "user", "content": json.dumps(_parsed_body)}],
|
||||
input=[{"role": "user", "content": safe_dumps(_parsed_body)}],
|
||||
api_key="",
|
||||
additional_args={
|
||||
"complete_input_dict": _parsed_body,
|
||||
|
@ -525,22 +620,16 @@ async def pass_through_request( # noqa: PLR0915
|
|||
)
|
||||
verbose_proxy_logger.debug("request body: {}".format(_parsed_body))
|
||||
|
||||
if request.method == "GET":
|
||||
response = await async_client.request(
|
||||
method=request.method,
|
||||
response = (
|
||||
await HttpPassThroughEndpointHelpers.non_streaming_http_request_handler(
|
||||
request=request,
|
||||
async_client=async_client,
|
||||
url=url,
|
||||
headers=headers,
|
||||
params=requested_query_params,
|
||||
requested_query_params=requested_query_params,
|
||||
_parsed_body=_parsed_body,
|
||||
)
|
||||
else:
|
||||
response = await async_client.request(
|
||||
method=request.method,
|
||||
url=url,
|
||||
headers=headers,
|
||||
params=requested_query_params,
|
||||
json=_parsed_body,
|
||||
)
|
||||
|
||||
)
|
||||
verbose_proxy_logger.debug("response.headers= %s", response.headers)
|
||||
|
||||
if _is_streaming_response(response) is True:
|
||||
|
|
|
@ -13,7 +13,11 @@ from litellm._logging import verbose_proxy_logger
|
|||
from litellm.litellm_core_utils.core_helpers import get_litellm_metadata_from_kwargs
|
||||
from litellm.proxy._types import SpendLogsMetadata, SpendLogsPayload
|
||||
from litellm.proxy.utils import PrismaClient, hash_token
|
||||
from litellm.types.utils import StandardLoggingMCPToolCall, StandardLoggingPayload
|
||||
from litellm.types.utils import (
|
||||
StandardLoggingMCPToolCall,
|
||||
StandardLoggingModelInformation,
|
||||
StandardLoggingPayload,
|
||||
)
|
||||
from litellm.utils import get_end_user_id_for_cost_tracking
|
||||
|
||||
|
||||
|
@ -39,6 +43,8 @@ def _get_spend_logs_metadata(
|
|||
applied_guardrails: Optional[List[str]] = None,
|
||||
batch_models: Optional[List[str]] = None,
|
||||
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
|
||||
usage_object: Optional[dict] = None,
|
||||
model_map_information: Optional[StandardLoggingModelInformation] = None,
|
||||
) -> SpendLogsMetadata:
|
||||
if metadata is None:
|
||||
return SpendLogsMetadata(
|
||||
|
@ -57,6 +63,8 @@ def _get_spend_logs_metadata(
|
|||
proxy_server_request=None,
|
||||
batch_models=None,
|
||||
mcp_tool_call_metadata=None,
|
||||
model_map_information=None,
|
||||
usage_object=None,
|
||||
)
|
||||
verbose_proxy_logger.debug(
|
||||
"getting payload for SpendLogs, available keys in metadata: "
|
||||
|
@ -74,6 +82,8 @@ def _get_spend_logs_metadata(
|
|||
clean_metadata["applied_guardrails"] = applied_guardrails
|
||||
clean_metadata["batch_models"] = batch_models
|
||||
clean_metadata["mcp_tool_call_metadata"] = mcp_tool_call_metadata
|
||||
clean_metadata["usage_object"] = usage_object
|
||||
clean_metadata["model_map_information"] = model_map_information
|
||||
return clean_metadata
|
||||
|
||||
|
||||
|
@ -153,6 +163,17 @@ def get_logging_payload( # noqa: PLR0915
|
|||
|
||||
api_key = metadata.get("user_api_key", "")
|
||||
|
||||
standard_logging_prompt_tokens: int = 0
|
||||
standard_logging_completion_tokens: int = 0
|
||||
standard_logging_total_tokens: int = 0
|
||||
if standard_logging_payload is not None:
|
||||
standard_logging_prompt_tokens = standard_logging_payload.get(
|
||||
"prompt_tokens", 0
|
||||
)
|
||||
standard_logging_completion_tokens = standard_logging_payload.get(
|
||||
"completion_tokens", 0
|
||||
)
|
||||
standard_logging_total_tokens = standard_logging_payload.get("total_tokens", 0)
|
||||
if api_key is not None and isinstance(api_key, str):
|
||||
if api_key.startswith("sk-"):
|
||||
# hash the api_key
|
||||
|
@ -208,6 +229,16 @@ def get_logging_payload( # noqa: PLR0915
|
|||
if standard_logging_payload is not None
|
||||
else None
|
||||
),
|
||||
usage_object=(
|
||||
standard_logging_payload["metadata"].get("usage_object", None)
|
||||
if standard_logging_payload is not None
|
||||
else None
|
||||
),
|
||||
model_map_information=(
|
||||
standard_logging_payload["model_map_information"]
|
||||
if standard_logging_payload is not None
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
special_usage_fields = ["completion_tokens", "prompt_tokens", "total_tokens"]
|
||||
|
@ -227,6 +258,7 @@ def get_logging_payload( # noqa: PLR0915
|
|||
import time
|
||||
|
||||
id = f"{id}_cache_hit{time.time()}" # SpendLogs does not allow duplicate request_id
|
||||
|
||||
try:
|
||||
payload: SpendLogsPayload = SpendLogsPayload(
|
||||
request_id=str(id),
|
||||
|
@ -242,9 +274,11 @@ def get_logging_payload( # noqa: PLR0915
|
|||
metadata=json.dumps(clean_metadata),
|
||||
cache_key=cache_key,
|
||||
spend=kwargs.get("response_cost", 0),
|
||||
total_tokens=usage.get("total_tokens", 0),
|
||||
prompt_tokens=usage.get("prompt_tokens", 0),
|
||||
completion_tokens=usage.get("completion_tokens", 0),
|
||||
total_tokens=usage.get("total_tokens", standard_logging_total_tokens),
|
||||
prompt_tokens=usage.get("prompt_tokens", standard_logging_prompt_tokens),
|
||||
completion_tokens=usage.get(
|
||||
"completion_tokens", standard_logging_completion_tokens
|
||||
),
|
||||
request_tags=request_tags,
|
||||
end_user=end_user_id or "",
|
||||
api_base=litellm_params.get("api_base", ""),
|
||||
|
@ -360,6 +394,39 @@ def _get_messages_for_spend_logs_payload(
|
|||
return "{}"
|
||||
|
||||
|
||||
def _sanitize_request_body_for_spend_logs_payload(
|
||||
request_body: dict,
|
||||
visited: Optional[set] = None,
|
||||
) -> dict:
|
||||
"""
|
||||
Recursively sanitize request body to prevent logging large base64 strings or other large values.
|
||||
Truncates strings longer than 1000 characters and handles nested dictionaries.
|
||||
"""
|
||||
MAX_STRING_LENGTH = 1000
|
||||
|
||||
if visited is None:
|
||||
visited = set()
|
||||
|
||||
# Get the object's memory address to track visited objects
|
||||
obj_id = id(request_body)
|
||||
if obj_id in visited:
|
||||
return {}
|
||||
visited.add(obj_id)
|
||||
|
||||
def _sanitize_value(value: Any) -> Any:
|
||||
if isinstance(value, dict):
|
||||
return _sanitize_request_body_for_spend_logs_payload(value, visited)
|
||||
elif isinstance(value, list):
|
||||
return [_sanitize_value(item) for item in value]
|
||||
elif isinstance(value, str):
|
||||
if len(value) > MAX_STRING_LENGTH:
|
||||
return f"{value[:MAX_STRING_LENGTH]}... (truncated {len(value) - MAX_STRING_LENGTH} chars)"
|
||||
return value
|
||||
return value
|
||||
|
||||
return {k: _sanitize_value(v) for k, v in request_body.items()}
|
||||
|
||||
|
||||
def _add_proxy_server_request_to_metadata(
|
||||
metadata: dict,
|
||||
litellm_params: dict,
|
||||
|
@ -373,6 +440,7 @@ def _add_proxy_server_request_to_metadata(
|
|||
)
|
||||
if _proxy_server_request is not None:
|
||||
_request_body = _proxy_server_request.get("body", {}) or {}
|
||||
_request_body = _sanitize_request_body_for_spend_logs_payload(_request_body)
|
||||
_request_body_json_str = json.dumps(_request_body, default=str)
|
||||
metadata["proxy_server_request"] = _request_body_json_str
|
||||
return metadata
|
||||
|
|
|
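A quick illustration of the truncation behavior of `_sanitize_request_body_for_spend_logs_payload` as defined above; the sample payload is made up, and the import path is assumed since the diff viewer does not show the filename for this hunk.

```python
# Module path assumed; the diff viewer does not show the filename for this hunk.
from litellm.proxy.spend_tracking.spend_tracking_utils import (
    _sanitize_request_body_for_spend_logs_payload,
)

body = {
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "data:image/png;base64," + "A" * 5000}],
}

sanitized = _sanitize_request_body_for_spend_logs_payload(body)
# Strings longer than 1000 characters become
# "<first 1000 chars>... (truncated N chars)", so the fake base64 blob above is
# cut to roughly 1000 characters before being written into proxy_server_request.
print(len(sanitized["messages"][0]["content"]))
```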
@ -116,6 +116,7 @@ from litellm.types.router import (
|
|||
AllowedFailsPolicy,
|
||||
AssistantsTypedDict,
|
||||
CredentialLiteLLMParams,
|
||||
CustomPricingLiteLLMParams,
|
||||
CustomRoutingStrategyBase,
|
||||
Deployment,
|
||||
DeploymentTypedDict,
|
||||
|
@ -132,6 +133,7 @@ from litellm.types.router import (
|
|||
)
|
||||
from litellm.types.services import ServiceTypes
|
||||
from litellm.types.utils import GenericBudgetConfigType
|
||||
from litellm.types.utils import ModelInfo
|
||||
from litellm.types.utils import ModelInfo as ModelMapInfo
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
from litellm.utils import (
|
||||
|
@ -3324,7 +3326,6 @@ class Router:
|
|||
|
||||
return response
|
||||
except Exception as new_exception:
|
||||
traceback.print_exc()
|
||||
parent_otel_span = _get_parent_otel_span_from_kwargs(kwargs)
|
||||
verbose_router_logger.error(
|
||||
"litellm.router.py::async_function_with_fallbacks() - Error occurred while trying to do fallbacks - {}\n{}\n\nDebug Information:\nCooldown Deployments={}".format(
|
||||
|
@ -4301,7 +4302,20 @@ class Router:
|
|||
model_info=_model_info,
|
||||
)
|
||||
|
||||
for field in CustomPricingLiteLLMParams.model_fields.keys():
|
||||
if deployment.litellm_params.get(field) is not None:
|
||||
_model_info[field] = deployment.litellm_params[field]
|
||||
|
||||
## REGISTER MODEL INFO IN LITELLM MODEL COST MAP
|
||||
model_id = deployment.model_info.id
|
||||
if model_id is not None:
|
||||
litellm.register_model(
|
||||
model_cost={
|
||||
model_id: _model_info,
|
||||
}
|
||||
)
|
||||
|
||||
## OLD MODEL REGISTRATION ## Kept to prevent breaking changes
|
||||
_model_name = deployment.litellm_params.model
|
||||
if deployment.litellm_params.custom_llm_provider is not None:
|
||||
_model_name = (
|
||||
|
@ -4802,6 +4816,42 @@ class Router:
|
|||
model_name = model_info["model_name"]
|
||||
return self.get_model_list(model_name=model_name)
|
||||
|
||||
def get_deployment_model_info(
|
||||
self, model_id: str, model_name: str
|
||||
) -> Optional[ModelInfo]:
|
||||
"""
|
||||
For a given model id, return the model info
|
||||
|
||||
1. Check if model_id is in model info
|
||||
2. If not, check if litellm model name is in model info
|
||||
3. If not, return None
|
||||
"""
|
||||
from litellm.utils import _update_dictionary
|
||||
|
||||
model_info: Optional[ModelInfo] = None
|
||||
litellm_model_name_model_info: Optional[ModelInfo] = None
|
||||
|
||||
try:
|
||||
model_info = litellm.get_model_info(model=model_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
litellm_model_name_model_info = litellm.get_model_info(model=model_name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if model_info is not None and litellm_model_name_model_info is not None:
|
||||
model_info = cast(
|
||||
ModelInfo,
|
||||
_update_dictionary(
|
||||
cast(dict, litellm_model_name_model_info).copy(),
|
||||
cast(dict, model_info),
|
||||
),
|
||||
)
|
||||
|
||||
return model_info
|
||||
|
||||
def _set_model_group_info( # noqa: PLR0915
|
||||
self, model_group: str, user_facing_model_group_name: str
|
||||
) -> Optional[ModelGroupInfo]:
|
||||
|
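The merge order in `get_deployment_model_info` above can be summarized with plain dicts: the deployment-specific info (looked up by model id) is overlaid on the base info for the litellm model name, and only non-None values win. A minimal sketch with illustrative values, not the actual `ModelInfo` objects:

```python
base_info = {
    "input_cost_per_token": 3e-06,
    "output_cost_per_token": 1.5e-05,
    "max_tokens": 8192,
}
deployment_info = {"input_cost_per_token": 6e-06, "output_cost_per_token": 3e-05}

# Same precedence as get_deployment_model_info: start from the model-name info,
# then overlay whatever was registered for this specific deployment id.
merged = {**base_info, **{k: v for k, v in deployment_info.items() if v is not None}}
assert merged["input_cost_per_token"] == 6e-06  # deployment override wins
assert merged["max_tokens"] == 8192             # base info is preserved
```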
@ -4860,9 +4910,16 @@ class Router:
|
|||
|
||||
# get model info
|
||||
try:
|
||||
model_info = litellm.get_model_info(model=litellm_params.model)
|
||||
model_id = model.get("model_info", {}).get("id", None)
|
||||
if model_id is not None:
|
||||
model_info = self.get_deployment_model_info(
|
||||
model_id=model_id, model_name=litellm_params.model
|
||||
)
|
||||
else:
|
||||
model_info = None
|
||||
except Exception:
|
||||
model_info = None
|
||||
|
||||
# get llm provider
|
||||
litellm_model, llm_provider = "", ""
|
||||
try:
|
||||
|
|
|
@ -19,6 +19,7 @@ class httpxSpecialProvider(str, Enum):
|
|||
SecretManager = "secret_manager"
|
||||
PassThroughEndpoint = "pass_through_endpoint"
|
||||
PromptFactory = "prompt_factory"
|
||||
SSO_HANDLER = "sso_handler"
|
||||
|
||||
|
||||
VerifyTypes = Union[str, bool, ssl.SSLContext]
|
||||
|
|
|
@ -187,6 +187,7 @@ class Tools(TypedDict, total=False):
|
|||
function_declarations: List[FunctionDeclaration]
|
||||
googleSearch: dict
|
||||
googleSearchRetrieval: dict
|
||||
enterpriseWebSearch: dict
|
||||
code_execution: dict
|
||||
retrieval: Retrieval
|
||||
|
||||
|
@ -497,6 +498,51 @@ class OutputConfig(TypedDict, total=False):
|
|||
gcsDestination: GcsDestination
|
||||
|
||||
|
||||
class GcsBucketResponse(TypedDict):
|
||||
"""
|
||||
TypedDict for GCS bucket upload response
|
||||
|
||||
Attributes:
|
||||
kind: The kind of item this is. For objects, this is always storage#object
|
||||
id: The ID of the object
|
||||
selfLink: The link to this object
|
||||
mediaLink: The link to download the object
|
||||
name: The name of the object
|
||||
bucket: The name of the bucket containing this object
|
||||
generation: The content generation of this object
|
||||
metageneration: The metadata generation of this object
|
||||
contentType: The content type of the object
|
||||
storageClass: The storage class of the object
|
||||
size: The size of the object in bytes
|
||||
md5Hash: The MD5 hash of the object
|
||||
crc32c: The CRC32c checksum of the object
|
||||
etag: The ETag of the object
|
||||
timeCreated: The creation time of the object
|
||||
updated: The last update time of the object
|
||||
timeStorageClassUpdated: The time the storage class was last updated
|
||||
timeFinalized: The time the object was finalized
|
||||
"""
|
||||
|
||||
kind: Literal["storage#object"]
|
||||
id: str
|
||||
selfLink: str
|
||||
mediaLink: str
|
||||
name: str
|
||||
bucket: str
|
||||
generation: str
|
||||
metageneration: str
|
||||
contentType: str
|
||||
storageClass: str
|
||||
size: str
|
||||
md5Hash: str
|
||||
crc32c: str
|
||||
etag: str
|
||||
timeCreated: str
|
||||
updated: str
|
||||
timeStorageClassUpdated: str
|
||||
timeFinalized: str
|
||||
|
||||
|
||||
class VertexAIBatchPredictionJob(TypedDict):
|
||||
displayName: str
|
||||
model: str
|
||||
|
|
litellm/types/proxy/management_endpoints/ui_sso.py (new file, 27 lines added)
|
@ -0,0 +1,27 @@
|
|||
from typing import List, Optional, TypedDict
|
||||
|
||||
|
||||
class MicrosoftGraphAPIUserGroupDirectoryObject(TypedDict, total=False):
|
||||
"""Model for Microsoft Graph API directory object"""
|
||||
|
||||
odata_type: Optional[str]
|
||||
id: Optional[str]
|
||||
deletedDateTime: Optional[str]
|
||||
description: Optional[str]
|
||||
displayName: Optional[str]
|
||||
roleTemplateId: Optional[str]
|
||||
|
||||
|
||||
class MicrosoftGraphAPIUserGroupResponse(TypedDict, total=False):
|
||||
"""Model for Microsoft Graph API user groups response"""
|
||||
|
||||
odata_context: Optional[str]
|
||||
odata_nextLink: Optional[str]
|
||||
value: Optional[List[MicrosoftGraphAPIUserGroupDirectoryObject]]
|
||||
|
||||
|
||||
class MicrosoftServicePrincipalTeam(TypedDict, total=False):
|
||||
"""Model for Microsoft Service Principal Team"""
|
||||
|
||||
principalDisplayName: Optional[str]
|
||||
principalId: Optional[str]
|
|
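A hedged sketch of how these TypedDicts might be consumed when walking the paginated Graph API response. The `fetch_page` callable here is hypothetical; the real HTTP call lives in `MicrosoftSSOHandler.get_user_groups_from_graph_api`, which is not shown in this hunk.

```python
from typing import Awaitable, Callable, List, Optional

from litellm.types.proxy.management_endpoints.ui_sso import (
    MicrosoftGraphAPIUserGroupResponse,
)


async def collect_group_ids(
    fetch_page: Callable[[Optional[str]], Awaitable[MicrosoftGraphAPIUserGroupResponse]],
) -> List[str]:
    # Follow odata_nextLink until the Graph API stops returning a next page,
    # collecting the id of every directory object along the way.
    group_ids: List[str] = []
    next_link: Optional[str] = None
    while True:
        page = await fetch_page(next_link)
        for obj in page.get("value") or []:
            if obj.get("id"):
                group_ids.append(obj["id"])
        next_link = page.get("odata_nextLink")
        if not next_link:
            break
    return group_ids
```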
@ -162,7 +162,15 @@ class CredentialLiteLLMParams(BaseModel):
|
|||
watsonx_region_name: Optional[str] = None
|
||||
|
||||
|
||||
class GenericLiteLLMParams(CredentialLiteLLMParams):
|
||||
class CustomPricingLiteLLMParams(BaseModel):
|
||||
## CUSTOM PRICING ##
|
||||
input_cost_per_token: Optional[float] = None
|
||||
output_cost_per_token: Optional[float] = None
|
||||
input_cost_per_second: Optional[float] = None
|
||||
output_cost_per_second: Optional[float] = None
|
||||
|
||||
|
||||
class GenericLiteLLMParams(CredentialLiteLLMParams, CustomPricingLiteLLMParams):
|
||||
"""
|
||||
LiteLLM Params without 'model' arg (used across completion / assistants api)
|
||||
"""
|
||||
|
@ -184,12 +192,6 @@ class GenericLiteLLMParams(CredentialLiteLLMParams):
|
|||
## LOGGING PARAMS ##
|
||||
litellm_trace_id: Optional[str] = None
|
||||
|
||||
## CUSTOM PRICING ##
|
||||
input_cost_per_token: Optional[float] = None
|
||||
output_cost_per_token: Optional[float] = None
|
||||
input_cost_per_second: Optional[float] = None
|
||||
output_cost_per_second: Optional[float] = None
|
||||
|
||||
max_file_size_mb: Optional[float] = None
|
||||
|
||||
# Deployment budgets
|
||||
|
|
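Since `GenericLiteLLMParams` now inherits from `CustomPricingLiteLLMParams`, the per-token / per-second pricing fields are accepted exactly as before; they are just defined once on the mixin. A quick construction check with illustrative values:

```python
from litellm.types.router import CustomPricingLiteLLMParams

pricing = CustomPricingLiteLLMParams(
    input_cost_per_token=0.000006,
    output_cost_per_token=0.00003,
)

# Because the fields live on one mixin, callers can iterate them generically,
# e.g. when copying custom pricing into model_info during deployment
# registration (see the router.py hunk earlier in this diff).
for field, value in pricing.model_dump().items():
    print(field, value)
```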
|
@ -2,7 +2,7 @@ import json
|
|||
import time
|
||||
import uuid
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
||||
from typing import Any, Dict, List, Literal, Mapping, Optional, Tuple, Union
|
||||
|
||||
from aiohttp import FormData
|
||||
from openai._models import BaseModel as OpenAIObject
|
||||
|
@ -120,6 +120,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
|
|||
input_cost_per_character: Optional[float] # only for vertex ai models
|
||||
input_cost_per_audio_token: Optional[float]
|
||||
input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models
|
||||
input_cost_per_token_above_200k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai gemini-2.5-pro models
|
||||
input_cost_per_character_above_128k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai models
|
||||
|
@ -136,6 +139,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
|
|||
output_cost_per_token_above_128k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai models
|
||||
output_cost_per_token_above_200k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai gemini-2.5-pro models
|
||||
output_cost_per_character_above_128k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai models
|
||||
|
@ -1703,6 +1709,7 @@ class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
|
|||
prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata]
|
||||
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall]
|
||||
applied_guardrails: Optional[List[str]]
|
||||
usage_object: Optional[dict]
|
||||
|
||||
|
||||
class StandardLoggingAdditionalHeaders(TypedDict, total=False):
|
||||
|
@ -1723,6 +1730,7 @@ class StandardLoggingHiddenParams(TypedDict):
|
|||
additional_headers: Optional[StandardLoggingAdditionalHeaders]
|
||||
batch_models: Optional[List[str]]
|
||||
litellm_model_name: Optional[str] # the model name sent to the provider by litellm
|
||||
usage_object: Optional[dict]
|
||||
|
||||
|
||||
class StandardLoggingModelInformation(TypedDict):
|
||||
|
@ -2171,3 +2179,20 @@ class CreateCredentialItem(CredentialBase):
|
|||
if not values.get("credential_values") and not values.get("model_id"):
|
||||
raise ValueError("Either credential_values or model_id must be set")
|
||||
return values
|
||||
|
||||
|
||||
class ExtractedFileData(TypedDict):
|
||||
"""
|
||||
TypedDict for storing processed file data
|
||||
|
||||
Attributes:
|
||||
filename: Name of the file if provided
|
||||
content: The file content in bytes
|
||||
content_type: MIME type of the file
|
||||
headers: Any additional headers for the file
|
||||
"""
|
||||
|
||||
filename: Optional[str]
|
||||
content: bytes
|
||||
content_type: Optional[str]
|
||||
headers: Mapping[str, str]
|
||||
|
|
|
@ -2245,7 +2245,8 @@ def supports_embedding_image_input(
|
|||
####### HELPER FUNCTIONS ################
|
||||
def _update_dictionary(existing_dict: Dict, new_dict: dict) -> dict:
|
||||
for k, v in new_dict.items():
|
||||
existing_dict[k] = v
|
||||
if v is not None:
|
||||
existing_dict[k] = v
|
||||
|
||||
return existing_dict
|
||||
|
||||
|
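The one-line change above means `None` values in the incoming dict no longer clobber existing entries. A small before/after check, re-implementing the helper standalone for illustration:

```python
def _update_dictionary(existing_dict: dict, new_dict: dict) -> dict:
    for k, v in new_dict.items():
        if v is not None:
            existing_dict[k] = v
    return existing_dict


existing = {"input_cost_per_token": 6e-06, "mode": "chat"}
incoming = {"input_cost_per_token": None, "max_tokens": 8192}

merged = _update_dictionary(existing, incoming)
assert merged["input_cost_per_token"] == 6e-06  # preserved: None no longer overwrites
assert merged["max_tokens"] == 8192             # new non-None keys still land
```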
@ -4532,6 +4533,9 @@ def _get_model_info_helper( # noqa: PLR0915
|
|||
input_cost_per_token_above_128k_tokens=_model_info.get(
|
||||
"input_cost_per_token_above_128k_tokens", None
|
||||
),
|
||||
input_cost_per_token_above_200k_tokens=_model_info.get(
|
||||
"input_cost_per_token_above_200k_tokens", None
|
||||
),
|
||||
input_cost_per_query=_model_info.get("input_cost_per_query", None),
|
||||
input_cost_per_second=_model_info.get("input_cost_per_second", None),
|
||||
input_cost_per_audio_token=_model_info.get(
|
||||
|
@ -4556,6 +4560,9 @@ def _get_model_info_helper( # noqa: PLR0915
|
|||
output_cost_per_character_above_128k_tokens=_model_info.get(
|
||||
"output_cost_per_character_above_128k_tokens", None
|
||||
),
|
||||
output_cost_per_token_above_200k_tokens=_model_info.get(
|
||||
"output_cost_per_token_above_200k_tokens", None
|
||||
),
|
||||
output_cost_per_second=_model_info.get("output_cost_per_second", None),
|
||||
output_cost_per_image=_model_info.get("output_cost_per_image", None),
|
||||
output_vector_size=_model_info.get("output_vector_size", None),
|
||||
|
@ -6519,6 +6526,10 @@ class ProviderConfigManager:
|
|||
)
|
||||
|
||||
return GoogleAIStudioFilesHandler()
|
||||
elif LlmProviders.VERTEX_AI == provider:
|
||||
from litellm.llms.vertex_ai.files.transformation import VertexAIFilesConfig
|
||||
|
||||
return VertexAIFilesConfig()
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
@ -380,6 +380,7 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": false,
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supported_endpoints": ["/v1/responses", "/v1/batch"]
|
||||
},
|
||||
"o1-pro-2025-03-19": {
|
||||
|
@ -401,6 +402,7 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": false,
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supported_endpoints": ["/v1/responses", "/v1/batch"]
|
||||
},
|
||||
"o1": {
|
||||
|
@ -2409,25 +2411,26 @@
|
|||
"max_tokens": 4096,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
|
||||
"source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112"
|
||||
},
|
||||
"azure_ai/Phi-4-multimodal-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 131072,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"input_cost_per_token": 0.00000008,
|
||||
"input_cost_per_audio_token": 0.000004,
|
||||
"output_cost_per_token": 0.00032,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_audio_input": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
|
||||
"source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112"
|
||||
},
|
||||
"azure_ai/Phi-4": {
|
||||
"max_tokens": 16384,
|
||||
|
@ -4511,20 +4514,10 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.00001,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -4535,6 +4528,9 @@
|
|||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-pro-exp-02-05": {
|
||||
|
@ -4547,20 +4543,10 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.00001,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -4571,6 +4557,9 @@
|
|||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-flash-exp": {
|
||||
|
@ -4604,6 +4593,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4628,6 +4619,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-flash-thinking-exp": {
|
||||
|
@ -4661,6 +4654,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4695,6 +4690,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": false,
|
||||
"supports_audio_output": false,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4720,6 +4717,7 @@
|
|||
"supports_audio_output": true,
|
||||
"supports_audio_input": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
|
@ -4742,6 +4740,32 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"gemini-2.0-flash-lite-001": {
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 50,
|
||||
"input_cost_per_audio_token": 0.000000075,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4807,6 +4831,7 @@
|
|||
"supports_audio_output": true,
|
||||
"supports_audio_input": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
|
@ -4832,6 +4857,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-001": {
|
||||
|
@ -4857,6 +4884,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
"gemini/gemini-2.5-pro-preview-03-25": {
|
||||
|
@ -4871,9 +4900,9 @@
|
|||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 0.0000007,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000025,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.0000010,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000015,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"rpm": 10000,
|
||||
|
@ -4884,6 +4913,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-exp": {
|
||||
|
@ -4919,6 +4950,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4945,6 +4978,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-thinking-exp": {
|
||||
|
@ -4980,6 +5015,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -5016,6 +5053,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.65.4"
|
||||
version = "1.65.6"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
|
@ -118,7 +118,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.65.4"
|
||||
version = "1.65.6"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
|
@ -10,7 +10,6 @@ gunicorn==23.0.0 # server dep
|
|||
uvloop==0.21.0 # uvicorn dep, gives us much better performance under load
|
||||
boto3==1.34.34 # aws bedrock/sagemaker calls
|
||||
redis==5.2.1 # redis caching
|
||||
redisvl==0.4.1 # semantic caching
|
||||
prisma==0.11.0 # for db
|
||||
mangum==0.17.0 # for aws lambda functions
|
||||
pynacl==1.5.0 # for encrypting keys
|
||||
|
|
|
@ -423,25 +423,35 @@ mock_vertex_batch_response = {
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_avertex_batch_prediction():
|
||||
with patch(
|
||||
async def test_avertex_batch_prediction(monkeypatch):
|
||||
monkeypatch.setenv("GCS_BUCKET_NAME", "litellm-local")
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
|
||||
client = AsyncHTTPHandler()
|
||||
|
||||
async def mock_side_effect(*args, **kwargs):
|
||||
print("args", args, "kwargs", kwargs)
|
||||
url = kwargs.get("url", "")
|
||||
if "files" in url:
|
||||
mock_response.json.return_value = mock_file_response
|
||||
elif "batch" in url:
|
||||
mock_response.json.return_value = mock_vertex_batch_response
|
||||
mock_response.status_code = 200
|
||||
return mock_response
|
||||
|
||||
with patch.object(
|
||||
client, "post", side_effect=mock_side_effect
|
||||
) as mock_post, patch(
|
||||
"litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post"
|
||||
) as mock_post:
|
||||
) as mock_global_post:
|
||||
# Configure mock responses
|
||||
mock_response = MagicMock()
|
||||
mock_response.raise_for_status.return_value = None
|
||||
|
||||
# Set up different responses for different API calls
|
||||
async def mock_side_effect(*args, **kwargs):
|
||||
url = kwargs.get("url", "")
|
||||
if "files" in url:
|
||||
mock_response.json.return_value = mock_file_response
|
||||
elif "batch" in url:
|
||||
mock_response.json.return_value = mock_vertex_batch_response
|
||||
mock_response.status_code = 200
|
||||
return mock_response
|
||||
|
||||
|
||||
mock_post.side_effect = mock_side_effect
|
||||
mock_global_post.side_effect = mock_side_effect
|
||||
|
||||
# load_vertex_ai_credentials()
|
||||
litellm.set_verbose = True
|
||||
|
@ -455,6 +465,7 @@ async def test_avertex_batch_prediction():
|
|||
file=open(file_path, "rb"),
|
||||
purpose="batch",
|
||||
custom_llm_provider="vertex_ai",
|
||||
client=client
|
||||
)
|
||||
print("Response from creating file=", file_obj)
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@ IGNORE_FUNCTIONS = [
|
|||
"_transform_prompt",
|
||||
"mask_dict",
|
||||
"_serialize", # we now set a max depth for this
|
||||
"_sanitize_request_body_for_spend_logs_payload", # testing added for circular reference
|
||||
"_sanitize_value", # testing added for circular reference
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ def get_all_functions_called_in_tests(base_dir):
|
|||
specifically in files containing the word 'router'.
|
||||
"""
|
||||
called_functions = set()
|
||||
test_dirs = ["local_testing", "router_unit_tests"]
|
||||
test_dirs = ["local_testing", "router_unit_tests", "litellm"]
|
||||
|
||||
for test_dir in test_dirs:
|
||||
dir_path = os.path.join(base_dir, test_dir)
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import litellm
|
||||
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
|
||||
StandardBuiltInToolCostTracking,
|
||||
)
|
||||
from litellm.types.llms.openai import FileSearchTool, WebSearchOptions
|
||||
from litellm.types.utils import ModelInfo, ModelResponse, StandardBuiltInToolsParams
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
|
||||
from litellm.types.utils import Usage
|
||||
|
||||
|
||||
def test_generic_cost_per_token_above_200k_tokens():
|
||||
model = "gemini-2.5-pro-exp-03-25"
|
||||
custom_llm_provider = "vertex_ai"
|
||||
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
||||
litellm.model_cost = litellm.get_model_cost_map(url="")
|
||||
|
||||
model_cost_map = litellm.model_cost[model]
|
||||
prompt_tokens = 220 * 1e6
|
||||
completion_tokens = 150
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
prompt_cost, completion_cost = generic_cost_per_token(
|
||||
model=model,
|
||||
usage=usage,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
assert round(prompt_cost, 10) == round(
|
||||
model_cost_map["input_cost_per_token_above_200k_tokens"] * usage.prompt_tokens,
|
||||
10,
|
||||
)
|
||||
assert round(completion_cost, 10) == round(
|
||||
model_cost_map["output_cost_per_token_above_200k_tokens"]
|
||||
* usage.completion_tokens,
|
||||
10,
|
||||
)
|
|
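The expected values in this test follow directly from the updated cost map entries earlier in the diff: once the prompt crosses 200k tokens, the assertions bill the entire prompt at the above-200k input rate, and the completion at the above-200k output rate. A quick back-of-the-envelope check using the gemini-2.5 numbers from this commit:

```python
# Rates from the updated model cost map in this commit
input_above_200k = 0.0000025    # $ per prompt token once the 200k threshold is crossed
output_above_200k = 0.000015    # $ per completion token in the same case

prompt_tokens = 220 * 1e6       # 220M tokens, same as the test
completion_tokens = 150

prompt_cost = prompt_tokens * input_above_200k            # 550.0 USD
completion_cost = completion_tokens * output_above_200k   # 0.00225 USD
print(round(prompt_cost, 2), round(completion_cost, 5))
```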
@ -33,3 +33,26 @@ def test_response_format_transformation_unit_test():
|
|||
"agent_doing": {"title": "Agent Doing", "type": "string"}
|
||||
}
|
||||
print(result)
|
||||
|
||||
|
||||
def test_calculate_usage():
|
||||
"""
|
||||
Do not include cache_creation_input_tokens in the prompt_tokens
|
||||
|
||||
Fixes https://github.com/BerriAI/litellm/issues/9812
|
||||
"""
|
||||
config = AnthropicConfig()
|
||||
|
||||
usage_object = {
|
||||
"input_tokens": 3,
|
||||
"cache_creation_input_tokens": 12304,
|
||||
"cache_read_input_tokens": 0,
|
||||
"output_tokens": 550,
|
||||
}
|
||||
usage = config.calculate_usage(usage_object=usage_object, reasoning_content=None)
|
||||
assert usage.prompt_tokens == 3
|
||||
assert usage.completion_tokens == 550
|
||||
assert usage.total_tokens == 3 + 550
|
||||
assert usage.prompt_tokens_details.cached_tokens == 0
|
||||
assert usage._cache_creation_input_tokens == 12304
|
||||
assert usage._cache_read_input_tokens == 0
|
||||
|
|
|
@ -30,9 +30,7 @@ def test_transform_usage():
|
|||
openai_usage = config._transform_usage(usage)
|
||||
assert (
|
||||
openai_usage.prompt_tokens
|
||||
== usage["inputTokens"]
|
||||
+ usage["cacheWriteInputTokens"]
|
||||
+ usage["cacheReadInputTokens"]
|
||||
== usage["inputTokens"] + usage["cacheReadInputTokens"]
|
||||
)
|
||||
assert openai_usage.completion_tokens == usage["outputTokens"]
|
||||
assert openai_usage.total_tokens == usage["totalTokens"]
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
@ -5,15 +6,23 @@ from typing import Optional, cast
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi import Request
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../..")
|
||||
0, os.path.abspath("../../../")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm.proxy.auth.handle_jwt import JWTHandler
|
||||
from litellm.proxy.management_endpoints.types import CustomOpenID
|
||||
from litellm.proxy.management_endpoints.ui_sso import MicrosoftSSOHandler
|
||||
from litellm.proxy.management_endpoints.ui_sso import (
|
||||
GoogleSSOHandler,
|
||||
MicrosoftSSOHandler,
|
||||
)
|
||||
from litellm.types.proxy.management_endpoints.ui_sso import (
|
||||
MicrosoftGraphAPIUserGroupDirectoryObject,
|
||||
MicrosoftGraphAPIUserGroupResponse,
|
||||
)
|
||||
|
||||
|
||||
def test_microsoft_sso_handler_openid_from_response():
|
||||
|
@ -27,23 +36,14 @@ def test_microsoft_sso_handler_openid_from_response():
|
|||
"surname": "User",
|
||||
"some_other_field": "value",
|
||||
}
|
||||
|
||||
# Create a mock JWTHandler that returns predetermined team IDs
|
||||
mock_jwt_handler = MagicMock(spec=JWTHandler)
|
||||
expected_team_ids = ["team1", "team2"]
|
||||
mock_jwt_handler.get_team_ids_from_jwt.return_value = expected_team_ids
|
||||
|
||||
# Act
|
||||
# Call the method being tested
|
||||
result = MicrosoftSSOHandler.openid_from_response(
|
||||
response=mock_response, jwt_handler=mock_jwt_handler
|
||||
response=mock_response, team_ids=expected_team_ids
|
||||
)
|
||||
|
||||
# Assert
|
||||
# Verify the JWT handler was called with the correct parameters
|
||||
mock_jwt_handler.get_team_ids_from_jwt.assert_called_once_with(
|
||||
cast(dict, mock_response)
|
||||
)
|
||||
|
||||
# Check that the result is a CustomOpenID object with the expected values
|
||||
assert isinstance(result, CustomOpenID)
|
||||
|
@ -59,13 +59,9 @@ def test_microsoft_sso_handler_openid_from_response():
|
|||
def test_microsoft_sso_handler_with_empty_response():
|
||||
# Arrange
|
||||
# Test with None response
|
||||
mock_jwt_handler = MagicMock(spec=JWTHandler)
|
||||
mock_jwt_handler.get_team_ids_from_jwt.return_value = []
|
||||
|
||||
# Act
|
||||
result = MicrosoftSSOHandler.openid_from_response(
|
||||
response=None, jwt_handler=mock_jwt_handler
|
||||
)
|
||||
result = MicrosoftSSOHandler.openid_from_response(response=None, team_ids=[])
|
||||
|
||||
# Assert
|
||||
assert isinstance(result, CustomOpenID)
|
||||
|
@ -77,5 +73,309 @@ def test_microsoft_sso_handler_with_empty_response():
|
|||
assert result.last_name is None
|
||||
assert result.team_ids == []
|
||||
|
||||
# Make sure the JWT handler was called with an empty dict
|
||||
mock_jwt_handler.get_team_ids_from_jwt.assert_called_once_with({})
|
||||
|
||||
def test_get_microsoft_callback_response():
|
||||
# Arrange
|
||||
mock_request = MagicMock(spec=Request)
|
||||
mock_response = {
|
||||
"mail": "microsoft_user@example.com",
|
||||
"displayName": "Microsoft User",
|
||||
"id": "msft123",
|
||||
"givenName": "Microsoft",
|
||||
"surname": "User",
|
||||
}
|
||||
|
||||
future = asyncio.Future()
|
||||
future.set_result(mock_response)
|
||||
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{"MICROSOFT_CLIENT_SECRET": "mock_secret", "MICROSOFT_TENANT": "mock_tenant"},
|
||||
):
|
||||
with patch(
|
||||
"fastapi_sso.sso.microsoft.MicrosoftSSO.verify_and_process",
|
||||
return_value=future,
|
||||
):
|
||||
# Act
|
||||
result = asyncio.run(
|
||||
MicrosoftSSOHandler.get_microsoft_callback_response(
|
||||
request=mock_request,
|
||||
microsoft_client_id="mock_client_id",
|
||||
redirect_url="http://mock_redirect_url",
|
||||
)
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert isinstance(result, CustomOpenID)
|
||||
assert result.email == "microsoft_user@example.com"
|
||||
assert result.display_name == "Microsoft User"
|
||||
assert result.provider == "microsoft"
|
||||
assert result.id == "msft123"
|
||||
assert result.first_name == "Microsoft"
|
||||
assert result.last_name == "User"
|
||||
|
||||
|
||||
def test_get_microsoft_callback_response_raw_sso_response():
|
||||
# Arrange
|
||||
mock_request = MagicMock(spec=Request)
|
||||
mock_response = {
|
||||
"mail": "microsoft_user@example.com",
|
||||
"displayName": "Microsoft User",
|
||||
"id": "msft123",
|
||||
"givenName": "Microsoft",
|
||||
"surname": "User",
|
||||
}
|
||||
|
||||
future = asyncio.Future()
|
||||
future.set_result(mock_response)
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{"MICROSOFT_CLIENT_SECRET": "mock_secret", "MICROSOFT_TENANT": "mock_tenant"},
|
||||
):
|
||||
with patch(
|
||||
"fastapi_sso.sso.microsoft.MicrosoftSSO.verify_and_process",
|
||||
return_value=future,
|
||||
):
|
||||
# Act
|
||||
result = asyncio.run(
|
||||
MicrosoftSSOHandler.get_microsoft_callback_response(
|
||||
request=mock_request,
|
||||
microsoft_client_id="mock_client_id",
|
||||
redirect_url="http://mock_redirect_url",
|
||||
return_raw_sso_response=True,
|
||||
)
|
||||
)
|
||||
|
||||
# Assert
|
||||
print("result from verify_and_process", result)
|
||||
assert isinstance(result, dict)
|
||||
assert result["mail"] == "microsoft_user@example.com"
|
||||
assert result["displayName"] == "Microsoft User"
|
||||
assert result["id"] == "msft123"
|
||||
assert result["givenName"] == "Microsoft"
|
||||
assert result["surname"] == "User"
|
||||
|
||||
|
||||
def test_get_google_callback_response():
|
||||
# Arrange
|
||||
mock_request = MagicMock(spec=Request)
|
||||
mock_response = {
|
||||
"email": "google_user@example.com",
|
||||
"name": "Google User",
|
||||
"sub": "google123",
|
||||
"given_name": "Google",
|
||||
"family_name": "User",
|
||||
}
|
||||
|
||||
future = asyncio.Future()
|
||||
future.set_result(mock_response)
|
||||
|
||||
with patch.dict(os.environ, {"GOOGLE_CLIENT_SECRET": "mock_secret"}):
|
||||
with patch(
|
||||
"fastapi_sso.sso.google.GoogleSSO.verify_and_process", return_value=future
|
||||
):
|
||||
# Act
|
||||
result = asyncio.run(
|
||||
GoogleSSOHandler.get_google_callback_response(
|
||||
request=mock_request,
|
||||
google_client_id="mock_client_id",
|
||||
redirect_url="http://mock_redirect_url",
|
||||
)
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert isinstance(result, dict)
|
||||
assert result.get("email") == "google_user@example.com"
|
||||
assert result.get("name") == "Google User"
|
||||
assert result.get("sub") == "google123"
|
||||
assert result.get("given_name") == "Google"
|
||||
assert result.get("family_name") == "User"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_user_groups_from_graph_api():
    # Arrange
    mock_response = {
        "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryObjects",
        "value": [
            {
                "@odata.type": "#microsoft.graph.group",
                "id": "group1",
                "displayName": "Group 1",
            },
            {
                "@odata.type": "#microsoft.graph.group",
                "id": "group2",
                "displayName": "Group 2",
            },
        ],
    }

    async def mock_get(*args, **kwargs):
        mock = MagicMock()
        mock.json.return_value = mock_response
        return mock

    with patch(
        "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client"
    ) as mock_client:
        mock_client.return_value = MagicMock()
        mock_client.return_value.get = mock_get

        # Act
        result = await MicrosoftSSOHandler.get_user_groups_from_graph_api(
            access_token="mock_token"
        )

        # Assert
        assert isinstance(result, list)
        assert len(result) == 2
        assert "group1" in result
        assert "group2" in result

@pytest.mark.asyncio
async def test_get_user_groups_pagination():
    # Arrange
    first_response = {
        "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryObjects",
        "@odata.nextLink": "https://graph.microsoft.com/v1.0/me/memberOf?$skiptoken=page2",
        "value": [
            {
                "@odata.type": "#microsoft.graph.group",
                "id": "group1",
                "displayName": "Group 1",
            },
        ],
    }
    second_response = {
        "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryObjects",
        "value": [
            {
                "@odata.type": "#microsoft.graph.group",
                "id": "group2",
                "displayName": "Group 2",
            },
        ],
    }

    responses = [first_response, second_response]
    current_response = {"index": 0}

    async def mock_get(*args, **kwargs):
        mock = MagicMock()
        mock.json.return_value = responses[current_response["index"]]
        current_response["index"] += 1
        return mock

    with patch(
        "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client"
    ) as mock_client:
        mock_client.return_value = MagicMock()
        mock_client.return_value.get = mock_get

        # Act
        result = await MicrosoftSSOHandler.get_user_groups_from_graph_api(
            access_token="mock_token"
        )

        # Assert
        assert isinstance(result, list)
        assert len(result) == 2
        assert "group1" in result
        assert "group2" in result
        assert current_response["index"] == 2  # Verify both pages were fetched

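For context, the pagination behavior exercised by `test_get_user_groups_pagination` follows Microsoft Graph's `@odata.nextLink` convention: keep requesting the link returned with each page until no link is present. Below is a minimal sketch of that loop written against plain `httpx` rather than litellm's actual helper; the function name and the `/me/memberOf` endpoint are assumptions for illustration.

```python
# Illustrative sketch of @odata.nextLink pagination -- not litellm's code.
# `fetch_all_group_ids` and the /me/memberOf endpoint are assumed here.
from typing import List, Optional

import httpx


async def fetch_all_group_ids(access_token: str) -> List[str]:
    url: Optional[str] = "https://graph.microsoft.com/v1.0/me/memberOf"
    headers = {"Authorization": f"Bearer {access_token}"}
    group_ids: List[str] = []
    async with httpx.AsyncClient() as client:
        while url:
            response = await client.get(url, headers=headers)
            body = response.json()
            for obj in body.get("value", []):
                if obj.get("id") is not None:  # skip objects without an id
                    group_ids.append(obj["id"])
            url = body.get("@odata.nextLink")  # absent on the last page
    return group_ids
```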
@pytest.mark.asyncio
async def test_get_user_groups_empty_response():
    # Arrange
    mock_response = {
        "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#directoryObjects",
        "value": [],
    }

    async def mock_get(*args, **kwargs):
        mock = MagicMock()
        mock.json.return_value = mock_response
        return mock

    with patch(
        "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client"
    ) as mock_client:
        mock_client.return_value = MagicMock()
        mock_client.return_value.get = mock_get

        # Act
        result = await MicrosoftSSOHandler.get_user_groups_from_graph_api(
            access_token="mock_token"
        )

        # Assert
        assert isinstance(result, list)
        assert len(result) == 0

@pytest.mark.asyncio
async def test_get_user_groups_error_handling():
    # Arrange
    async def mock_get(*args, **kwargs):
        raise Exception("API Error")

    with patch(
        "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client"
    ) as mock_client:
        mock_client.return_value = MagicMock()
        mock_client.return_value.get = mock_get

        # Act
        result = await MicrosoftSSOHandler.get_user_groups_from_graph_api(
            access_token="mock_token"
        )

        # Assert
        assert isinstance(result, list)
        assert len(result) == 0

def test_get_group_ids_from_graph_api_response():
    # Arrange
    mock_response = MicrosoftGraphAPIUserGroupResponse(
        odata_context="https://graph.microsoft.com/v1.0/$metadata#directoryObjects",
        odata_nextLink=None,
        value=[
            MicrosoftGraphAPIUserGroupDirectoryObject(
                odata_type="#microsoft.graph.group",
                id="group1",
                displayName="Group 1",
                description=None,
                deletedDateTime=None,
                roleTemplateId=None,
            ),
            MicrosoftGraphAPIUserGroupDirectoryObject(
                odata_type="#microsoft.graph.group",
                id="group2",
                displayName="Group 2",
                description=None,
                deletedDateTime=None,
                roleTemplateId=None,
            ),
            MicrosoftGraphAPIUserGroupDirectoryObject(
                odata_type="#microsoft.graph.group",
                id=None,  # Test handling of None id
                displayName="Invalid Group",
                description=None,
                deletedDateTime=None,
                roleTemplateId=None,
            ),
        ],
    )

    # Act
    result = MicrosoftSSOHandler._get_group_ids_from_graph_api_response(mock_response)

    # Assert
    assert isinstance(result, list)
    assert len(result) == 2
    assert "group1" in result
    assert "group2" in result

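The `None`-id case covered by `test_get_group_ids_from_graph_api_response` boils down to filtering directory objects before collecting ids. A standalone sketch of that filtering, assuming plain dicts rather than litellm's typed response classes and a made-up helper name:

```python
# Sketch only: collect group ids and skip entries without one. This assumes
# plain dicts, not MicrosoftGraphAPIUserGroupDirectoryObject instances.
from typing import List, Optional


def collect_group_ids(directory_objects: List[dict]) -> List[str]:
    group_ids: List[str] = []
    for obj in directory_objects:
        group_id: Optional[str] = obj.get("id")
        if group_id is not None:
            group_ids.append(group_id)
    return group_ids
```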
@ -0,0 +1,116 @@
import json
import os
import sys
from io import BytesIO
from unittest.mock import AsyncMock, MagicMock, patch

import httpx
import pytest
from fastapi import Request, UploadFile
from fastapi.testclient import TestClient
from starlette.datastructures import Headers
from starlette.datastructures import UploadFile as StarletteUploadFile

sys.path.insert(
    0, os.path.abspath("../../..")
)  # Adds the parent directory to the system path

from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
    HttpPassThroughEndpointHelpers,
)


# Test is_multipart
def test_is_multipart():
    # Test with multipart content type
    request = MagicMock(spec=Request)
    request.headers = Headers({"content-type": "multipart/form-data; boundary=123"})
    assert HttpPassThroughEndpointHelpers.is_multipart(request) is True

    # Test with non-multipart content type
    request.headers = Headers({"content-type": "application/json"})
    assert HttpPassThroughEndpointHelpers.is_multipart(request) is False

    # Test with no content type
    request.headers = Headers({})
    assert HttpPassThroughEndpointHelpers.is_multipart(request) is False

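The multipart check asserted above reduces to inspecting the request's `content-type` header. A minimal standalone sketch, assuming a fresh helper name rather than `HttpPassThroughEndpointHelpers.is_multipart` itself:

```python
# Assumed illustration of a multipart check, not the helper's actual source.
from fastapi import Request


def looks_like_multipart(request: Request) -> bool:
    content_type = request.headers.get("content-type", "")
    return content_type.startswith("multipart/form-data")
```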
# Test _build_request_files_from_upload_file
@pytest.mark.asyncio
async def test_build_request_files_from_upload_file():
    # Test with FastAPI UploadFile
    file_content = b"test content"
    file = BytesIO(file_content)
    # Create SpooledTemporaryFile with content type headers
    headers = {"content-type": "text/plain"}
    upload_file = UploadFile(file=file, filename="test.txt", headers=headers)
    upload_file.read = AsyncMock(return_value=file_content)

    result = await HttpPassThroughEndpointHelpers._build_request_files_from_upload_file(
        upload_file
    )
    assert result == ("test.txt", file_content, "text/plain")

    # Test with Starlette UploadFile
    file2 = BytesIO(file_content)
    starlette_file = StarletteUploadFile(
        file=file2,
        filename="test2.txt",
        headers=Headers({"content-type": "text/plain"}),
    )
    starlette_file.read = AsyncMock(return_value=file_content)

    result = await HttpPassThroughEndpointHelpers._build_request_files_from_upload_file(
        starlette_file
    )
    assert result == ("test2.txt", file_content, "text/plain")

# Test make_multipart_http_request
@pytest.mark.asyncio
async def test_make_multipart_http_request():
    # Mock request with file and form field
    request = MagicMock(spec=Request)
    request.method = "POST"

    # Mock form data
    file_content = b"test file content"
    file = BytesIO(file_content)
    # Create SpooledTemporaryFile with content type headers
    headers = {"content-type": "text/plain"}
    upload_file = UploadFile(file=file, filename="test.txt", headers=headers)
    upload_file.read = AsyncMock(return_value=file_content)

    form_data = {"file": upload_file, "text_field": "test value"}
    request.form = AsyncMock(return_value=form_data)

    # Mock httpx client
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.headers = {}

    async_client = MagicMock()
    async_client.request = AsyncMock(return_value=mock_response)

    # Test the function
    response = await HttpPassThroughEndpointHelpers.make_multipart_http_request(
        request=request,
        async_client=async_client,
        url=httpx.URL("http://test.com"),
        headers={},
        requested_query_params=None,
    )

    # Verify the response
    assert response == mock_response

    # Verify the client call
    async_client.request.assert_called_once()
    call_args = async_client.request.call_args[1]

    assert call_args["method"] == "POST"
    assert str(call_args["url"]) == "http://test.com"
    assert isinstance(call_args["files"], dict)
    assert isinstance(call_args["data"], dict)
    assert call_args["data"]["text_field"] == "test value"
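For reference, the `files` / `data` split the last test asserts on mirrors how `httpx` expects multipart uploads: file tuples go in `files`, plain form fields go in `data`. A usage sketch with made-up names, URL, and content type (not litellm's helper):

```python
# Usage sketch only: re-sending a multipart body with httpx. The function
# name, URL, and content type are assumptions for illustration.
import httpx


async def forward_multipart(
    file_name: str, file_bytes: bytes, form_fields: dict
) -> httpx.Response:
    files = {"file": (file_name, file_bytes, "text/plain")}
    async with httpx.AsyncClient() as client:
        return await client.request(
            method="POST",
            url="http://test.com",
            files=files,       # sent as multipart file parts
            data=form_fields,  # sent as regular form fields
        )
```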
@ -457,7 +457,7 @@ class TestSpendLogsPayload:
"model": "gpt-4o",
"user": "",
"team_id": "",
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
"cache_key": "Cache OFF",
"spend": 0.00022500000000000002,
"total_tokens": 30,

@ -555,7 +555,7 @@ class TestSpendLogsPayload:
"model": "claude-3-7-sonnet-20250219",
"user": "",
"team_id": "",
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"cache_key": "Cache OFF",
"spend": 0.01383,
"total_tokens": 2598,

@ -651,7 +651,7 @@ class TestSpendLogsPayload:
"model": "claude-3-7-sonnet-20250219",
"user": "",
"team_id": "",
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"cache_key": "Cache OFF",
"spend": 0.01383,
"total_tokens": 2598,
Some files were not shown because too many files have changed in this diff.