mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
docs(gemini.md): add gemini thinking to docs
This commit is contained in:
parent
19c7fe675f
commit
e1ef20d4b9
2 changed files with 154 additions and 2 deletions
|
@ -39,14 +39,164 @@ response = completion(
|
||||||
- temperature
|
- temperature
|
||||||
- top_p
|
- top_p
|
||||||
- max_tokens
|
- max_tokens
|
||||||
|
- max_completion_tokens
|
||||||
- stream
|
- stream
|
||||||
- tools
|
- tools
|
||||||
- tool_choice
|
- tool_choice
|
||||||
|
- functions
|
||||||
- response_format
|
- response_format
|
||||||
- n
|
- n
|
||||||
- stop
|
- stop
|
||||||
|
- logprobs
|
||||||
|
- frequency_penalty
|
||||||
|
- modalities
|
||||||
|
- reasoning_content
|
||||||
|
|
||||||
|
**Anthropic Params**
|
||||||
|
- thinking (used to set the max thinking budget tokens across Anthropic/Gemini models)
|
||||||
|
|
||||||
|
[**See Updated List**](https://github.com/BerriAI/litellm/blob/main/litellm/llms/gemini/chat/transformation.py#L70)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Usage - Thinking / `reasoning_content`
|
||||||
|
|
||||||
|
LiteLLM translates OpenAI's `reasoning_effort` to Gemini's `thinking` parameter. [Code](https://github.com/BerriAI/litellm/blob/620664921902d7a9bfb29897a7b27c1a7ef4ddfb/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py#L362)
|
||||||
|
|
||||||
|
**Mapping**
|
||||||
|
|
||||||
|
| reasoning_effort | thinking |
|
||||||
|
| ---------------- | -------- |
|
||||||
|
| "low" | "budget_tokens": 1024 |
|
||||||
|
| "medium" | "budget_tokens": 2048 |
|
||||||
|
| "high" | "budget_tokens": 4096 |
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
resp = completion(
|
||||||
|
model="gemini/gemini-2.5-flash-preview-04-17",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
reasoning_effort="low",
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- model_name: gemini-2.5-flash
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-2.5-flash-preview-04-17
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
|
||||||
|
-d '{
|
||||||
|
"model": "gemini-2.5-flash",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"reasoning_effort": "low"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```python
|
||||||
|
ModelResponse(
|
||||||
|
id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e',
|
||||||
|
created=1740470510,
|
||||||
|
model='gemini/gemini-2.5-flash-preview-04-17',
|
||||||
|
object='chat.completion',
|
||||||
|
system_fingerprint=None,
|
||||||
|
choices=[
|
||||||
|
Choices(
|
||||||
|
finish_reason='stop',
|
||||||
|
index=0,
|
||||||
|
message=Message(
|
||||||
|
content="The capital of France is Paris.",
|
||||||
|
role='assistant',
|
||||||
|
tool_calls=None,
|
||||||
|
function_call=None,
|
||||||
|
reasoning_content='The capital of France is Paris. This is a very straightforward factual question.'
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
usage=Usage(
|
||||||
|
completion_tokens=68,
|
||||||
|
prompt_tokens=42,
|
||||||
|
total_tokens=110,
|
||||||
|
completion_tokens_details=None,
|
||||||
|
prompt_tokens_details=PromptTokensDetailsWrapper(
|
||||||
|
audio_tokens=None,
|
||||||
|
cached_tokens=0,
|
||||||
|
text_tokens=None,
|
||||||
|
image_tokens=None
|
||||||
|
),
|
||||||
|
cache_creation_input_tokens=0,
|
||||||
|
cache_read_input_tokens=0
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pass `thinking` to Gemini models
|
||||||
|
|
||||||
|
You can also pass the `thinking` parameter to Gemini models.
|
||||||
|
|
||||||
|
This is translated to Gemini's [`thinkingConfig` parameter](https://ai.google.dev/gemini-api/docs/thinking#set-budget).
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = litellm.completion(
|
||||||
|
model="gemini/gemini-2.5-flash-preview-04-17",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "gemini/gemini-2.5-flash-preview-04-17",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
[**See Updated List**](https://github.com/BerriAI/litellm/blob/1c747f3ad372399c5b95cc5696b06a5fbe53186b/litellm/llms/vertex_httpx.py#L122)
|
|
||||||
|
|
||||||
## Passing Gemini Specific Params
|
## Passing Gemini Specific Params
|
||||||
### Response schema
|
### Response schema
|
||||||
|
|
|
@ -16,6 +16,8 @@ Supported Providers:
|
||||||
- Vertex AI (Anthropic) (`vertexai/`)
|
- Vertex AI (Anthropic) (`vertexai/`)
|
||||||
- OpenRouter (`openrouter/`)
|
- OpenRouter (`openrouter/`)
|
||||||
- XAI (`xai/`)
|
- XAI (`xai/`)
|
||||||
|
- Google AI Studio (`google/`)
|
||||||
|
- Vertex AI (`vertex_ai/`)
|
||||||
|
|
||||||
LiteLLM will standardize the `reasoning_content` in the response and `thinking_blocks` in the assistant message.
|
LiteLLM will standardize the `reasoning_content` in the response and `thinking_blocks` in the assistant message.
|
||||||
|
|
||||||
|
@ -23,7 +25,7 @@ LiteLLM will standardize the `reasoning_content` in the response and `thinking_b
|
||||||
"message": {
|
"message": {
|
||||||
...
|
...
|
||||||
"reasoning_content": "The capital of France is Paris.",
|
"reasoning_content": "The capital of France is Paris.",
|
||||||
"thinking_blocks": [
|
"thinking_blocks": [ # only returned for Anthropic models
|
||||||
{
|
{
|
||||||
"type": "thinking",
|
"type": "thinking",
|
||||||
"thinking": "The capital of France is Paris.",
|
"thinking": "The capital of France is Paris.",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue