From fed9c89cc71b527f6f54b76aeffa0a896d8c697f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 12 Sep 2024 13:22:59 -0700 Subject: [PATCH 1/8] add OpenAI o1 config --- litellm/llms/OpenAI/o1_reasoning.py | 72 ++++++++++++++++ ...odel_prices_and_context_window_backup.json | 84 +++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 litellm/llms/OpenAI/o1_reasoning.py diff --git a/litellm/llms/OpenAI/o1_reasoning.py b/litellm/llms/OpenAI/o1_reasoning.py new file mode 100644 index 000000000..c2c48456f --- /dev/null +++ b/litellm/llms/OpenAI/o1_reasoning.py @@ -0,0 +1,72 @@ +""" +Support for o1 model family + +https://platform.openai.com/docs/guides/reasoning + +Translations handled by LiteLLM: +- modalities: image => drop param (if user opts in to dropping param) +- role: system ==> translate to role assistant +- streaming => faked by LiteLLM +- Tools, response_format => drop param (if user opts in to dropping param) +- Logprobs => drop param (if user opts in to dropping param) +""" + +import types +from typing import Optional, Union + +import litellm + +from .openai import OpenAIConfig + + +class OpenAIO1Config(OpenAIConfig): + """ + Reference: https://platform.openai.com/docs/guides/reasoning + + """ + + @classmethod + def get_config(cls): + return { + k: v + for k, v in cls.__dict__.items() + if not k.startswith("__") + and not isinstance( + v, + ( + types.FunctionType, + types.BuiltinFunctionType, + classmethod, + staticmethod, + ), + ) + and v is not None + } + + def get_supported_openai_params(self, model: str) -> list: + """ + Get the supported OpenAI params for the given model + + """ + + all_openai_params = litellm.OpenAIConfig.get_supported_openai_params( + model="gpt-4o" + ) + non_supported_params = [ + "logprobs", + "tools", + "tool_choice", + "parallel_tool_calls", + "function_call", + "functions", + ] + + return [ + param for param in all_openai_params if param not in non_supported_params + ] + + def map_openai_params(self, non_default_params: dict, optional_params: dict): + for param, value in non_default_params.items(): + if param == "max_tokens": + optional_params["max_completion_tokens"] = value + return optional_params diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 37ac23966..6b075e111 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -11,6 +11,42 @@ "supports_parallel_function_calling": true, "supports_vision": true }, + "o1-preview-2024-09-12": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "litellm_provider": "openai", + "mode": "chat" + }, + "o1-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "litellm_provider": "openai", + "mode": "chat" + }, + "o1-mini": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "litellm_provider": "openai", + "mode": "chat" + }, + "o1-mini-2024-09-12": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "litellm_provider": "openai", + "mode": "chat" + }, "gpt-4": { "max_tokens": 4096, "max_input_tokens": 8192, @@ -57,6 +93,54 @@ 
"supports_parallel_function_calling": true, "supports_vision": true }, + "o1-mini": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "o1-mini-2024-09-12": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "o1-preview": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "o1-preview-2024-09-12": { + "max_tokens": 16384, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, "chatgpt-4o-latest": { "max_tokens": 4096, "max_input_tokens": 128000, From f5e9e9fc9ae4d72ec6d799ddd42476ab99df7352 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 12 Sep 2024 13:40:15 -0700 Subject: [PATCH 2/8] add o1 reasoning tests --- litellm/llms/OpenAI/o1_reasoning.py | 2 +- litellm/tests/test_openai_o1.py | 56 +++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 litellm/tests/test_openai_o1.py diff --git a/litellm/llms/OpenAI/o1_reasoning.py b/litellm/llms/OpenAI/o1_reasoning.py index c2c48456f..03038b7ce 100644 --- a/litellm/llms/OpenAI/o1_reasoning.py +++ b/litellm/llms/OpenAI/o1_reasoning.py @@ -5,7 +5,7 @@ https://platform.openai.com/docs/guides/reasoning Translations handled by LiteLLM: - modalities: image => drop param (if user opts in to dropping param) -- role: system ==> translate to role assistant +- role: system ==> translate to role 'user' - streaming => faked by LiteLLM - Tools, response_format => drop param (if user opts in to dropping param) - Logprobs => drop param (if user opts in to dropping param) diff --git a/litellm/tests/test_openai_o1.py b/litellm/tests/test_openai_o1.py new file mode 100644 index 000000000..f08c71ca9 --- /dev/null +++ b/litellm/tests/test_openai_o1.py @@ -0,0 +1,56 @@ +import json +from datetime import datetime +from unittest.mock import AsyncMock + +import httpx +import pytest +from respx import MockRouter + +import litellm +from litellm import Choices, Message, ModelResponse + + +@pytest.mark.asyncio +@pytest.mark.respx +async def test_o1_handle_system_role(respx_mock: MockRouter): + """ + Tests that: + - max_tokens is translated to 'max_completion_tokens' + - role 'system' is translated to 'user' + """ + litellm.set_verbose = True + + mock_response = ModelResponse( + id="cmpl-mock", + choices=[Choices(message=Message(content="Mocked response", role="assistant"))], + created=int(datetime.now().timestamp()), + model="o1-preview", + ) + + mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock( + return_value=httpx.Response(200, json=mock_response.dict()) 
+ ) + + response = await litellm.acompletion( + model="o1-preview", + max_tokens=10, + messages=[{"role": "system", "content": "Hello!"}], + ) + + assert mock_request.called + request_body = json.loads(mock_request.calls[0].request.content) + + print("request_body: ", request_body) + + assert request_body == { + "model": "o1-preview", + "max_completion_tokens": 10, + "messages": [{"role": "user", "content": "Hello!"}], + } + + print(f"response: {response}") + assert isinstance(response, ModelResponse) + assert response.choices[0].message.content == "Mocked response" + + +# ... existing code ... From a5a0773b19bfc65bb7342e87c8ac48565a1c8645 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 12 Sep 2024 14:09:13 -0700 Subject: [PATCH 3/8] fix handle o1 not supporting system message --- litellm/__init__.py | 3 +++ litellm/llms/OpenAI/o1_reasoning.py | 35 +++++++++++++++++++++++++++-- litellm/llms/OpenAI/openai.py | 15 +++++++++++++ litellm/tests/test_openai_o1.py | 3 --- 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index 95c276edf..6afec1079 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -944,6 +944,9 @@ from .llms.OpenAI.openai import ( GroqConfig, AzureAIStudioConfig, ) +from .llms.OpenAI.o1_reasoning import ( + OpenAIO1Config, +) from .llms.nvidia_nim import NvidiaNimConfig from .llms.cerebras.chat import CerebrasConfig from .llms.AI21.chat import AI21ChatConfig diff --git a/litellm/llms/OpenAI/o1_reasoning.py b/litellm/llms/OpenAI/o1_reasoning.py index 03038b7ce..dcfe2d06c 100644 --- a/litellm/llms/OpenAI/o1_reasoning.py +++ b/litellm/llms/OpenAI/o1_reasoning.py @@ -12,7 +12,7 @@ Translations handled by LiteLLM: """ import types -from typing import Optional, Union +from typing import Any, List, Optional, Union import litellm @@ -49,7 +49,7 @@ class OpenAIO1Config(OpenAIConfig): """ - all_openai_params = litellm.OpenAIConfig.get_supported_openai_params( + all_openai_params = litellm.OpenAIConfig().get_supported_openai_params( model="gpt-4o" ) non_supported_params = [ @@ -70,3 +70,34 @@ class OpenAIO1Config(OpenAIConfig): if param == "max_tokens": optional_params["max_completion_tokens"] = value return optional_params + + def is_model_o1_reasoning_model(self, model: str) -> bool: + if "o1" in model: + return True + return False + + def o1_prompt_factory(self, messages: List[Any]): + """ + Handles limitations of O-1 model family. + - modalities: image => drop param (if user opts in to dropping param) + - role: system ==> translate to role 'user' + """ + + for message in messages: + if message["role"] == "system": + message["role"] = "user" + + if isinstance(message["content"], list): + new_content = [] + for content_item in message["content"]: + if content_item.get("type") == "image_url": + if litellm.drop_params is not True: + raise ValueError( + "Image content is not supported for O-1 models. Set litellm.drop_param to True to drop image content." 
+ ) + # If drop_param is True, we simply don't add the image content to new_content + else: + new_content.append(content_item) + message["content"] = new_content + + return messages diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py index ed4d199f6..d90c04b62 100644 --- a/litellm/llms/OpenAI/openai.py +++ b/litellm/llms/OpenAI/openai.py @@ -550,6 +550,8 @@ class OpenAIConfig: ] # works across all models model_specific_params = [] + if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model): + return litellm.OpenAIO1Config().get_supported_openai_params(model=model) if ( model != "gpt-3.5-turbo-16k" and model != "gpt-4" ): # gpt-4 does not support 'response_format' @@ -566,6 +568,12 @@ class OpenAIConfig: def map_openai_params( self, non_default_params: dict, optional_params: dict, model: str ) -> dict: + """ """ + if litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model): + return litellm.OpenAIO1Config().map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + ) supported_openai_params = self.get_supported_openai_params(model) for param, value in non_default_params.items(): if param in supported_openai_params: @@ -861,6 +869,13 @@ class OpenAIChatCompletion(BaseLLM): messages=messages, custom_llm_provider=custom_llm_provider, ) + if ( + litellm.OpenAIO1Config().is_model_o1_reasoning_model(model=model) + and messages is not None + ): + messages = litellm.OpenAIO1Config().o1_prompt_factory( + messages=messages, + ) for _ in range( 2 diff --git a/litellm/tests/test_openai_o1.py b/litellm/tests/test_openai_o1.py index f08c71ca9..7c450d7e7 100644 --- a/litellm/tests/test_openai_o1.py +++ b/litellm/tests/test_openai_o1.py @@ -51,6 +51,3 @@ async def test_o1_handle_system_role(respx_mock: MockRouter): print(f"response: {response}") assert isinstance(response, ModelResponse) assert response.choices[0].message.content == "Mocked response" - - -# ... existing code ... 
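
A brief, illustrative sketch (not part of the commits themselves) of how the o1 handling introduced in the patches so far behaves. It assumes litellm is installed at this revision, uses only the class and methods added above, and mirrors the behavior asserted in litellm/tests/test_openai_o1.py:

    import litellm

    config = litellm.OpenAIO1Config()

    # o1 models are detected by a substring match on the model name.
    assert config.is_model_o1_reasoning_model(model="o1-preview")

    # Params the o1 family does not accept (tools, logprobs, ...) are filtered
    # out of the supported-param list derived from the gpt-4o defaults.
    supported = config.get_supported_openai_params(model="o1-preview")
    assert "tools" not in supported
    assert "logprobs" not in supported

    # system messages are rewritten to user messages; image content parts
    # would only be dropped when litellm.drop_params is enabled.
    messages = config.o1_prompt_factory(
        messages=[{"role": "system", "content": "You are a helpful assistant."}]
    )
    assert messages[0]["role"] == "user"
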
From 14dc7b3b54291f32e80fd53b11a1725a6f28f2d3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 12 Sep 2024 14:15:18 -0700 Subject: [PATCH 4/8] fix linting --- litellm/llms/OpenAI/o1_reasoning.py | 4 +++- litellm/llms/OpenAI/openai.py | 1 + litellm/tests/test_openai_o1.py | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/litellm/llms/OpenAI/o1_reasoning.py b/litellm/llms/OpenAI/o1_reasoning.py index dcfe2d06c..6d15319bb 100644 --- a/litellm/llms/OpenAI/o1_reasoning.py +++ b/litellm/llms/OpenAI/o1_reasoning.py @@ -65,7 +65,9 @@ class OpenAIO1Config(OpenAIConfig): param for param in all_openai_params if param not in non_supported_params ] - def map_openai_params(self, non_default_params: dict, optional_params: dict): + def map_openai_params( + self, non_default_params: dict, optional_params: dict, model: str + ): for param, value in non_default_params.items(): if param == "max_tokens": optional_params["max_completion_tokens"] = value diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py index d90c04b62..89f397032 100644 --- a/litellm/llms/OpenAI/openai.py +++ b/litellm/llms/OpenAI/openai.py @@ -573,6 +573,7 @@ class OpenAIConfig: return litellm.OpenAIO1Config().map_openai_params( non_default_params=non_default_params, optional_params=optional_params, + model=model, ) supported_openai_params = self.get_supported_openai_params(model) for param, value in non_default_params.items(): diff --git a/litellm/tests/test_openai_o1.py b/litellm/tests/test_openai_o1.py index 7c450d7e7..39dadc96e 100644 --- a/litellm/tests/test_openai_o1.py +++ b/litellm/tests/test_openai_o1.py @@ -50,4 +50,3 @@ async def test_o1_handle_system_role(respx_mock: MockRouter): print(f"response: {response}") assert isinstance(response, ModelResponse) - assert response.choices[0].message.content == "Mocked response" From ded40e4d4197eb4c76e6716d998762f2ca31861d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 12 Sep 2024 14:18:15 -0700 Subject: [PATCH 5/8] bump openai to 1.45.0 --- .circleci/config.yml | 8 ++++---- poetry.lock | 8 ++++---- pyproject.toml | 2 +- requirements.txt | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 53a3e541f..e301bd96e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -49,7 +49,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.40.0 + pip install openai==1.45.0 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -313,7 +313,7 @@ jobs: pip install "aiodynamo==23.10.1" pip install "asyncio==3.4.3" pip install "PyGithub==1.59.1" - pip install "openai==1.40.0" + pip install "openai==1.45.0" # Run pytest and generate JUnit XML report - run: name: Build Docker image @@ -406,7 +406,7 @@ jobs: pip install "pytest-retry==1.6.3" pip install "pytest-asyncio==0.21.1" pip install aiohttp - pip install "openai==1.40.0" + pip install "openai==1.45.0" python -m pip install --upgrade pip pip install "pydantic==2.7.1" pip install "pytest==7.3.1" @@ -513,7 +513,7 @@ jobs: pip install "pytest-asyncio==0.21.1" pip install "google-cloud-aiplatform==1.43.0" pip install aiohttp - pip install "openai==1.40.0" + pip install "openai==1.45.0" python -m pip install --upgrade pip pip install "pydantic==2.7.1" pip install "pytest==7.3.1" diff --git a/poetry.lock b/poetry.lock index 7b734cfb3..1380e652b 100644 --- a/poetry.lock +++ b/poetry.lock @@ 
-1761,13 +1761,13 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] [[package]] name = "openai" -version = "1.40.1" +version = "1.45.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.40.1-py3-none-any.whl", hash = "sha256:cf5929076c6ca31c26f1ed207e9fd19eb05404cc9104f64c9d29bb0ac0c5bcd4"}, - {file = "openai-1.40.1.tar.gz", hash = "sha256:cb1294ac1f8c6a1acbb07e090698eb5ad74a7a88484e77126612a4f22579673d"}, + {file = "openai-1.45.0-py3-none-any.whl", hash = "sha256:2f1f7b7cf90f038a9f1c24f0d26c0f1790c102ec5acd07ffd70a9b7feac1ff4e"}, + {file = "openai-1.45.0.tar.gz", hash = "sha256:731207d10637335413aa3c0955f8f8df30d7636a4a0f9c381f2209d32cf8de97"}, ] [package.dependencies] @@ -3484,4 +3484,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi- [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0, !=3.9.7" -content-hash = "ad04b75d2f51072f1ee86bf000a236914b30b02184dcc8b3475c14cd300219f0" +content-hash = "6795344f245df1fac99329e370f6a997bbf5010e6841c723dc5e73cf22c3885d" diff --git a/pyproject.toml b/pyproject.toml index a4b96c166..19a6eda8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ documentation = "https://docs.litellm.ai" [tool.poetry.dependencies] python = ">=3.8.1,<4.0, !=3.9.7" -openai = ">=1.40.0" +openai = ">=1.45.0" python-dotenv = ">=0.2.0" tiktoken = ">=0.7.0" importlib-metadata = ">=6.8.0" diff --git a/requirements.txt b/requirements.txt index ded2d040c..23b225236 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # LITELLM PROXY DEPENDENCIES # anyio==4.4.0 # openai + http req. -openai==1.40.0 # openai req. +openai==1.45.0 # openai req. fastapi==0.111.0 # server dep backoff==2.2.1 # server dep pyyaml==6.0.0 # server dep From 0f24f339f3cfa8ecfed5f8b1410a3aff057bde6f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 12 Sep 2024 14:34:32 -0700 Subject: [PATCH 6/8] fix handle user message --- litellm/llms/OpenAI/o1_reasoning.py | 10 +++++++--- litellm/types/utils.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/litellm/llms/OpenAI/o1_reasoning.py b/litellm/llms/OpenAI/o1_reasoning.py index 6d15319bb..bcab17660 100644 --- a/litellm/llms/OpenAI/o1_reasoning.py +++ b/litellm/llms/OpenAI/o1_reasoning.py @@ -15,6 +15,7 @@ import types from typing import Any, List, Optional, Union import litellm +from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage from .openai import OpenAIConfig @@ -78,16 +79,19 @@ class OpenAIO1Config(OpenAIConfig): return True return False - def o1_prompt_factory(self, messages: List[Any]): + def o1_prompt_factory(self, messages: List[AllMessageValues]): """ Handles limitations of O-1 model family. 
- modalities: image => drop param (if user opts in to dropping param) - role: system ==> translate to role 'user' """ - for message in messages: + for i, message in enumerate(messages): if message["role"] == "system": - message["role"] = "user" + new_message = ChatCompletionUserMessage( + content=message["content"], role="user" + ) + messages[i] = new_message # Replace the old message with the new one if isinstance(message["content"], list): new_content = [] diff --git a/litellm/types/utils.py b/litellm/types/utils.py index c2a708663..6ed3954f8 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -6,7 +6,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union from openai._models import BaseModel as OpenAIObject from openai.types.audio.transcription_create_params import FileTypes -from openai.types.completion_usage import CompletionUsage +from openai.types.completion_usage import CompletionTokensDetails, CompletionUsage from pydantic import ConfigDict, Field, PrivateAttr from typing_extensions import Callable, Dict, Required, TypedDict, override From 46ce4995b86b162b93515cc5d4e2c8db9afad065 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 12 Sep 2024 14:49:43 -0700 Subject: [PATCH 7/8] fix type errors --- litellm/types/utils.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 6ed3954f8..b5f1ba080 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -473,6 +473,7 @@ class Usage(CompletionUsage): prompt_tokens: Optional[int] = None, completion_tokens: Optional[int] = None, total_tokens: Optional[int] = None, + reasoning_tokens: Optional[int] = None, **params, ): ## DEEPSEEK PROMPT TOKEN HANDLING ## - follow the anthropic format, of having prompt tokens be just the non-cached token input. Enables accurate cost-tracking - Relevant issue: https://github.com/BerriAI/litellm/issues/5285 @@ -482,12 +483,16 @@ class Usage(CompletionUsage): and prompt_tokens is not None ): prompt_tokens = params["prompt_cache_miss_tokens"] - data = { - "prompt_tokens": prompt_tokens or 0, - "completion_tokens": completion_tokens or 0, - "total_tokens": total_tokens or 0, - } - super().__init__(**data) + + completion_tokens_details = CompletionTokensDetails( + reasoning_tokens=reasoning_tokens + ) + super().__init__( + prompt_tokens=prompt_tokens or 0, + completion_tokens=completion_tokens or 0, + total_tokens=total_tokens or 0, + completion_tokens_details=completion_tokens_details or None, + ) ## ANTHROPIC MAPPING ## if "cache_creation_input_tokens" in params and isinstance( From bb38e9cbf82516b64c5918081a7d48e0ccb8e796 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 12 Sep 2024 15:24:04 -0700 Subject: [PATCH 8/8] fix gcs logging --- litellm/types/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/litellm/types/utils.py b/litellm/types/utils.py index b5f1ba080..1283a379b 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -484,9 +484,12 @@ class Usage(CompletionUsage): ): prompt_tokens = params["prompt_cache_miss_tokens"] - completion_tokens_details = CompletionTokensDetails( - reasoning_tokens=reasoning_tokens - ) + # handle reasoning_tokens + completion_tokens_details = None + if reasoning_tokens: + completion_tokens_details = CompletionTokensDetails( + reasoning_tokens=reasoning_tokens + ) super().__init__( prompt_tokens=prompt_tokens or 0, completion_tokens=completion_tokens or 0,
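
A closing, illustrative sketch (not part of the commits) of the final parameter mapping and the reasoning-token accounting added in the later patches. It assumes litellm and openai 1.45.0 at this revision and uses only the signatures shown in the diffs above:

    import litellm
    from litellm.types.utils import Usage

    # max_tokens is translated to max_completion_tokens for o1 models
    # (final signature, after the linting fix added the model argument).
    optional_params = litellm.OpenAIO1Config().map_openai_params(
        non_default_params={"max_tokens": 10},
        optional_params={},
        model="o1-preview",
    )
    assert optional_params == {"max_completion_tokens": 10}

    # completion_tokens_details is only populated when reasoning_tokens is
    # passed; otherwise it is left as None (the gcs logging fix).
    usage = Usage(
        prompt_tokens=5, completion_tokens=10, total_tokens=15, reasoning_tokens=7
    )
    assert usage.completion_tokens_details is not None

    no_reasoning = Usage(prompt_tokens=5, completion_tokens=10, total_tokens=15)
    assert no_reasoning.completion_tokens_details is None
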