(proxy - RPS) - Get 2K RPS at 4 instances, minor fix aiohttp_openai/ (#7659)

* speed up transform_response

* use 2 workers

* undo changes to uvicorn

* ci/cd run again
This commit is contained in:
Ishaan Jaff 2025-01-09 17:24:18 -08:00 committed by GitHub
parent fd46482916
commit a85de46ef7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 11 additions and 3 deletions

View file

@ -13,7 +13,7 @@ from aiohttp import ClientResponse
from litellm.llms.openai_like.chat.transformation import OpenAILikeChatConfig
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import Choices, ModelResponse

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
@ -66,4 +66,12 @@ class AiohttpOpenAIChatConfig(OpenAILikeChatConfig):
json_mode: Optional[bool] = None,
) -> ModelResponse:
_json_response = await raw_response.json()
model_response.id = _json_response.get("id")
model_response.choices = [
Choices(**choice) for choice in _json_response.get("choices")
]
model_response.created = _json_response.get("created")
model_response.model = _json_response.get("model")
model_response.object = _json_response.get("object")
model_response.system_fingerprint = _json_response.get("system_fingerprint")
return model_response

View file

@ -6,7 +6,7 @@ import pytest
sys.path.insert(
    0, os.path.abspath("../../")
)  # Adds the parent directory to the system-path
import litellm