use chunk_processor

Ishaan Jaff 2024-09-02 15:51:52 -07:00
parent fbeb6941f1
commit ef6b90a657
2 changed files with 23 additions and 7 deletions

View file

@@ -22,6 +22,9 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
+    ModelResponseIterator,
+)
 from litellm.proxy._types import (
     ConfigFieldInfo,
     ConfigFieldUpdate,
@@ -32,6 +35,7 @@ from litellm.proxy._types import (
 )
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
 
+from .streaming_handler import ModelIteratorType, chunk_processor
 from .success_handler import PassThroughEndpointLogging
 
 router = APIRouter()
@@ -416,9 +420,13 @@ async def pass_through_request(
                     status_code=e.response.status_code, detail=await e.response.aread()
                 )
 
-            # Create an async generator to yield the response content
             async def stream_response() -> AsyncIterable[bytes]:
-                async for chunk in response.aiter_bytes():
+                async for chunk in chunk_processor(
+                    response.aiter_bytes(),
+                    litellm_logging_obj=logging_obj,
+                    iterator_type=ModelIteratorType.VERTEX_AI,
+                    start_time=start_time,
+                ):
                     yield chunk
 
             return StreamingResponse(
@@ -454,10 +462,13 @@ async def pass_through_request(
                     status_code=e.response.status_code, detail=await e.response.aread()
                 )
 
-            # streaming response
-            # Create an async generator to yield the response content
             async def stream_response() -> AsyncIterable[bytes]:
-                async for chunk in response.aiter_bytes():
+                async for chunk in chunk_processor(
+                    response.aiter_bytes(),
+                    litellm_logging_obj=logging_obj,
+                    iterator_type=ModelIteratorType.VERTEX_AI,
+                    start_time=start_time,
+                ):
                     yield chunk
 
             return StreamingResponse(
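For context on the change above: instead of forwarding response.aiter_bytes() straight to the client, the pass-through endpoint now routes the raw bytes through chunk_processor so the stream can be inspected and logged while it is relayed. The sketch below only illustrates that wrap-and-re-yield pattern; it is not the actual chunk_processor from .streaming_handler, and the on_stream_end callback is a hypothetical stand-in for the litellm_logging_obj / success-handler integration.

# Illustrative sketch of the wrap-and-re-yield pattern; NOT LiteLLM's
# chunk_processor. `on_stream_end` is a hypothetical callback standing in
# for the logging integration.
import asyncio
import time
from typing import AsyncIterable, Callable, List, Optional


async def passthrough_chunk_processor(
    aiter_bytes: AsyncIterable[bytes],
    on_stream_end: Optional[Callable[[bytes, float, float], None]] = None,
    start_time: Optional[float] = None,
) -> AsyncIterable[bytes]:
    """Re-yield upstream bytes unchanged while buffering them for end-of-stream logging."""
    start_time = start_time if start_time is not None else time.time()
    collected: List[bytes] = []
    async for chunk in aiter_bytes:
        collected.append(chunk)  # keep a copy so the full response can be logged later
        yield chunk              # the client still receives the unmodified bytes
    if on_stream_end is not None:
        # In the real handler the buffered chunks would be parsed with a
        # provider-specific iterator (e.g. the Vertex AI ModelResponseIterator)
        # before being handed to the logging object.
        on_stream_end(b"".join(collected), start_time, time.time())


async def _demo() -> None:
    async def fake_upstream() -> AsyncIterable[bytes]:
        for part in (b'data: {"text": "hi"}\n\n', b"data: [DONE]\n\n"):
            yield part

    async for chunk in passthrough_chunk_processor(
        fake_upstream(),
        on_stream_end=lambda body, t0, t1: print(f"logged {len(body)} bytes in {t1 - t0:.4f}s"),
    ):
        print("forwarded:", chunk)


if __name__ == "__main__":
    asyncio.run(_demo())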

View file

@@ -10,7 +10,12 @@ vertexai.init(
     api_transport="rest",
 )
 
-model = GenerativeModel(model_name="gemini-1.0-pro")
-response = model.generate_content("hi")
+model = GenerativeModel(model_name="gemini-1.5-flash-001")
+response = model.generate_content(
+    "hi tell me a joke and a very long story", stream=True
+)
 
 print("response", response)
+
+for chunk in response:
+    print(chunk)