(feat) Add usage tracking for streaming /anthropic passthrough routes (#6842)

* use 1 file for AnthropicPassthroughLoggingHandler

* add support for anthropic streaming usage tracking

* ci/cd run again

* fix - add real streaming for anthropic pass through

* remove unused function stream_response

* working anthropic streaming logging

* fix code quality

* fix use 1 file for vertex success handler

* use helper for _handle_logging_vertex_collected_chunks

* enforce vertex streaming to use sse for streaming

* test test_basic_vertex_ai_pass_through_streaming_with_spendlog

* fix type hints

* add comment

* fix linting

* add pass through logging unit testing
This commit is contained in:
Ishaan Jaff 2024-11-21 19:36:03 -08:00 committed by GitHub
parent aa6b133557
commit 5fd5bb615c
12 changed files with 688 additions and 295 deletions

View file

@ -779,3 +779,32 @@ class ModelResponseIterator:
raise StopAsyncIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> GenericStreamingChunk:
"""
Convert a string chunk to a GenericStreamingChunk
Note: This is used for Anthropic pass through streaming logging
We can move __anext__, and __next__ to use this function since it's common logic.
Did not migrate them to minmize changes made in 1 PR.
"""
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return GenericStreamingChunk(
text="",
is_finished=False,
finish_reason="",
usage=None,
index=0,
tool_use=None,
)