diff --git a/docs/docs/concepts/vector_stores_configuration.mdx b/docs/docs/concepts/vector_stores_configuration.mdx
new file mode 100644
index 000000000..5b616f1dc
--- /dev/null
+++ b/docs/docs/concepts/vector_stores_configuration.mdx
@@ -0,0 +1,261 @@
+# Vector Stores Configuration
+
+## Overview
+
+Llama Stack provides a variety of configuration options for vector stores through `VectorStoresConfig`. This configuration allows you to customize file processing, chunk retrieval, search behavior, and performance parameters to optimize File Search and your RAG (Retrieval-Augmented Generation) applications.
+
+The configuration affects all vector store providers and operations across the entire stack, particularly the OpenAI-compatible vector store APIs.
+
+## Configuration Structure
+
+Vector store configuration is organized into logical subconfigs that group related settings. The YAML below shows an example configuration for the Faiss provider.
+
+```yaml
+vector_stores:
+  default_provider_id: "faiss"
+  default_embedding_model:
+    provider_id: "sentence-transformers"
+    model_id: "all-MiniLM-L6-v2"
+
+  # Query rewriting for enhanced search
+  rewrite_query_params:
+    model:
+      provider_id: "ollama"
+      model_id: "llama3.2:3b-instruct-fp16"
+    prompt: "Rewrite this search query to improve retrieval results by expanding it with relevant synonyms and related terms: {query}"
+    max_tokens: 100
+    temperature: 0.3
+
+  # File processing during file ingestion
+  file_ingestion_params:
+    default_chunk_size_tokens: 512
+    default_chunk_overlap_tokens: 128
+
+  # Chunk retrieval and ranking during search
+  chunk_retrieval_params:
+    chunk_multiplier: 5
+    max_tokens_in_context: 4000
+    default_reranker_strategy: "rrf"
+    rrf_impact_factor: 60.0
+    weighted_search_alpha: 0.5
+
+  # Batch processing performance settings
+  file_batch_params:
+    max_concurrent_files_per_batch: 3
+    file_batch_chunk_size: 10
+    cleanup_interval_seconds: 86400
+
+  # Tool output and prompt formatting
+  file_search_params:
+    header_template: "## Knowledge Search Results\n\nI found {num_chunks} relevant chunks:\n\n"
+    footer_template: "\n---\n\nEnd of search results."
+
+  context_prompt_params:
+    chunk_annotation_template: "**Source {index}:**\n{chunk.content}\n\n"
+    context_template: "Use the above information to answer: {query}"
+
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: "Cite sources using [Source X] format."
+    chunk_annotation_template: "[Source {index}] {chunk_text} (File: {file_id})"
+```
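+
+To see where these settings take effect, here is a minimal client-side sketch. It is illustrative only: it assumes a Llama Stack server at `http://localhost:8321`, the `llama-stack-client` Python package, and an OpenAI-style method surface; exact signatures may differ between client versions.
+
+```python
+# Hedged sketch of the OpenAI-compatible vector store flow (assumed client API).
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+# Stores created without explicit settings fall back to default_provider_id
+# and default_embedding_model from the configuration above.
+vector_store = client.vector_stores.create(name="docs")
+
+# Ingestion applies file_ingestion_params (512-token chunks, 128-token overlap)
+# unless an explicit chunking strategy is passed.
+with open("handbook.txt", "rb") as f:  # hypothetical file
+    uploaded = client.files.create(file=f, purpose="assistants")
+client.vector_stores.files.create(vector_store_id=vector_store.id, file_id=uploaded.id)
+
+# Search is shaped by chunk_retrieval_params: the store over-retrieves
+# chunk_multiplier * max_num_results candidates, reranks them (RRF by default),
+# and caps RAG context at max_tokens_in_context.
+results = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="vacation policy",
+    max_num_results=5,
+)
+```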
+
+## Configuration Sections
+
+### File Ingestion Parameters
+
+The `file_ingestion_params` configuration controls how files are processed during ingestion into vector stores when using `client.vector_stores.files.create()`:
+
+#### `file_ingestion_params`
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `default_chunk_size_tokens` | `int` | `512` | Default token count for file/document chunks when not explicitly specified |
+| `default_chunk_overlap_tokens` | `int` | `128` | Number of tokens to overlap between chunks (original default: 512 // 4 = 128) |
+
+```yaml
+file_ingestion_params:
+  default_chunk_size_tokens: 512     # Smaller chunks for precision
+  default_chunk_overlap_tokens: 128  # Fixed token overlap for context continuity
+```
+
+**Use Cases:**
+- **Smaller chunks (256-512)**: Better for precise factual retrieval
+- **Larger chunks (800-1200)**: Better for context-heavy applications
+- **Higher overlap (200-300 tokens)**: Reduces context loss at chunk boundaries
+- **Lower overlap (50-100 tokens)**: More efficient storage, faster processing
+
+### Chunk Retrieval Parameters
+
+The `chunk_retrieval_params` section controls search behavior and ranking strategies when using `client.vector_stores.search()`:
+
+#### `chunk_retrieval_params`
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `chunk_multiplier` | `int` | `5` | Over-retrieval factor for OpenAI API compatibility (affects all providers) |
+| `max_tokens_in_context` | `int` | `4000` | Maximum tokens allowed in RAG context before truncation |
+| `default_reranker_strategy` | `str` | `"rrf"` | Default ranking strategy: `"rrf"`, `"weighted"`, or `"normalized"` |
+| `rrf_impact_factor` | `float` | `60.0` | Impact factor for Reciprocal Rank Fusion (RRF) reranking |
+| `weighted_search_alpha` | `float` | `0.5` | Alpha weight for weighted search reranking (0.0-1.0) |
+
+```yaml
+chunk_retrieval_params:
+  chunk_multiplier: 5               # Retrieve 5x chunks for reranking
+  max_tokens_in_context: 4000       # Context window limit
+  default_reranker_strategy: "rrf"  # Use RRF for hybrid search
+  rrf_impact_factor: 60.0           # RRF ranking parameter
+  weighted_search_alpha: 0.5        # 50/50 vector/keyword weight
+```
+
+**Ranking Strategies:**
+
+- **RRF (Reciprocal Rank Fusion)**: Combines vector and keyword rankings with a configurable impact factor
+- **Weighted**: Linear combination with adjustable alpha (0 = keyword only, 1 = vector only)
+- **Normalized**: Normalizes scores before combination
+
+### File Batch Parameters
+
+The `file_batch_params` section controls performance and concurrency for batch file processing when using `client.vector_stores.file_batches.*`:
+
+#### `file_batch_params`
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `max_concurrent_files_per_batch` | `int` | `3` | Maximum files processed concurrently in file batches |
+| `file_batch_chunk_size` | `int` | `10` | Number of files to process in each batch chunk |
+| `cleanup_interval_seconds` | `int` | `86400` | Interval for cleaning up expired file batches (24 hours) |
+
+```yaml
+file_batch_params:
+  max_concurrent_files_per_batch: 3  # Process 3 files simultaneously
+  file_batch_chunk_size: 10          # Handle 10 files per chunk
+  cleanup_interval_seconds: 86400    # Clean up daily
+```
+
+**Performance Tuning:**
+- **Higher concurrency**: Faster processing, more memory usage
+- **Lower concurrency**: Slower processing, less resource usage
+- **Larger chunk size**: Fewer iterations, more memory per iteration
+- **Smaller chunk size**: More iterations, better memory distribution
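+
+As a concrete illustration of these settings, the hedged sketch below (continuing the example above) batch-ingests already-uploaded files: with the defaults, at most three files are processed in flight at once, the file list is walked in chunks of ten, and expired batches are cleaned up roughly daily. Method names follow the OpenAI-compatible surface; exact signatures are assumptions.
+
+```python
+# Hedged sketch: batch ingestion under the file_batch_params defaults.
+file_ids = [f.id for f in uploaded_files]  # hypothetical list of uploaded files
+
+batch = client.vector_stores.file_batches.create(
+    vector_store_id=vector_store.id,
+    file_ids=file_ids,
+)
+
+# Check status; in practice you would poll until processing completes.
+# The server throttles work to max_concurrent_files_per_batch and iterates
+# over the file list in file_batch_chunk_size chunks.
+batch = client.vector_stores.file_batches.retrieve(
+    batch.id,
+    vector_store_id=vector_store.id,
+)
+print(batch.status, batch.file_counts)
+```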
+
+## Advanced Configuration
+
+### Default Provider and Model Settings
+
+Set system-wide defaults for vector operations:
+
+```yaml
+vector_stores:
+  default_provider_id: "faiss"  # Default vector store provider
+  default_embedding_model:      # Default embedding model
+    provider_id: "sentence-transformers"
+    model_id: "all-MiniLM-L6-v2"
+```
+
+### Query Rewriting Configuration
+
+Enable intelligent query expansion for better search results:
+
+#### `rewrite_query_params`
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `model` | `QualifiedModel` | LLM model for query rewriting/expansion |
+| `prompt` | `str` | Prompt template (must contain `{query}` placeholder) |
+| `max_tokens` | `int` | Maximum tokens for expansion (1-4096) |
+| `temperature` | `float` | Generation temperature (0.0-2.0) |
+
+```yaml
+rewrite_query_params:
+  model:
+    provider_id: "meta-reference"
+    model_id: "llama3.2"
+  prompt: |
+    Expand this search query with related terms and synonyms for better vector search.
+    Keep the expansion focused and relevant.
+
+    Original query: {query}
+
+    Expanded query:
+  max_tokens: 100
+  temperature: 0.3
+```
+
+**Note**: Query rewriting is optional. Omit this section to disable query expansion.
+
+### Output Formatting Configuration
+
+Customize how search results are formatted for RAG applications:
+
+#### `file_search_params`
+
+```yaml
+file_search_params:
+  header_template: |
+    ## Knowledge Search Results
+
+    I found {num_chunks} relevant chunks from your knowledge base:
+
+  footer_template: |
+
+    ---
+
+    End of search results. Use this information to provide a comprehensive answer.
+```
+
+#### `context_prompt_params`
+
+```yaml
+context_prompt_params:
+  chunk_annotation_template: |
+    **Source {index}:**
+    {chunk.content}
+
+    *Metadata: {metadata}*
+
+  context_template: |
+    Based on the search results above, please answer this question: {query}
+
+    Provide specific details from the sources and cite them appropriately.
+```
+
+#### `annotation_prompt_params`
+
+```yaml
+annotation_prompt_params:
+  enable_annotations: true
+  annotation_instruction_template: |
+    When citing information, use the format [Source X] where X is the source number.
+    Always cite specific sources for factual claims.
+ chunk_annotation_template: | + [Source {index}] {chunk_text} + + Source: {file_id} +``` + +## Provider-Specific Considerations + +### OpenAI-Compatible API + +All configuration options affect the OpenAI-compatible vector store API: + +- `chunk_multiplier` affects over-retrieval in search operations +- `file_ingestion_params` control chunking during file attachment +- `file_batch_params` control batch processing performance + +### RAG Tools + +The RAG tool runtime respects these configurations: + +- Uses `default_chunk_size_tokens` for file insertion +- Applies `max_tokens_in_context` for context window management +- Uses formatting templates for tool output + +### All Vector Store Providers + +These settings apply across all vector store providers: + +- **Inline providers**: FAISS, SQLite-vec, Milvus +- **Remote providers**: ChromaDB, Qdrant, Weaviate, PGVector +- **Hybrid providers**: Milvus (supports both inline and remote) diff --git a/docs/docs/providers/tool_runtime/inline_rag-runtime.mdx b/docs/docs/providers/tool_runtime/inline_rag-runtime.mdx index 26c65b1a2..1095a46a3 100644 --- a/docs/docs/providers/tool_runtime/inline_rag-runtime.mdx +++ b/docs/docs/providers/tool_runtime/inline_rag-runtime.mdx @@ -14,7 +14,7 @@ RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunki | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `vector_stores_config` | `VectorStoresConfig` | No | `default_provider_id=None default_embedding_model=None rewrite_query_params=None file_search_params=FileSearchParams(header_template='knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n', footer_template='END of knowledge_search tool results.\n') context_prompt_params=ContextPromptParams(chunk_annotation_template='Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n', context_template='The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{annotation_instruction}\n') annotation_prompt_params=AnnotationPromptParams(enable_annotations=True, annotation_instruction_template=" Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.", chunk_annotation_template='[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n')` | Configuration for vector store prompt templates and behavior | +| `vector_stores_config` | `VectorStoresConfig` | No | `default_provider_id=None default_embedding_model=None rewrite_query_params=None file_search_params=FileSearchParams(header_template='knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n', footer_template='END of knowledge_search tool results.\n') context_prompt_params=ContextPromptParams(chunk_annotation_template='Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n', context_template='The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query. {annotation_instruction}\n') annotation_prompt_params=AnnotationPromptParams(enable_annotations=True, annotation_instruction_template="Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. 
Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.", chunk_annotation_template='[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n') file_ingestion_params=FileIngestionParams(default_chunk_size_tokens=512, default_chunk_overlap_tokens=128) chunk_retrieval_params=ChunkRetrievalParams(chunk_multiplier=5, max_tokens_in_context=4000, default_reranker_strategy='rrf', rrf_impact_factor=60.0, weighted_search_alpha=0.5) file_batch_params=FileBatchParams(max_concurrent_files_per_batch=3, file_batch_chunk_size=10, cleanup_interval_seconds=86400)` | Configuration for vector store prompt templates and behavior | ## Sample Configuration diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 7b4ac5ac8..1977db54d 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -41,6 +41,15 @@ const sidebars: SidebarsConfig = { 'concepts/apis/api_leveling', ], }, + { + type: 'category', + label: 'Vector Stores', + collapsed: true, + items: [ + 'concepts/file_operations_vector_stores', + 'concepts/vector_stores_configuration', + ], + }, 'concepts/distributions', 'concepts/resources', ], diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 08d43fce9..402e0e430 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -18,15 +18,6 @@ from llama_stack.core.storage.datatypes import ( StorageConfig, ) from llama_stack.log import LoggingConfig -from llama_stack.providers.utils.memory.constants import ( - DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE, - DEFAULT_CHUNK_ANNOTATION_TEMPLATE, - DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE, - DEFAULT_CONTEXT_TEMPLATE, - DEFAULT_FILE_SEARCH_FOOTER_TEMPLATE, - DEFAULT_FILE_SEARCH_HEADER_TEMPLATE, - DEFAULT_QUERY_REWRITE_PROMPT, -) from llama_stack_api import ( Api, Benchmark, @@ -367,7 +358,7 @@ class RewriteQueryParams(BaseModel): description="LLM model for query rewriting/expansion in vector search.", ) prompt: str = Field( - default=DEFAULT_QUERY_REWRITE_PROMPT, + default="Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:", description="Prompt template for query rewriting. Use {query} as placeholder for the original query.", ) max_tokens: int = Field( @@ -407,11 +398,11 @@ class FileSearchParams(BaseModel): """Configuration for file search tool output formatting.""" header_template: str = Field( - default=DEFAULT_FILE_SEARCH_HEADER_TEMPLATE, + default="knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n", description="Template for the header text shown before search results. Available placeholders: {num_chunks} number of chunks found.", ) footer_template: str = Field( - default=DEFAULT_FILE_SEARCH_FOOTER_TEMPLATE, + default="END of knowledge_search tool results.\n", description="Template for the footer text shown after search results.", ) @@ -433,11 +424,11 @@ class ContextPromptParams(BaseModel): """Configuration for LLM prompt content and chunk formatting.""" chunk_annotation_template: str = Field( - default=DEFAULT_CHUNK_ANNOTATION_TEMPLATE, + default="Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", description="Template for formatting individual chunks in search results. 
Available placeholders: {index} 1-based chunk index, {chunk.content} chunk content, {metadata} chunk metadata dict.", ) context_template: str = Field( - default=DEFAULT_CONTEXT_TEMPLATE, + default='The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query. {annotation_instruction}\n', description="Template for explaining the search results to the model. Available placeholders: {query} user's query, {num_chunks} number of chunks.", ) @@ -470,11 +461,11 @@ class AnnotationPromptParams(BaseModel): description="Whether to include annotation information in results.", ) annotation_instruction_template: str = Field( - default=DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE, + default="Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.", description="Instructions for how the model should cite sources. Used when enable_annotations is True.", ) chunk_annotation_template: str = Field( - default=DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE, + default="[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n", description="Template for chunks with annotation information. Available placeholders: {index} 1-based chunk index, {metadata_text} formatted metadata, {file_id} document identifier, {chunk_text} chunk content.", ) @@ -499,6 +490,61 @@ class AnnotationPromptParams(BaseModel): return v +class FileIngestionParams(BaseModel): + """Configuration for file processing during ingestion.""" + + default_chunk_size_tokens: int = Field( + default=512, + description="Default chunk size for RAG tool operations when not specified", + ) + default_chunk_overlap_tokens: int = Field( + default=128, + description="Default overlap in tokens between chunks (original default: 512 // 4 = 128)", + ) + + +class ChunkRetrievalParams(BaseModel): + """Configuration for chunk retrieval and ranking during search.""" + + chunk_multiplier: int = Field( + default=5, + description="Multiplier for OpenAI API over-retrieval (affects all providers)", + ) + max_tokens_in_context: int = Field( + default=4000, + description="Maximum tokens allowed in RAG context before truncation", + ) + default_reranker_strategy: str = Field( + default="rrf", + description="Default reranker when not specified: 'rrf', 'weighted', or 'normalized'", + ) + rrf_impact_factor: float = Field( + default=60.0, + description="Impact factor for RRF (Reciprocal Rank Fusion) reranking", + ) + weighted_search_alpha: float = Field( + default=0.5, + description="Alpha weight for weighted search reranking (0.0-1.0)", + ) + + +class FileBatchParams(BaseModel): + """Configuration for file batch processing.""" + + max_concurrent_files_per_batch: int = Field( + default=3, + description="Maximum files processed concurrently in file batches", + ) + file_batch_chunk_size: int = Field( + default=10, + description="Number of files to process in each batch chunk", + ) + cleanup_interval_seconds: int = Field( + default=86400, # 24 hours + description="Interval for cleaning up expired file batches (seconds)", + ) + + class VectorStoresConfig(BaseModel): """Configuration for vector stores in the stack.""" @@ -527,6 +573,19 @@ class VectorStoresConfig(BaseModel): description="Configuration for source annotation and attribution features.", ) + file_ingestion_params: FileIngestionParams = Field( + 
default_factory=FileIngestionParams, + description="Configuration for file processing during ingestion.", + ) + chunk_retrieval_params: ChunkRetrievalParams = Field( + default_factory=ChunkRetrievalParams, + description="Configuration for chunk retrieval and ranking during search.", + ) + file_batch_params: FileBatchParams = Field( + default_factory=FileBatchParams, + description="Configuration for file batch processing.", + ) + class SafetyConfig(BaseModel): """Configuration for default moderations model.""" diff --git a/src/llama_stack/core/utils/config.py b/src/llama_stack/core/utils/config.py index 2f45d6d99..18ffc2898 100644 --- a/src/llama_stack/core/utils/config.py +++ b/src/llama_stack/core/utils/config.py @@ -11,6 +11,9 @@ def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]: """Redact sensitive information from config before printing.""" sensitive_patterns = ["api_key", "api_token", "password", "secret", "token"] + # Specific configuration field names that should NOT be redacted despite containing "token" + safe_token_fields = ["chunk_size_tokens", "max_tokens", "default_chunk_overlap_tokens"] + def _redact_value(v: Any) -> Any: if isinstance(v, dict): return _redact_dict(v) @@ -21,7 +24,10 @@ def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]: def _redact_dict(d: dict[str, Any]) -> dict[str, Any]: result = {} for k, v in d.items(): - if any(pattern in k.lower() for pattern in sensitive_patterns): + # Don't redact if it's a safe field + if any(safe_field in k.lower() for safe_field in safe_token_fields): + result[k] = _redact_value(v) + elif any(pattern in k.lower() for pattern in sensitive_patterns): result[k] = "********" else: result[k] = _redact_value(v) diff --git a/src/llama_stack/distributions/ci-tests/config.yaml b/src/llama_stack/distributions/ci-tests/config.yaml index 00bf40916..e4113b4a8 100644 --- a/src/llama_stack/distributions/ci-tests/config.yaml +++ b/src/llama_stack/distributions/ci-tests/config.yaml @@ -296,19 +296,32 @@ vector_stores: ' context_template: 'The above results were retrieved to help answer the user''s query: "{query}". Use them as supporting information only in answering this - query.{annotation_instruction} + query. {annotation_instruction} ' annotation_prompt_params: enable_annotations: true - annotation_instruction_template: ' Cite sources immediately at the end of sentences - before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''. + annotation_instruction_template: Cite sources immediately at the end of sentences + before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent - new ones.' + new ones. 
chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|> {chunk_text} ' + file_ingestion_params: + default_chunk_size_tokens: 512 + default_chunk_overlap_tokens: 128 + chunk_retrieval_params: + chunk_multiplier: 5 + max_tokens_in_context: 4000 + default_reranker_strategy: rrf + rrf_impact_factor: 60.0 + weighted_search_alpha: 0.5 + file_batch_params: + max_concurrent_files_per_batch: 3 + file_batch_chunk_size: 10 + cleanup_interval_seconds: 86400 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index e3db814ea..536ed2857 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -305,19 +305,32 @@ vector_stores: ' context_template: 'The above results were retrieved to help answer the user''s query: "{query}". Use them as supporting information only in answering this - query.{annotation_instruction} + query. {annotation_instruction} ' annotation_prompt_params: enable_annotations: true - annotation_instruction_template: ' Cite sources immediately at the end of sentences - before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''. + annotation_instruction_template: Cite sources immediately at the end of sentences + before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent - new ones.' + new ones. chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|> {chunk_text} ' + file_ingestion_params: + default_chunk_size_tokens: 512 + default_chunk_overlap_tokens: 128 + chunk_retrieval_params: + chunk_multiplier: 5 + max_tokens_in_context: 4000 + default_reranker_strategy: rrf + rrf_impact_factor: 60.0 + weighted_search_alpha: 0.5 + file_batch_params: + max_concurrent_files_per_batch: 3 + file_batch_chunk_size: 10 + cleanup_interval_seconds: 86400 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/config.yaml b/src/llama_stack/distributions/starter-gpu/config.yaml index 2e90e032e..53e0865fc 100644 --- a/src/llama_stack/distributions/starter-gpu/config.yaml +++ b/src/llama_stack/distributions/starter-gpu/config.yaml @@ -299,19 +299,32 @@ vector_stores: ' context_template: 'The above results were retrieved to help answer the user''s query: "{query}". Use them as supporting information only in answering this - query.{annotation_instruction} + query. {annotation_instruction} ' annotation_prompt_params: enable_annotations: true - annotation_instruction_template: ' Cite sources immediately at the end of sentences - before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''. + annotation_instruction_template: Cite sources immediately at the end of sentences + before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent - new ones.' + new ones. 
chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|> {chunk_text} ' + file_ingestion_params: + default_chunk_size_tokens: 512 + default_chunk_overlap_tokens: 128 + chunk_retrieval_params: + chunk_multiplier: 5 + max_tokens_in_context: 4000 + default_reranker_strategy: rrf + rrf_impact_factor: 60.0 + weighted_search_alpha: 0.5 + file_batch_params: + max_concurrent_files_per_batch: 3 + file_batch_chunk_size: 10 + cleanup_interval_seconds: 86400 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index a245317b5..7a65d8633 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -308,19 +308,32 @@ vector_stores: ' context_template: 'The above results were retrieved to help answer the user''s query: "{query}". Use them as supporting information only in answering this - query.{annotation_instruction} + query. {annotation_instruction} ' annotation_prompt_params: enable_annotations: true - annotation_instruction_template: ' Cite sources immediately at the end of sentences - before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''. + annotation_instruction_template: Cite sources immediately at the end of sentences + before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent - new ones.' + new ones. chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|> {chunk_text} ' + file_ingestion_params: + default_chunk_size_tokens: 512 + default_chunk_overlap_tokens: 128 + chunk_retrieval_params: + chunk_multiplier: 5 + max_tokens_in_context: 4000 + default_reranker_strategy: rrf + rrf_impact_factor: 60.0 + weighted_search_alpha: 0.5 + file_batch_params: + max_concurrent_files_per_batch: 3 + file_batch_chunk_size: 10 + cleanup_interval_seconds: 86400 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/config.yaml b/src/llama_stack/distributions/starter/config.yaml index 59e1acc6a..c3156711f 100644 --- a/src/llama_stack/distributions/starter/config.yaml +++ b/src/llama_stack/distributions/starter/config.yaml @@ -296,19 +296,32 @@ vector_stores: ' context_template: 'The above results were retrieved to help answer the user''s query: "{query}". Use them as supporting information only in answering this - query.{annotation_instruction} + query. {annotation_instruction} ' annotation_prompt_params: enable_annotations: true - annotation_instruction_template: ' Cite sources immediately at the end of sentences - before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''. + annotation_instruction_template: Cite sources immediately at the end of sentences + before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent - new ones.' + new ones. 
chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|> {chunk_text} ' + file_ingestion_params: + default_chunk_size_tokens: 512 + default_chunk_overlap_tokens: 128 + chunk_retrieval_params: + chunk_multiplier: 5 + max_tokens_in_context: 4000 + default_reranker_strategy: rrf + rrf_impact_factor: 60.0 + weighted_search_alpha: 0.5 + file_batch_params: + max_concurrent_files_per_batch: 3 + file_batch_chunk_size: 10 + cleanup_interval_seconds: 86400 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index fa99647af..fbbe09608 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -305,19 +305,32 @@ vector_stores: ' context_template: 'The above results were retrieved to help answer the user''s query: "{query}". Use them as supporting information only in answering this - query.{annotation_instruction} + query. {annotation_instruction} ' annotation_prompt_params: enable_annotations: true - annotation_instruction_template: ' Cite sources immediately at the end of sentences - before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''. + annotation_instruction_template: Cite sources immediately at the end of sentences + before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent - new ones.' + new ones. chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|> {chunk_text} ' + file_ingestion_params: + default_chunk_size_tokens: 512 + default_chunk_overlap_tokens: 128 + chunk_retrieval_params: + chunk_multiplier: 5 + max_tokens_in_context: 4000 + default_reranker_strategy: rrf + rrf_impact_factor: 60.0 + weighted_search_alpha: 0.5 + file_batch_params: + max_concurrent_files_per_batch: 3 + file_batch_chunk_size: 10 + cleanup_interval_seconds: 86400 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py index 1be7c2d7b..57dd76619 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py @@ -11,11 +11,8 @@ from typing import Any from opentelemetry import trace +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger -from llama_stack.providers.utils.memory.constants import ( - DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE, - DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE, -) from llama_stack_api import ( ImageContentItem, OpenAIChatCompletionContentPartImageParam, @@ -175,8 +172,10 @@ class ToolExecutor: self.vector_stores_config.annotation_prompt_params.annotation_instruction_template ) else: - chunk_annotation_template = DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE - annotation_instruction_template = DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE + # Use defaults from VectorStoresConfig when annotations disabled + default_config = VectorStoresConfig() + chunk_annotation_template = default_config.annotation_prompt_params.chunk_annotation_template + annotation_instruction_template = default_config.annotation_prompt_params.annotation_instruction_template content_items = [] 
content_items.append(TextContentItem(text=header_template.format(num_chunks=len(search_results)))) diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py index c3f9882d0..55a70f25c 100644 --- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -116,8 +116,10 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): self, documents: list[RAGDocument], vector_store_id: str, - chunk_size_in_tokens: int = 512, + chunk_size_in_tokens: int | None = None, ) -> None: + if chunk_size_in_tokens is None: + chunk_size_in_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_size_tokens if not documents: return @@ -145,10 +147,11 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): log.error(f"Failed to upload file for document {doc.document_id}: {e}") continue + overlap_tokens = self.config.vector_stores_config.file_ingestion_params.default_chunk_overlap_tokens chunking_strategy = VectorStoreChunkingStrategyStatic( static=VectorStoreChunkingStrategyStaticConfig( max_chunk_size_tokens=chunk_size_in_tokens, - chunk_overlap_tokens=chunk_size_in_tokens // 4, + chunk_overlap_tokens=overlap_tokens, ) ) @@ -180,7 +183,9 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID." ) - query_config = query_config or RAGQueryConfig() + query_config = query_config or RAGQueryConfig( + max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context + ) query = await generate_rag_query( query_config.query_generator_config, content, @@ -319,7 +324,9 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime): if query_config: query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config) else: - query_config = RAGQueryConfig() + query_config = RAGQueryConfig( + max_tokens_in_context=self.config.vector_stores_config.chunk_retrieval_params.max_tokens_in_context + ) query = kwargs["query"] result = await self.query( diff --git a/src/llama_stack/providers/utils/memory/__init__.py b/src/llama_stack/providers/utils/memory/__init__.py index 05a832b6f..e45d35507 100644 --- a/src/llama_stack/providers/utils/memory/__init__.py +++ b/src/llama_stack/providers/utils/memory/__init__.py @@ -4,6 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .constants import DEFAULT_QUERY_REWRITE_PROMPT - -__all__ = ["DEFAULT_QUERY_REWRITE_PROMPT"] +__all__ = [] diff --git a/src/llama_stack/providers/utils/memory/constants.py b/src/llama_stack/providers/utils/memory/constants.py deleted file mode 100644 index b5a318bd8..000000000 --- a/src/llama_stack/providers/utils/memory/constants.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Default prompt template for query rewriting in vector search -DEFAULT_QUERY_REWRITE_PROMPT = "Expand this query with relevant synonyms and related terms. 
Return only the improved query, no explanations:\n\n{query}\n\nImproved query:" - -# Default templates for file search tool output formatting -DEFAULT_FILE_SEARCH_HEADER_TEMPLATE = ( - "knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n" -) -DEFAULT_FILE_SEARCH_FOOTER_TEMPLATE = "END of knowledge_search tool results.\n" - -# Default templates for LLM prompt content and chunk formatting -DEFAULT_CHUNK_ANNOTATION_TEMPLATE = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" -DEFAULT_CONTEXT_TEMPLATE = 'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{annotation_instruction}\n' - -# Default templates for source annotation and attribution features -DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE = " Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones." -DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE = "[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n" diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 2065a34e8..1b86dfe0f 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -15,6 +15,7 @@ from typing import Annotated, Any from fastapi import Body from pydantic import TypeAdapter +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger from llama_stack.providers.utils.memory.vector_store import ( @@ -59,10 +60,6 @@ EMBEDDING_DIMENSION = 768 logger = get_logger(name=__name__, category="providers::utils") # Constants for OpenAI vector stores -CHUNK_MULTIPLIER = 5 -FILE_BATCH_CLEANUP_INTERVAL_SECONDS = 24 * 60 * 60 # 1 day in seconds -MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within a batch -FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size VERSION = "v3" VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::" @@ -85,11 +82,13 @@ class OpenAIVectorStoreMixin(ABC): self, files_api: Files | None = None, kvstore: KVStore | None = None, + vector_stores_config: VectorStoresConfig | None = None, ): self.openai_vector_stores: dict[str, dict[str, Any]] = {} self.openai_file_batches: dict[str, dict[str, Any]] = {} self.files_api = files_api self.kvstore = kvstore + self.vector_stores_config = vector_stores_config or VectorStoresConfig() self._last_file_batch_cleanup_time = 0 self._file_batch_tasks: dict[str, asyncio.Task[None]] = {} self._vector_store_locks: dict[str, asyncio.Lock] = {} @@ -619,7 +618,7 @@ class OpenAIVectorStoreMixin(ABC): else 0.0 ) params = { - "max_chunks": max_num_results * CHUNK_MULTIPLIER, + "max_chunks": max_num_results * self.vector_stores_config.chunk_retrieval_params.chunk_multiplier, "score_threshold": score_threshold, "mode": search_mode, } @@ -1072,7 +1071,10 @@ class OpenAIVectorStoreMixin(ABC): # Run cleanup if needed (throttled to once every 1 day) current_time = int(time.time()) - if current_time - self._last_file_batch_cleanup_time >= FILE_BATCH_CLEANUP_INTERVAL_SECONDS: + if ( + current_time - self._last_file_batch_cleanup_time + >= 
self.vector_stores_config.file_batch_params.cleanup_interval_seconds + ): logger.info("Running throttled cleanup of expired file batches") asyncio.create_task(self._cleanup_expired_file_batches()) self._last_file_batch_cleanup_time = current_time @@ -1089,7 +1091,7 @@ class OpenAIVectorStoreMixin(ABC): batch_info: dict[str, Any], ) -> None: """Process files with controlled concurrency and chunking.""" - semaphore = asyncio.Semaphore(MAX_CONCURRENT_FILES_PER_BATCH) + semaphore = asyncio.Semaphore(self.vector_stores_config.file_batch_params.max_concurrent_files_per_batch) async def process_single_file(file_id: str) -> tuple[str, bool]: """Process a single file with concurrency control.""" @@ -1108,12 +1110,13 @@ class OpenAIVectorStoreMixin(ABC): # Process files in chunks to avoid creating too many tasks at once total_files = len(file_ids) - for chunk_start in range(0, total_files, FILE_BATCH_CHUNK_SIZE): - chunk_end = min(chunk_start + FILE_BATCH_CHUNK_SIZE, total_files) + chunk_size = self.vector_stores_config.file_batch_params.file_batch_chunk_size + for chunk_start in range(0, total_files, chunk_size): + chunk_end = min(chunk_start + chunk_size, total_files) chunk = file_ids[chunk_start:chunk_end] - chunk_num = chunk_start // FILE_BATCH_CHUNK_SIZE + 1 - total_chunks = (total_files + FILE_BATCH_CHUNK_SIZE - 1) // FILE_BATCH_CHUNK_SIZE + chunk_num = chunk_start // chunk_size + 1 + total_chunks = (total_files + chunk_size - 1) // chunk_size logger.info( f"Processing chunk {chunk_num} of {total_chunks} ({len(chunk)} files, {chunk_start + 1}-{chunk_end} of {total_files} total files)" ) diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index b6a671ddb..3d2a02526 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -17,6 +17,7 @@ import numpy as np from numpy.typing import NDArray from pydantic import BaseModel +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.utils.inference.prompt_adapter import ( @@ -262,6 +263,7 @@ class VectorStoreWithIndex: vector_store: VectorStore index: EmbeddingIndex inference_api: Api.inference + vector_stores_config: VectorStoresConfig | None = None async def insert_chunks( self, @@ -294,6 +296,8 @@ class VectorStoreWithIndex: query: InterleavedContent, params: dict[str, Any] | None = None, ) -> QueryChunksResponse: + config = self.vector_stores_config or VectorStoresConfig() + if params is None: params = {} k = params.get("max_chunks", 3) @@ -302,19 +306,25 @@ class VectorStoreWithIndex: ranker = params.get("ranker") if ranker is None: - reranker_type = RERANKER_TYPE_RRF - reranker_params = {"impact_factor": 60.0} + reranker_type = ( + RERANKER_TYPE_RRF + if config.chunk_retrieval_params.default_reranker_strategy == "rrf" + else config.chunk_retrieval_params.default_reranker_strategy + ) + reranker_params = {"impact_factor": config.chunk_retrieval_params.rrf_impact_factor} else: - strategy = ranker.get("strategy", "rrf") + strategy = ranker.get("strategy", config.chunk_retrieval_params.default_reranker_strategy) if strategy == "weighted": weights = ranker.get("params", {}).get("weights", [0.5, 0.5]) reranker_type = RERANKER_TYPE_WEIGHTED - reranker_params = {"alpha": weights[0] if len(weights) > 0 else 0.5} + reranker_params = { + "alpha": weights[0] if len(weights) > 0 else 
config.chunk_retrieval_params.weighted_search_alpha + } elif strategy == "normalized": reranker_type = RERANKER_TYPE_NORMALIZED else: reranker_type = RERANKER_TYPE_RRF - k_value = ranker.get("params", {}).get("k", 60.0) + k_value = ranker.get("params", {}).get("k", config.chunk_retrieval_params.rrf_impact_factor) reranker_params = {"impact_factor": k_value} query_string = interleaved_content_as_str(query) diff --git a/tests/integration/responses/recordings/0995df80c05acd7a1c386b09d5b4520ffff5233bf1fdd222607ec879cb5bcdb1.json b/tests/integration/responses/recordings/0995df80c05acd7a1c386b09d5b4520ffff5233bf1fdd222607ec879cb5bcdb1.json new file mode 100644 index 000000000..548364f69 --- /dev/null +++ b/tests/integration/responses/recordings/0995df80c05acd7a1c386b09d5b4520ffff5233bf1fdd222607ec879cb5bcdb1.json @@ -0,0 +1,1569 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search_empty_vector_store[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "How many experts does the Llama 4 Maverick model have?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_ltsd3q9G7fq4by5VmgdvtNRX", + "type": "function", + "function": { + "name": "knowledge_search", + "arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_ltsd3q9G7fq4by5VmgdvtNRX", + "content": [ + { + "type": "text", + "text": "knowledge_search tool found 0 chunks:\nBEGIN of knowledge_search tool results.\n" + }, + { + "type": "text", + "text": "END of knowledge_search tool results.\n" + }, + { + "type": "text", + "text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. \n" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "knowledge_search", + "description": "Search for information in a database.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The query to search for. Can be a natural language sentence or keywords." 
+ } + }, + "required": [ + "query" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "ARU7agM5Kl6Can" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "exXp4WAEiuPBmMB" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " couldn't", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "xssLzpn" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "0BTaoXZA8mQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " any", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "LlSCtzRLjGjH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " specific", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "orBeElY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + 
"choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "d6KY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " regarding", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "PwpmZG" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "id5tKvqSqjid" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " number", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "TEEJU1XUv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "7NJxYPUfAtl9S" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " experts", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "CLTlAmc3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + 
"service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "wb1ZNUP4jcRa7" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "TpibHTMgCSHy" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " L", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "3fOnZRCOIleQ8B" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": "lama", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "1tFEt9kQ1tZX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "ATTnqQBBl7rMjJh" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": "4", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "eF4iJxsVsjcJl3n" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " Maver", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "lUhJZS5zeH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + 
"content": "ick", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "Y9r2Eu5wDSKcr" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " model", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "KJc2SkMxFZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "BZtlJi2pq1qnnp3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " It", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "Y3fCHRenLmWyK" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " might", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "sHXoKiC9uk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " be", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "BMrLe0kuk0QQ3" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + 
"system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "U68ogLmBWy4iJL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " newly", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "9HTXYuFeVx" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " released", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "u5NWju8" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "BuCRBR2noTbji" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " less", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "3420ZSe04Jf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": "-d", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "ymAe37w8tzANVu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": "ocumented", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "Tz9kJv4" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " model", + 
"function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "1BwzPW7hOS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "Fk4890Poh3Z7yxq" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " You", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "RjPw2aE7o0Kk" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " might", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "7iEjaT1qKH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " want", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "egriJ8QLp50" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "in46MmePgKElY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " check", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": 
"fp_a0e9480a2f", + "usage": null, + "obfuscation": "6vliRbTwMu" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "89WljgCGltV2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " latest", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "kJiirGKpd" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " research", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "cBDcckf" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " publications", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "aRa" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "u4Y9Etj7caLeN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " official", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "61DMPlA" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " announcements", + "function_call": null, + 
"refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "oJ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " from", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "UEM28ZoLN9v" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "3hY17lwI2ULe" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " developers", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "1Ilo9" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "XqOjbKuOmKKK" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " detailed", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "bUvPDuF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": 
null, + "obfuscation": "oaD5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "wrpJZlqJ9chr8wR" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "uTmBJktY1x" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0995df80c05a", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": { + "completion_tokens": 53, + "prompt_tokens": 164, + "total_tokens": 217, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "79TIwlNhYOz5b" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/responses/recordings/b6ea82498b4cd08dbbfec50c2bf7e20bf3f40ed0acbe79695f18c787ad0e3ed7.json b/tests/integration/responses/recordings/b6ea82498b4cd08dbbfec50c2bf7e20bf3f40ed0acbe79695f18c787ad0e3ed7.json new file mode 100644 index 000000000..00d749fb4 --- /dev/null +++ b/tests/integration/responses/recordings/b6ea82498b4cd08dbbfec50c2bf7e20bf3f40ed0acbe79695f18c787ad0e3ed7.json @@ -0,0 +1,1164 @@ +{ + "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search_empty_vector_store[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "How many experts does the Llama 4 Maverick model have?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_mj487Ks2sh1tl4Ic45p6c05I", + "type": "function", + "function": { + "name": "knowledge_search", + "arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_mj487Ks2sh1tl4Ic45p6c05I", + "content": [ + { + "type": "text", + "text": "knowledge_search tool found 0 chunks:\nBEGIN of knowledge_search tool results.\n" + }, + { + "type": "text", + "text": "END of knowledge_search tool results.\n" + }, + { + "type": "text", + "text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. 
\n" + } + ] + } + ], + "stream": true, + "stream_options": { + "include_usage": true + }, + "tools": [ + { + "type": "function", + "function": { + "name": "knowledge_search", + "description": "Search for information in a database.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The query to search for. Can be a natural language sentence or keywords." + } + }, + "required": [ + "query" + ] + } + } + } + ] + }, + "endpoint": "/v1/chat/completions", + "model": "gpt-4o" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "PXCU0Fc9KnVUCW" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": "I", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "JeuRIOenpKCUfwL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " couldn't", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "9E06kil" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "XkwagZdEzE2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " specific", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "Ef6pcWm" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": 
null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "sYzt" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " about", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "2P5RGumaJF" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "6JVmKwFcUP20" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " number", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "E9fnAugy1" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "95rIc4FXguQRY" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " experts", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "JIjCDUjS" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "a5FBV2X2XXgeU" + } + }, + { + 
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "ZlUzEPCsbdPO" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " L", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "NoJj6iAJSJRTdV" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": "lama", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "VpMrseEXHoTQ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "kYLFuOXJQsHSJH7" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": "4", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "OIJ2TxG0pwv2sf5" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " Maver", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "NNp5ehv1YL" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": "ick", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + 
"index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "z8QsNuXT8BRqp" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " model", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "uNOnmCMa56" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "o36oZirzavxOw9N" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " You", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "2UWZ6AoDYL1k" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " might", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "qUJBriXvuN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " want", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "6FzX8MHAB6t" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "75hZZsJCY0qnx" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " check", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "4luqMbwpbt" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "d2KRIqZb07Xv" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " official", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "PyXpPJT" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " documentation", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "Qw" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "Q30Y2UWaLD4uN" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " announcements", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "Dx" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " from", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + 
"logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "lxjBnf6hdyH" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "Y1EqsF50UT2l" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " developers", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "ZGMxo" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "fgZX5F4SKuDX" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " detailed", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "iSMc9ip" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": " specifications", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "2" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "h5p0D7eMT1XgIGE" + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [ + { + "delta": { + "content": null, + "function_call": null, + "refusal": null, + "role": null, + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": null, + "obfuscation": "YrKrRHEGrZ" + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b6ea82498b4c", + "choices": [], + "created": 0, + "model": "gpt-4o-2024-08-06", + "object": "chat.completion.chunk", + "service_tier": "default", + "system_fingerprint": "fp_a0e9480a2f", + "usage": { + "completion_tokens": 38, + "prompt_tokens": 164, + "total_tokens": 202, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "prompt_tokens_details": { + "audio_tokens": 0, + "cached_tokens": 0 + } + }, + "obfuscation": "QFS8GRjwzGgvl" + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index 9a80d3d29..2064acd1f 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -156,7 +156,6 @@ async def test_query_rewrite_functionality(): from unittest.mock import MagicMock from llama_stack.core.datatypes import QualifiedModel, RewriteQueryParams, VectorStoresConfig - from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT from llama_stack_api import VectorStoreSearchResponsePage mock_routing_table = Mock() @@ -197,7 +196,7 @@ async def test_query_rewrite_functionality(): # Verify default prompt is used prompt_text = chat_call_args.messages[0].content - expected_prompt = DEFAULT_QUERY_REWRITE_PROMPT.format(query="test query") + expected_prompt = "Expand this query with relevant synonyms and related terms. 
Return only the improved query, no explanations:\n\ntest query\n\nImproved query:" assert prompt_text == expected_prompt # Verify routing table was called with rewritten query and rewrite_query=False diff --git a/tests/unit/core/test_vector_stores_config.py b/tests/unit/core/test_vector_stores_config.py index f17e70fa7..78e39580e 100644 --- a/tests/unit/core/test_vector_stores_config.py +++ b/tests/unit/core/test_vector_stores_config.py @@ -110,22 +110,23 @@ class TestOptionalArchitecture: assert config.annotation_prompt_params is not None assert "{num_chunks}" in config.file_search_params.header_template - def test_guaranteed_defaults_match_constants(self): - """Test that guaranteed defaults match expected constant values.""" - from llama_stack.providers.utils.memory.constants import ( - DEFAULT_CONTEXT_TEMPLATE, - DEFAULT_FILE_SEARCH_HEADER_TEMPLATE, - ) - + def test_guaranteed_defaults_have_expected_values(self): + """Test that guaranteed defaults have expected hardcoded values.""" # Create config with guaranteed defaults config = VectorStoresConfig() - # Verify defaults match constants + # Verify defaults have expected values header_template = config.file_search_params.header_template context_template = config.context_prompt_params.context_template - assert header_template == DEFAULT_FILE_SEARCH_HEADER_TEMPLATE - assert context_template == DEFAULT_CONTEXT_TEMPLATE + assert ( + header_template + == "knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n" + ) + assert ( + context_template + == 'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query. {annotation_instruction}\n' + ) # Verify templates can be formatted successfully formatted_header = header_template.format(num_chunks=3) diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index bd60ddb95..cfb8de0cc 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -1091,13 +1091,11 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter): # Give time for the semaphore logic to start processing files await asyncio.sleep(0.2) - # Verify that only MAX_CONCURRENT_FILES_PER_BATCH files are processing concurrently + # Verify that only max_concurrent_files_per_batch files are processing concurrently # The semaphore in _process_files_with_concurrency should limit this - from llama_stack.providers.utils.memory.openai_vector_store_mixin import MAX_CONCURRENT_FILES_PER_BATCH + max_concurrent_files = vector_io_adapter.vector_stores_config.file_batch_params.max_concurrent_files_per_batch - assert active_files == MAX_CONCURRENT_FILES_PER_BATCH, ( - f"Expected {MAX_CONCURRENT_FILES_PER_BATCH} active files, got {active_files}" - ) + assert active_files == max_concurrent_files, f"Expected {max_concurrent_files} active files, got {active_files}" # Verify batch is in progress assert batch.status == "in_progress"