From ed7b4731aab8ecc544ae39b0b822223f204629be Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Tue, 20 May 2025 06:03:22 -0600 Subject: [PATCH] fix: Setting default value for `metadata_token_count` in case the key is not found (#2199) # What does this PR do? If a user has previously serialized data into their vector store without the `metadata_token_count` in the chunk, the `query` method will fail with a server error. This fixes that edge case by returning 0 when the key is not found. This solution is suboptimal but I think it's better to understate the token size rather than recalculate it and add unnecessary complexity to the retrieval code. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: Francisco Javier Arceo --- llama_stack/providers/inline/tool_runtime/rag/memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index 39f752297..c46960f75 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -146,7 +146,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): for i, chunk in enumerate(chunks): metadata = chunk.metadata tokens += metadata["token_count"] - tokens += metadata["metadata_token_count"] + tokens += metadata.get("metadata_token_count", 0) if tokens > query_config.max_tokens_in_context: log.error(