Merge branch 'main' into vest_search

commit 7893641de1
Hardik Shah authored 2025-06-12 15:32:41 -07:00, committed by GitHub
Signature unverified: no known key found for this signature in database (GPG key ID: B5690EEEBB952194)
5 changed files with 42 additions and 6 deletions


@@ -45,20 +45,22 @@ jobs:
       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
 
       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
-          uv run pip list
-          nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+          # Use the virtual environment created by the build step (name comes from build config)
+          source ci-test/bin/activate
+          uv pip list
+          nohup llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
 
       - name: Wait for Llama Stack server to be ready
         run: |
           for i in {1..30}; do
-            if ! grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then
+            if ! grep -q "Successfully loaded external provider remote::custom_ollama" server.log; then
               echo "Waiting for Llama Stack server to load the provider..."
               sleep 1
             else
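
The readiness check now greps for a log marker rather than a provider file path, which only works because the registry change below actually emits that marker. A minimal Python sketch of the same polling logic, assuming the server appends its logs to server.log in the working directory (wait_for_provider and its signature are illustrative, not part of the workflow):

import time
from pathlib import Path

# Marker emitted by get_provider_registry() once the external provider is loaded
READY_MARKER = "Successfully loaded external provider remote::custom_ollama"

def wait_for_provider(log_path: str = "server.log", attempts: int = 30) -> bool:
    """Poll the server log until the external provider is reported as loaded."""
    for _ in range(attempts):
        log = Path(log_path)
        if log.exists() and READY_MARKER in log.read_text():
            return True
        time.sleep(1)  # same 1-second backoff as the shell loop above
    return False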


@@ -180,6 +180,7 @@ def get_provider_registry(
                 if provider_type_key in ret[api]:
                     logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}")
                 ret[api][provider_type_key] = spec
+                logger.info(f"Successfully loaded external provider {provider_type_key}")
             except yaml.YAMLError as yaml_err:
                 logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}")
                 raise yaml_err
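
The added logger.info line is the marker the CI wait loop above greps for. A rough sketch of the registration pattern, using a plain dict in place of the real registry and spec types (register_external_provider and its signature are illustrative, not llama-stack APIs):

import logging

logger = logging.getLogger(__name__)

def register_external_provider(registry: dict, api: str, provider_type_key: str, spec: object) -> None:
    # Warn when a provider with the same key is already registered, then override it
    registry.setdefault(api, {})
    if provider_type_key in registry[api]:
        logger.warning(f"Overriding already registered provider {provider_type_key} for {api}")
    registry[api][provider_type_key] = spec
    # This is the line the workflow's readiness check looks for in server.log
    logger.info(f"Successfully loaded external provider {provider_type_key}")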


@@ -115,7 +115,7 @@ def parse_environment_config(env_config: str) -> dict[str, int]:
 
 class CustomRichHandler(RichHandler):
     def __init__(self, *args, **kwargs):
-        kwargs["console"] = Console(width=120)
+        kwargs["console"] = Console(width=150)
         super().__init__(*args, **kwargs)
 
     def emit(self, record):


@@ -76,7 +76,7 @@ class WeaviateIndex(EmbeddingIndex):
                 continue
             chunks.append(chunk)
-            scores.append(1.0 / doc.metadata.distance)
+            scores.append(1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf"))
 
         return QueryChunksResponse(chunks=chunks, scores=scores)
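
The guard keeps an exact-match result (Weaviate distance 0.0) from raising ZeroDivisionError when the distance is inverted into a similarity-style score; the exact match instead gets the highest possible score. A tiny sketch of the conversion (distance_to_score is an illustrative helper, not part of the provider):

def distance_to_score(distance: float) -> float:
    # Smaller distance means a better match; an exact match maps to +inf
    return 1.0 / distance if distance != 0 else float("inf")

assert distance_to_score(0.5) == 2.0
assert distance_to_score(0.0) == float("inf")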


@@ -154,3 +154,36 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
     assert len(response.chunks) > 0
     assert response.chunks[0].metadata["document_id"] == "doc1"
     assert response.chunks[0].metadata["source"] == "precomputed"
+
+
+def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(client_with_empty_registry, embedding_model_id):
+    vector_db_id = "test_precomputed_embeddings_db"
+    client_with_empty_registry.vector_dbs.register(
+        vector_db_id=vector_db_id,
+        embedding_model=embedding_model_id,
+        embedding_dimension=384,
+    )
+
+    chunks_with_embeddings = [
+        Chunk(
+            content="duplicate",
+            metadata={"document_id": "doc1", "source": "precomputed"},
+            embedding=[0.1] * 384,
+        ),
+    ]
+
+    client_with_empty_registry.vector_io.insert(
+        vector_db_id=vector_db_id,
+        chunks=chunks_with_embeddings,
+    )
+
+    response = client_with_empty_registry.vector_io.query(
+        vector_db_id=vector_db_id,
+        query="duplicate",
+    )
+
+    # Verify the top result is the expected document
+    assert response is not None
+    assert len(response.chunks) > 0
+    assert response.chunks[0].metadata["document_id"] == "doc1"
+    assert response.chunks[0].metadata["source"] == "precomputed"
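
This new integration test appears to target the same edge case as the Weaviate change above: querying for content identical to a stored chunk, where a backend may report a 0.0 distance for the top hit. Without the guard, computing scores for such a hit would raise ZeroDivisionError; with it, the query should still return a valid response whose top chunk is the stored document.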