This commit is contained in:
Swapna Lekkala 2025-09-18 16:11:50 -07:00
parent a6baa7b3d4
commit 0b9480a0e3
5 changed files with 0 additions and 18 deletions

View file

@@ -590,7 +590,6 @@ class InferenceRouter(Inference):
async def _nonstream_openai_chat_completion(self, provider: Inference, params: dict) -> OpenAIChatCompletion:
response = await provider.openai_chat_completion(**params)
for choice in response.choices:
# some providers return an empty list for no tool calls in non-streaming responses
# but the OpenAI API returns None. So, set tool_calls to None if it's empty

View file

@@ -61,7 +61,6 @@ MODEL_ENTRIES = [
),
ProviderModelEntry(
provider_model_id="nomic-ai/nomic-embed-text-v1.5",
aliases=["nomic-ai/nomic-embed-text-v1.5"],
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 768,

View file

@@ -13,13 +13,6 @@ import pytest
from ..test_cases.test_case import TestCase
@pytest.fixture(autouse=True)
def rate_limit_delay():
"""Add delay between tests to avoid rate limiting from providers like Fireworks"""
yield
time.sleep(30)  # 30 second delay after each test
def _normalize_text(text: str) -> str:
"""
Normalize Unicode text by removing diacritical marks for comparison.

View file

@@ -6,7 +6,6 @@
import base64
import struct
import time
import pytest
from openai import OpenAI
@@ -14,13 +13,6 @@ from openai import OpenAI
from llama_stack.core.library_client import LlamaStackAsLibraryClient
@pytest.fixture(autouse=True)
def rate_limit_delay():
"""Add delay between tests to avoid rate limiting from providers like Fireworks"""
yield
time.sleep(30)  # 30 second delay after each test
def decode_base64_to_floats(base64_string: str) -> list[float]:
"""Helper function to decode base64 string to list of float32 values."""
embedding_bytes = base64.b64decode(base64_string)

View file

@@ -115,7 +115,6 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
"text_model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"vision_model": "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
"embedding_model": "nomic-ai/nomic-embed-text-v1.5",
# "embedding_model": "accounts/fireworks/models/qwen3-embedding-8b",
},
),
}