Merge branch 'main' into vectordb_name

commit 74b0ab69ed
Francisco Arceo, 2025-07-06 15:40:20 -04:00, committed by GitHub
161 changed files with 1844 additions and 11065 deletions

@@ -26,8 +26,8 @@ class CerebrasImplConfig(BaseModel):
     )

     @classmethod
-    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
+    def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]:
         return {
             "base_url": DEFAULT_BASE_URL,
-            "api_key": "${env.CEREBRAS_API_KEY}",
+            "api_key": api_key,
         }
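
Parameterizing api_key, rather than hard-coding the placeholder in the body, lets callers generate run configs with their own placeholder or literal value. A minimal sketch of both call styles; the import path is an assumption, not something the diff shows:

    # Hedged sketch; the module path below is assumed.
    from llama_stack.providers.remote.inference.cerebras.config import CerebrasImplConfig

    default_cfg = CerebrasImplConfig.sample_run_config()
    # {"base_url": DEFAULT_BASE_URL, "api_key": "${env.CEREBRAS_API_KEY}"}

    custom_cfg = CerebrasImplConfig.sample_run_config(api_key="${env.MY_CEREBRAS_KEY}")
    # {"base_url": DEFAULT_BASE_URL, "api_key": "${env.MY_CEREBRAS_KEY}"}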

@@ -13,13 +13,9 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"

 class OllamaImplConfig(BaseModel):
     url: str = DEFAULT_OLLAMA_URL
-    raise_on_connect_error: bool = True

     @classmethod
-    def sample_run_config(
-        cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
-    ) -> dict[str, Any]:
+    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
         return {
             "url": url,
-            "raise_on_connect_error": raise_on_connect_error,
         }
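
The `${env.OLLAMA_URL:=http://localhost:11434}` placeholder is resolved when the run config is loaded, with `:=` supplying a fallback for an unset variable. llama-stack ships its own resolver; the sketch below only illustrates the semantics:

    import os
    import re

    # Minimal sketch of "${env.VAR}" / "${env.VAR:=default}" resolution.
    _PATTERN = re.compile(r"\$\{env\.(\w+)(?::=([^}]*))?\}")

    def resolve_env_placeholders(value: str) -> str:
        def _sub(m: re.Match) -> str:
            var, default = m.group(1), m.group(2)
            if var in os.environ:
                return os.environ[var]
            if default is not None:
                return default  # the ":=" form falls back to its default
            raise ValueError(f"environment variable {var} is not set")
        return _PATTERN.sub(_sub, value)

    # resolve_env_placeholders("${env.OLLAMA_URL:=http://localhost:11434}")
    # -> "http://localhost:11434" when OLLAMA_URL is unset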

@@ -84,7 +84,7 @@ MODEL_ENTRIES = [
         CoreModelId.llama_guard_3_1b.value,
     ),
     ProviderModelEntry(
-        provider_model_id="all-minilm:latest",
+        provider_model_id="all-minilm:l6-v2",
         aliases=["all-minilm"],
         model_type=ModelType.embedding,
         metadata={
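
After this change both the bare alias `all-minilm` and the pinned tag `all-minilm:l6-v2` should resolve to the same entry. A hypothetical lookup mirroring what ModelRegistryHelper does with aliases (the helper's real API may differ):

    # Illustrative only; relies just on the fields visible in the hunk above.
    def provider_model_id(requested: str, entries) -> str:
        for entry in entries:
            if requested == entry.provider_model_id or requested in entry.aliases:
                return entry.provider_model_id
        raise KeyError(f"unknown model: {requested}")

    # provider_model_id("all-minilm", MODEL_ENTRIES)  # -> "all-minilm:l6-v2"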

@@ -94,7 +94,6 @@ class OllamaInferenceAdapter(
     def __init__(self, config: OllamaImplConfig) -> None:
         self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
         self.url = config.url
-        self.raise_on_connect_error = config.raise_on_connect_error

     @property
     def client(self) -> AsyncClient:
@@ -108,10 +107,7 @@ class OllamaInferenceAdapter(
         logger.debug(f"checking connectivity to Ollama at `{self.url}`...")
         health_response = await self.health()
         if health_response["status"] == HealthStatus.ERROR:
-            if self.raise_on_connect_error:
-                raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
-            else:
-                logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
+            raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")

     async def health(self) -> HealthResponse:
         """

@@ -24,8 +24,10 @@ class PassthroughImplConfig(BaseModel):
     )

     @classmethod
-    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
+    def sample_run_config(
+        cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
+    ) -> dict[str, Any]:
         return {
-            "url": "${env.PASSTHROUGH_URL}",
-            "api_key": "${env.PASSTHROUGH_API_KEY}",
+            "url": url,
+            "api_key": api_key,
         }

@@ -26,5 +26,5 @@ class RunpodImplConfig(BaseModel):
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
             "url": "${env.RUNPOD_URL:=}",
-            "api_token": "${env.RUNPOD_API_TOKEN:=}",
+            "api_token": "${env.RUNPOD_API_TOKEN}",
         }
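
Dropping the `:=` default makes RUNPOD_API_TOKEN a required variable: with the resolver sketched earlier, an unset token now fails loudly instead of silently producing an empty string:

    import os

    os.environ.pop("RUNPOD_API_TOKEN", None)
    try:
        resolve_env_placeholders("${env.RUNPOD_API_TOKEN}")
    except ValueError as err:
        print(err)  # environment variable RUNPOD_API_TOKEN is not set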

@@ -17,7 +17,11 @@ class TGIImplConfig(BaseModel):
     )

     @classmethod
-    def sample_run_config(cls, url: str = "${env.TGI_URL}", **kwargs):
+    def sample_run_config(
+        cls,
+        url: str = "${env.TGI_URL}",
+        **kwargs,
+    ):
         return {
             "url": url,
         }

@@ -327,7 +327,6 @@ class InferenceEndpointAdapter(_HfAdapter):
         # Get the inference endpoint details
         api = HfApi(token=config.api_token.get_secret_value())
         endpoint = api.get_inference_endpoint(config.endpoint_name)
-
         # Wait for the endpoint to be ready (if not already)
         endpoint.wait(timeout=60)
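
`endpoint.wait(timeout=60)` blocks until the Inference Endpoint reports itself running and, per the huggingface_hub docs, raises a timeout error otherwise. A standalone sketch of the same flow; the endpoint name and token are placeholders, and the exception's import path varies across huggingface_hub versions:

    from huggingface_hub import HfApi
    from huggingface_hub.errors import InferenceEndpointTimeoutError  # path varies by version

    api = HfApi(token="hf_...")                           # placeholder token
    endpoint = api.get_inference_endpoint("my-endpoint")  # illustrative name
    try:
        endpoint.wait(timeout=60)                         # poll until the endpoint is running
    except InferenceEndpointTimeoutError:
        raise RuntimeError("endpoint did not become ready within 60s")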

@@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
             "url": "https://api.together.xyz/v1",
-            "api_key": "${env.TOGETHER_API_KEY:=}",
+            "api_key": "${env.TOGETHER_API_KEY}",
         }