From edd57785a19340098dd2f203a4d70b6a6bb5e998 Mon Sep 17 00:00:00 2001
From: Kai Wu
Date: Tue, 5 Aug 2025 14:25:16 -0700
Subject: [PATCH] quick fix

Make the k8s port-forward script target pods by label with a fallback to
the service, raise the llama-stack server resource requests/limits and
VLLM_MAX_TOKENS in the stack template, and use an explicit None check when
normalizing finish_reason in stream_utils.

---
 docs/source/distributions/k8s/port-foward.sh  | 52 +++++++++++++++++--
 .../distributions/k8s/stack-k8s.yaml.template | 10 ++--
 .../providers/utils/inference/stream_utils.py |  3 +-
 3 files changed, 55 insertions(+), 10 deletions(-)
 mode change 100644 => 100755 docs/source/distributions/k8s/port-foward.sh

diff --git a/docs/source/distributions/k8s/port-foward.sh b/docs/source/distributions/k8s/port-foward.sh
old mode 100644
new mode 100755
index 6e0e609ce..924d18183
--- a/docs/source/distributions/k8s/port-foward.sh
+++ b/docs/source/distributions/k8s/port-foward.sh
@@ -1,4 +1,48 @@
-kubectl port-forward svc/llama-stack-ui-service 8322:8322 &
-kubectl port-forward svc/llama-stack-service 8321:8321 &
-kubectl port-forward svc/jaeger-dev-query 16686:16686 -n observability &
-kubectl port-forward svc/kube-prometheus-stack-1754270486-grafana 3000:3000 -n prometheus
+#!/bin/bash
+
+# Function to port-forward to a pod with fallback to service
+port_forward_with_fallback() {
+    local namespace=$1
+    local label_selector=$2
+    local service_name=$3
+    local local_port=$4
+    local pod_port=$5
+
+    echo "Attempting to port-forward to pod with label $label_selector in namespace $namespace..."
+
+    # Try to get pod name using the label selector
+    POD_NAME=$(kubectl get pods -n $namespace -l $label_selector -o jsonpath="{.items[0].metadata.name}" 2>/dev/null)
+
+    if [ -n "$POD_NAME" ]; then
+        echo "Found pod: $POD_NAME. Port-forwarding $local_port:$pod_port..."
+        kubectl port-forward -n $namespace $POD_NAME $local_port:$pod_port &
+    else
+        echo "No pods found with label $label_selector in namespace $namespace."
+        echo "Falling back to service port-forwarding for $service_name..."
+        kubectl port-forward -n $namespace svc/$service_name $local_port:$pod_port &
+    fi
+}
+
+# Port-forward to llama-stack-ui pod or service
+port_forward_with_fallback "default" "app.kubernetes.io/name=llama-stack-ui" "llama-stack-ui-service" 8322 8322
+
+# Port-forward to llama-stack server pod or service
+port_forward_with_fallback "default" "app.kubernetes.io/name=llama-stack,app.kubernetes.io/component=server" "llama-stack-service" 8321 8321
+
+# Port-forward to jaeger query pod or service in observability namespace
+port_forward_with_fallback "observability" "app.kubernetes.io/component=query,app.kubernetes.io/instance=jaeger-dev" "jaeger-dev-query" 16686 16686
+
+# Port-forward to the grafana service in the prometheus namespace (backgrounded like the others)
+kubectl port-forward svc/kube-prometheus-stack-1754270486-grafana 3000:80 -n prometheus &
+
+echo "Port-forwarding started for all components."
+echo "Access the services at:"
+echo " - Llama Stack UI: http://localhost:8322"
+echo " - Llama Stack API: http://localhost:8321"
+echo " - Jaeger UI: http://localhost:16686"
+echo " - Grafana: http://localhost:3000"
+echo ""
+echo "Press Ctrl+C to stop all port-forwarding processes."
+
+# Wait for all background processes to complete
+wait
diff --git a/docs/source/distributions/k8s/stack-k8s.yaml.template b/docs/source/distributions/k8s/stack-k8s.yaml.template
index 34a65af1f..7d24a8853 100644
--- a/docs/source/distributions/k8s/stack-k8s.yaml.template
+++ b/docs/source/distributions/k8s/stack-k8s.yaml.template
@@ -59,12 +59,12 @@ spec:
         image: llamastack/distribution-starter:0.2.15
         resources:
           requests:
-            memory: "512Mi"
-            cpu: "500m"
+            memory: "2Gi"
+            cpu: "8000m"
             ephemeral-storage: "6Gi"
           limits:
-            memory: "1Gi"
-            cpu: "1000m"
+            memory: "2Gi"
+            cpu: "8000m"
             ephemeral-storage: "6Gi"
         env:
         - name: ENABLE_CHROMADB
@@ -74,7 +74,7 @@ spec:
         - name: VLLM_URL
           value: http://vllm-server.default.svc.cluster.local:8001/v1
         - name: VLLM_MAX_TOKENS
-          value: "3072"
+          value: "80000"
         - name: OTEL_EXPORTER_OTLP_ENDPOINT
           value: http://jaeger-dev-collector.observability:4318
         - name: OTEL_SERVICE_NAME
diff --git a/llama_stack/providers/utils/inference/stream_utils.py b/llama_stack/providers/utils/inference/stream_utils.py
index 3221be198..ad9b0f105 100644
--- a/llama_stack/providers/utils/inference/stream_utils.py
+++ b/llama_stack/providers/utils/inference/stream_utils.py
@@ -111,7 +111,8 @@ async def stream_and_store_openai_completion(
         final_logprobs = OpenAIChoiceLogprobs(content=logprobs_content) if logprobs_content else None
 
         # Ensure finish_reason is a string as required by OpenAIChoice
-        finish_reason = choice_data["finish_reason"] or "unknown"
+        # Using explicit None check to guarantee a string value
+        finish_reason = choice_data["finish_reason"] if choice_data["finish_reason"] is not None else "unknown"
         assembled_choices.append(
             OpenAIChoice(
                 finish_reason=finish_reason,