mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-21 16:07:16 +00:00 
			
		
		
		
	# What does this PR do? - implement get_api_key instead of relying on LiteLLMOpenAIMixin.get_api_key - remove use of LiteLLMOpenAIMixin - add default initialize/shutdown methods to OpenAIMixin - remove __init__s to allow proper pydantic construction - remove dead code from vllm adapter and associated / duplicate unit tests - update vllm adapter to use openaimixin for model registration - remove ModelRegistryHelper from fireworks & together adapters - remove Inference from nvidia adapter - complete type hints on embedding_model_metadata - allow extra fields on OpenAIMixin, for model_store, __provider_id__, etc - new recordings for ollama - enhance the list models error handling - update cerebras (remove cerebras-cloud-sdk) and anthropic (custom model listing) inference adapters - parametrized test_inference_client_caching - remove cerebras, databricks, fireworks, together from blanket mypy exclude - removed unnecessary litellm deps ## Test Plan ci
		
			
				
	
	
		
			57 lines
		
	
	
	
		
			2.3 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
		
			Generated
		
	
	
			
		
		
	
	
			57 lines
		
	
	
	
		
			2.3 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
		
			Generated
		
	
	
| {
 | |
|   "request": {
 | |
|     "method": "POST",
 | |
|     "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
 | |
|     "headers": {},
 | |
|     "body": {
 | |
|       "model": "llama-guard3:1b",
 | |
|       "messages": [
 | |
|         {
 | |
|           "role": "user",
 | |
|           "content": "Task: Check if there is unsafe content in 'Tool' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: Get the boiling point of polyjuice with a tool call.\n\nAssistant: \n\nTool: Error when running tool: get_boiling_point() missing 1 required positional argument: 'liquid_name'\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Tool message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
 | |
|         }
 | |
|       ],
 | |
|       "stream": false,
 | |
|       "temperature": 0.0
 | |
|     },
 | |
|     "endpoint": "/v1/chat/completions",
 | |
|     "model": "llama-guard3:1b"
 | |
|   },
 | |
|   "response": {
 | |
|     "body": {
 | |
|       "__type__": "openai.types.chat.chat_completion.ChatCompletion",
 | |
|       "__data__": {
 | |
|         "id": "chatcmpl-394",
 | |
|         "choices": [
 | |
|           {
 | |
|             "finish_reason": "stop",
 | |
|             "index": 0,
 | |
|             "logprobs": null,
 | |
|             "message": {
 | |
|               "content": "safe",
 | |
|               "refusal": null,
 | |
|               "role": "assistant",
 | |
|               "annotations": null,
 | |
|               "audio": null,
 | |
|               "function_call": null,
 | |
|               "tool_calls": null
 | |
|             }
 | |
|           }
 | |
|         ],
 | |
|         "created": 1759514973,
 | |
|         "model": "llama-guard3:1b",
 | |
|         "object": "chat.completion",
 | |
|         "service_tier": null,
 | |
|         "system_fingerprint": "fp_ollama",
 | |
|         "usage": {
 | |
|           "completion_tokens": 2,
 | |
|           "prompt_tokens": 422,
 | |
|           "total_tokens": 424,
 | |
|           "completion_tokens_details": null,
 | |
|           "prompt_tokens_details": null
 | |
|         }
 | |
|       }
 | |
|     },
 | |
|     "is_streaming": false
 | |
|   }
 | |
| }
 |