fix: remove inference.completion from docs

Matthew Farrellee 2025-09-28 07:30:19 -04:00
parent 65f7b81e98
commit e1b750e4e1
6 changed files with 26 additions and 64 deletions

@@ -39,25 +39,6 @@ client = LlamaStackAsLibraryClient("nvidia")
 client.initialize()
 ```
-### Create Completion
-The following example shows how to create a completion for an NVIDIA NIM.
-> [!NOTE]
-> The hosted NVIDIA Llama NIMs (for example ```meta-llama/Llama-3.1-8B-Instruct```) that have ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` do not support the ```completion``` method, while locally deployed NIMs do.
-```python
-response = client.inference.completion(
-    model_id="meta-llama/Llama-3.1-8B-Instruct",
-    content="Complete the sentence using one word: Roses are red, violets are :",
-    stream=False,
-    sampling_params={
-        "max_tokens": 50,
-    },
-)
-print(f"Response: {response.content}")
-```
 ### Create Chat Completion
 The following example shows how to create a chat completion for an NVIDIA NIM.

@@ -140,13 +140,11 @@ client.models.register(
 #### 2. Inference with the fine-tuned model
 ```python
-response = client.inference.completion(
-    content="Complete the sentence using one word: Roses are red, violets are ",
+response = client.completions.create(
+    prompt="Complete the sentence using one word: Roses are red, violets are ",
     stream=False,
-    model_id="test-example-model@v1",
-    sampling_params={
-        "max_tokens": 50,
-    },
+    model="test-example-model@v1",
+    max_tokens=50,
 )
-print(response.content)
+print(response.choices[0].text)
 ```
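
For reference, here is a minimal, runnable sketch of the replacement call outside the diff. The call shape and response access come straight from the hunk above; the `base_url` and client construction are assumptions for a locally running Llama Stack distribution, so substitute your own values.

```python
# Sketch of the OpenAI-compatible completions API that replaces
# client.inference.completion. The base_url is an assumption.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.completions.create(
    model="test-example-model@v1",  # fine-tuned model ID registered earlier in the docs
    prompt="Complete the sentence using one word: Roses are red, violets are ",
    stream=False,
    max_tokens=50,
)
# Responses follow the OpenAI completions schema: text lives on choices[0].text
print(response.choices[0].text)
```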