Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-03 09:21:45 +00:00)
Fix openai_completion tests for ollama
When called via the OpenAI API, ollama responds with briefer completions than when called via its native API. This adjusts the prompting in the OpenAI-path tests to ask it to be more verbose.
This commit is contained in:
commit ef684ff178 (parent 52b4766949)
2 changed files with 9 additions and 2 deletions
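For context, here is a minimal sketch of how a test might exercise ollama through the OpenAI-compatible completions endpoint and why the extra instruction helps. The base URL, port, and model id below are assumptions for illustration, not values taken from this commit.

```python
from openai import OpenAI

# Assumed local endpoint exposing the OpenAI-compatible API in front of ollama;
# adjust base_url and model to match your deployment.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

question = "Which planet do humans live on?"  # illustrative question

# Called through the OpenAI API, ollama tends to answer tersely, which can
# make length or substring assertions in tests flaky.
terse = client.completions.create(model="llama3.2:3b", prompt=question, stream=False)

# The workaround in this commit: ask it to explain, so the reply is longer.
verbose = client.completions.create(
    model="llama3.2:3b",
    prompt="Respond to this question and explain your answer. " + question,
    stream=False,
)

print(len(terse.choices[0].text), len(verbose.choices[0].text))
```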
@@ -348,6 +348,9 @@ class OllamaInferenceAdapter(
         top_p: Optional[float] = None,
         user: Optional[str] = None,
     ) -> OpenAICompletion:
+        if not isinstance(prompt, str):
+            raise ValueError("Ollama does not support non-string prompts for completion")
+
         model_obj = await self._get_model(model)
         params = {
             k: v
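The guard added above matters because the OpenAI /v1/completions spec also allows list and token-array prompts, which this adapter does not handle. A small illustrative sketch of the rejection path, using the same client and fixtures as the tests below; how the error surfaces on the client side depends on how the server maps the ValueError:

```python
# Illustrative only: a non-string prompt is rejected by the adapter above.
try:
    openai_client.completions.create(
        model=text_model_id,            # fixture value, as in the tests below
        prompt=["not", "a", "string"],  # list prompt -> rejected by the adapter
        stream=False,
    )
except Exception as err:
    # Expect an error mentioning "Ollama does not support non-string prompts";
    # the exact exception type depends on how the server wraps the ValueError.
    print(err)
```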
@@ -53,9 +53,11 @@ def openai_client(client_with_models, text_model_id):
 def test_openai_completion_non_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=False,
     )
     assert len(response.choices) > 0
@@ -72,9 +74,11 @@ def test_openai_completion_non_streaming(openai_client, text_model_id, test_case):
 def test_openai_completion_streaming(openai_client, text_model_id, test_case):
     tc = TestCase(test_case)
 
+    # ollama needs more verbose prompting for some reason here...
+    prompt = "Respond to this question and explain your answer. " + tc["content"]
     response = openai_client.completions.create(
         model=text_model_id,
-        prompt=tc["content"],
+        prompt=prompt,
         stream=True,
         max_tokens=50,
     )
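For the streaming variant, the response is an iterator of completion chunks. A sketch of how the prefixed prompt would typically be consumed; the model id, fixtures, and prompt text follow the test above, and nothing here is added by this commit:

```python
# Sketch: consume the streamed completion produced by the prefixed prompt.
stream = openai_client.completions.create(
    model=text_model_id,
    prompt="Respond to this question and explain your answer. " + tc["content"],
    stream=True,
    max_tokens=50,
)
chunks = [chunk.choices[0].text for chunk in stream if chunk.choices]
content = "".join(chunks)
assert len(content) > 0  # a more verbose prompt should yield non-trivial output
```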