From d8c4e7da4b7bdc08eb1c5f198e3822766dd20182 Mon Sep 17 00:00:00 2001
From: Yuan Tang
Date: Sun, 6 Oct 2024 10:00:55 -0400
Subject: [PATCH] Remove testing code

---
 .../providers/adapters/inference/openai/openai.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/llama_stack/providers/adapters/inference/openai/openai.py b/llama_stack/providers/adapters/inference/openai/openai.py
index 2a1e279d8..06a29873b 100644
--- a/llama_stack/providers/adapters/inference/openai/openai.py
+++ b/llama_stack/providers/adapters/inference/openai/openai.py
@@ -21,14 +21,11 @@ from .config import OpenAIImplConfig
 
 
 class OpenAIInferenceAdapter(Inference):
+    max_tokens: int
+    model_id: str
+
     def __init__(self, config: OpenAIImplConfig) -> None:
         self.config = config
-
-        # For testing purposes
-        # This model's maximum context length is 6144 tokens.
-        self.max_tokens = 6144
-        self.model_id = "mistral-7b-instruct"
-
         tokenizer = Tokenizer.get_instance()
         self.formatter = ChatFormat(tokenizer)
 
@@ -66,7 +63,7 @@ class OpenAIInferenceAdapter(Inference):
 
     def resolve_openai_model(self, model_name: str) -> str:
         # TODO: This should be overriden by other classes
-        return self.model_id
+        return model_name
 
     def get_openai_chat_options(self, request: ChatCompletionRequest) -> dict:
         options = {}
@@ -106,7 +103,7 @@ class OpenAIInferenceAdapter(Inference):
         model_input = self.formatter.encode_dialog_prompt(messages)
         input_tokens = len(model_input.tokens)
 
-        # TODO: There is a potential bug here
+        # TODO: There is a potential bug here to be investigated
-        # max_new_tokens = min(
-        #     request.sampling_params.max_tokens or (self.max_tokens - input_tokens),
-        #     self.max_tokens - input_tokens - 1,
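
Note (not part of the patch): the change turns max_tokens and model_id into bare class-level annotations and makes resolve_openai_model return the caller's model name, with a TODO that subclasses should override it. Below is a minimal sketch of what such a subclass could look like; the subclass name, model identifier, token limit, and alias mapping are hypothetical, and the import path is assumed from the file path shown in the diff.

    from llama_stack.providers.adapters.inference.openai.openai import (
        OpenAIInferenceAdapter,
    )


    class ExampleProviderInferenceAdapter(OpenAIInferenceAdapter):
        # Hypothetical values; a real adapter would use its provider's
        # actual context window and model identifier.
        max_tokens: int = 4096
        model_id: str = "example-7b-instruct"

        def resolve_openai_model(self, model_name: str) -> str:
            # Map a requested model name to a provider-specific identifier,
            # falling back to this adapter's default model when unknown.
            aliases = {"example-7b-instruct-v2": "example-7b-instruct"}
            return aliases.get(model_name, self.model_id)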