Fix Aleph Alpha client init: replace the AlephAlphaLLM class with module-level functions

2025-04-26 19:24:27 +00:00 · 2023-09-04 15:14:08 -07:00 · 2023-09-04 15:14:08 -07:00 · b8b7d9bf44
commit b8b7d9bf44
parent 5ae420317e
3 changed files with 108 additions and 122 deletions
--- a/litellm/llms/aleph_alpha.py
+++ b/litellm/llms/aleph_alpha.py
@ -1,4 +1,5 @@
-import os, json
+import os
+import json
 from enum import Enum
 import requests
 import time
@ -13,43 +14,30 @@ class AlephAlphaError(Exception):
            self.message
        )  # Call the base class constructor with the parameters it needs

-
-class AlephAlphaLLM:
-    def __init__(
-        self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None
-    ):
-        self.encoding = encoding
-        self.default_max_tokens_to_sample = default_max_tokens_to_sample
-        self.completion_url = "https://api.aleph-alpha.com/complete"
-        self.api_key = api_key
-        self.logging_obj = logging_obj
-        self.validate_environment(api_key=api_key)
-
-    def validate_environment(
-        self, api_key
-    ):  # set up the environment required to run the model
-        # set the api key
-        if self.api_key == None:
-            raise ValueError(
-                "Missing Aleph Alpha API Key - A call is being made to Aleph Alpha but no key is set either in the environment variables or via params"
-            )
-        self.api_key = api_key
-        self.headers = {
+def validate_environment(api_key):
+    headers = {
        "accept": "application/json",
        "content-type": "application/json",
-            "Authorization": "Bearer " + self.api_key,
    }
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+    return headers

 def completion(
-        self,
    model: str,
    messages: list,
    model_response: ModelResponse,
    print_verbose: Callable,
+    encoding,
+    api_key,
+    logging_obj,
    optional_params=None,
    litellm_params=None,
    logger_fn=None,
-    ):  # logic for parsing in - calling - parsing out model completion calls
+    default_max_tokens_to_sample=None,
+):
+    headers = validate_environment(api_key)
+    completion_url = "https://api.aleph-alpha.com/complete"
    model = model
    prompt = ""
    if "control" in model:  # follow the ###Instruction / ###Response format
@ -77,27 +65,27 @@ class AlephAlphaLLM:
    data = {
        "model": model,
        "prompt": prompt,
-            "maximum_tokens": optional_params["maximum_tokens"] if  "maximum_tokens" in optional_params else self.default_max_tokens_to_sample, # required input
+        "maximum_tokens": optional_params["maximum_tokens"] if "maximum_tokens" in optional_params else default_max_tokens_to_sample,  # required input
        **optional_params,
    }

    ## LOGGING
-        self.logging_obj.pre_call(
+    logging_obj.pre_call(
            input=prompt,
-            api_key=self.api_key,
+            api_key=api_key,
            additional_args={"complete_input_dict": data},
        )
    ## COMPLETION CALL
    response = requests.post(
-            self.completion_url, headers=self.headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
+        completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
    )
    if "stream" in optional_params and optional_params["stream"] == True:
        return response.iter_lines()
    else:
        ## LOGGING
-            self.logging_obj.post_call(
+        logging_obj.post_call(
                input=prompt,
-                api_key=self.api_key,
+                api_key=api_key,
                original_response=response.text,
                additional_args={"complete_input_dict": data},
            )
@ -117,10 +105,10 @@ class AlephAlphaLLM:

        ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. 
        prompt_tokens = len(
-                self.encoding.encode(prompt)
+            encoding.encode(prompt)
        ) 
        completion_tokens = len(
-                self.encoding.encode(model_response["choices"][0]["message"]["content"])
+            encoding.encode(model_response["choices"][0]["message"]["content"])
        )

        model_response["created"] = time.time()
@ -132,7 +120,6 @@ class AlephAlphaLLM:
        }
        return model_response

-    def embedding(
-        self,
-    ):  # logic for parsing in - calling - parsing out model embedding calls
+def embedding():
+    # logic for parsing in - calling - parsing out model embedding calls
    pass
--- a/litellm/main.py
+++ b/litellm/main.py
@ -25,8 +25,8 @@ from .llms import ai21
 from .llms import sagemaker
 from .llms import bedrock
 from .llms import huggingface_restapi
+from .llms import aleph_alpha
 from .llms.baseten import BasetenLLM
-from .llms.aleph_alpha import AlephAlphaLLM
 import tiktoken
 from concurrent.futures import ThreadPoolExecutor

@ -427,17 +427,10 @@ def completion(
            response = model_response
        elif model in litellm.aleph_alpha_models:
            aleph_alpha_key = (
-                api_key or litellm.aleph_alpha_key or os.environ.get("ALEPH_ALPHA_API_KEY")
+                api_key or litellm.aleph_alpha_key or get_secret("ALEPH_ALPHA_API_KEY") or get_secret("ALEPHALPHA_API_KEY")
            )

-            aleph_alpha_client = AlephAlphaLLM(
-                encoding=encoding,
-                default_max_tokens_to_sample=litellm.max_tokens,
-                api_key=aleph_alpha_key,
-                logging_obj=logging # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
-            )
-
-            model_response = aleph_alpha_client.completion(
+            model_response = aleph_alpha.completion(
                model=model,
                messages=messages,
                model_response=model_response,
@ -445,6 +438,10 @@ def completion(
                optional_params=optional_params,
                litellm_params=litellm_params,
                logger_fn=logger_fn,
+                encoding=encoding,
+                default_max_tokens_to_sample=litellm.max_tokens,
+                api_key=aleph_alpha_key,
+                logging_obj=logging # model call logging is done inside aleph_alpha.completion, as we may need to modify I/O to fit Aleph Alpha's requirements
            )

            if "stream" in optional_params and optional_params["stream"] == True:
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@ -64,6 +64,7 @@ def test_completion_claude():
 #         print(response)
 #     except Exception as e:
 #         pytest.fail(f"Error occurred: {e}")
+# test_completion_aleph_alpha()


 # def test_completion_aleph_alpha_control_models():
@ -75,6 +76,7 @@ def test_completion_claude():
 #         print(response)
 #     except Exception as e:
 #         pytest.fail(f"Error occurred: {e}")
+# test_completion_aleph_alpha_control_models()

 def test_completion_with_litellm_call_id():
    try:
@ -126,8 +128,8 @@ def test_completion_claude_stream():
 #         if "loading" in str(e):
 #             pass
 #         pytest.fail(f"Error occurred: {e}")
-# # test_completion_hf_api()

+# test_completion_hf_api()

 # def test_completion_hf_deployed_api():
 #     try: