Mirror of https://github.com/BerriAI/litellm.git
petals fixes

commit 385640b743 (parent f6ccadabc8)
4 changed files with 9 additions and 6 deletions
@@ -276,7 +276,7 @@ provider_list: List = [
     "vllm",
     "nlp_cloud",
     "bedrock",
-    "petals,"
+    "petals",
     "custom", # custom apis
 ]
 
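The typo matters because provider resolution does exact string matching against this list: with the literal `"petals,"` (comma inside the quotes) in provider_list, the string "petals" was never a recognized provider. A minimal sketch of the check this unblocks, assuming provider_list is exported at package level (the later hunks reference litellm.petals_models the same way):

```python
import litellm

# Before the fix the list held the literal string "petals," (comma inside the
# quotes), so an exact membership test for "petals" could never succeed.
print("petals" in litellm.provider_list)   # expected True after this commit
print("petals," in litellm.provider_list)  # expected False after this commit
```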
@@ -32,8 +32,6 @@ def completion(
 
     model = model
 
-    # You could also use "meta-llama/Llama-2-70b-chat-hf" or any other supported model from 🤗 Model Hub
-
     tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, add_bos_token=False)
     model = AutoDistributedModelForCausalLM.from_pretrained(model)
     model = model.cuda()
@@ -76,6 +74,8 @@ def completion(
     print_verbose(f"raw model_response: {outputs}")
     ## RESPONSE OBJECT
     output_text = tokenizer.decode(outputs[0])
+    print("output text")
+    print(output_text)
     model_response["choices"][0]["message"]["content"] = output_text
 
     ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
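The two hunks above touch the Petals backend's completion(): the first drops a stale Hugging Face comment, the second adds debug prints around the decoded output. For context, a minimal sketch of the raw Petals flow this backend wraps, with an illustrative model id (not taken from this diff) and assuming a CUDA device, since the backend calls model.cuda():

```python
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

# Illustrative checkpoint; any model served by a Petals swarm would do.
model_name = "petals-team/StableBeluga2"

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False)
model = AutoDistributedModelForCausalLM.from_pretrained(model_name)
model = model.cuda()  # client-side layers on GPU, matching the hunk above

inputs = tokenizer("What is the meaning of life?", return_tensors="pt")["input_ids"].cuda()
outputs = model.generate(inputs, max_new_tokens=64)

# Same decode step the new print statements expose in the backend.
print(tokenizer.decode(outputs[0]))
```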
@@ -959,8 +959,9 @@ def completion(
        or custom_llm_provider == "petals-team"
        or model in litellm.petals_models
    ):
-        custom_llm_provider = "baseten"
+        custom_llm_provider = "petals"
+        print("model on petals")
+        print(model)
        model_response = petals.completion(
            model=model,
            messages=messages,
@@ -970,7 +971,6 @@ def completion(
            litellm_params=litellm_params,
            logger_fn=logger_fn,
            encoding=encoding,
-            api_key=baseten_key,
            logging_obj=logging
        )
        if inspect.isgenerator(model_response) or (stream == True):
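These two hunks fix the routing in litellm's top-level completion(): requests that matched the Petals condition were previously labeled with the "baseten" provider and passed a baseten_key; they now set custom_llm_provider = "petals" and call petals.completion() without that key. A hedged usage sketch, with an illustrative model id that would need to appear in litellm.petals_models (or be routed explicitly with custom_llm_provider="petals"):

```python
import litellm

# Illustrative Petals-served model id; requires a working Petals client and a
# CUDA device, per the backend's model.cuda() call.
response = litellm.completion(
    model="petals-team/StableBeluga2",
    messages=[{"role": "user", "content": "Hello from the Petals swarm"}],
)

# litellm returns an OpenAI-style response, as the backend hunk that sets
# model_response["choices"][0]["message"]["content"] suggests.
print(response["choices"][0]["message"]["content"])
```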
@@ -1122,6 +1122,9 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
        ## nlp_cloud
        elif model in litellm.nlp_cloud_models:
            custom_llm_provider = "nlp_cloud"
+        ## petals
+        elif model in litellm.petals_models:
+            custom_llm_provider = "petals"
 
        if custom_llm_provider is None or custom_llm_provider=="":
            raise ValueError(f"LLM Provider NOT provided. Pass in the LLM provider you are trying to call. E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/{model}',..)` Learn more: https://docs.litellm.ai/docs/providers")
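The last hunk teaches get_llm_provider() to map models listed in litellm.petals_models to the "petals" provider instead of falling through to the ValueError below it. A simplified sketch of the resolution order after this change; this illustrates the branch logic only and is not the actual litellm function:

```python
from typing import Optional

import litellm


def resolve_provider(model: str, custom_llm_provider: Optional[str] = None) -> str:
    """Mirror of the branch order shown in the hunk above (illustrative only)."""
    if custom_llm_provider:
        return custom_llm_provider
    if model in litellm.nlp_cloud_models:
        return "nlp_cloud"
    if model in litellm.petals_models:  # the branch this commit adds
        return "petals"
    raise ValueError(
        "LLM Provider NOT provided. Pass in the LLM provider you are trying to call. "
        "Learn more: https://docs.litellm.ai/docs/providers"
    )
```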