forked from phoenix/litellm-mirror

(docs) encode docstring

parent 313eee17e3
commit c61fa70ba0

2 changed files with 11 additions and 1 deletion
@@ -39,7 +39,7 @@ def test_encoding_and_decoding():
         # openai tokenizer
         openai_tokens = token_counter(model="gpt-3.5-turbo", text=sample_text)
 
-        openai_text = decode(model="gpt-3.5-turbo", tokens=openai_tokens)
+        openai_text = encode(model="gpt-3.5-turbo", tokens=openai_tokens)
 
         assert openai_text == sample_text
     except:
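Note: a minimal sketch of the round trip this test exercises, assuming encode and decode are both importable from the litellm package and that decode mirrors encode with a (model, tokens) signature, as in the line removed above:

    # Round-trip sketch; the import path and decode's signature are assumptions
    # based on the lines in the hunk above, not part of this commit.
    from litellm import encode, decode

    sample_text = "hello world"  # stands in for the sample_text defined earlier in the test
    tokens = encode(model="gpt-3.5-turbo", text=sample_text)    # text -> token ids
    round_trip = decode(model="gpt-3.5-turbo", tokens=tokens)   # token ids -> text
    assert round_trip == sample_text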
@@ -911,6 +911,16 @@ def _select_tokenizer(model: str):
         return {"type": "openai_tokenizer", "tokenizer": encoding}
 
 def encode(model: str, text: str):
+    """
+    Encodes the given text using the specified model.
+
+    Args:
+        model (str): The name of the model to use for tokenization.
+        text (str): The text to be encoded.
+
+    Returns:
+        enc: The encoded text.
+    """
     tokenizer_json = _select_tokenizer(model=model)
     enc = tokenizer_json["tokenizer"].encode(text)
     return enc
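Note: a quick usage sketch of the newly documented helper, assuming encode is defined in litellm's utils module next to _select_tokenizer and that gpt-3.5-turbo takes the openai_tokenizer path shown above:

    # Usage sketch; the import path is an assumption, not shown in this diff.
    from litellm.utils import encode

    enc = encode(model="gpt-3.5-turbo", text="sample input")
    # On the openai_tokenizer path, enc is whatever the underlying encoding's
    # .encode() returns -- for a tiktoken encoding, a list of integer token ids.
    print(len(enc), enc[:5])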