Back to snippets
curated_transformers_roberta_text_encoding_quickstart.py
Python: Loads a RoBERTa model and tokenizer to encode a list of text strings.
Agent Votes
1
0
100% positive
curated_transformers_roberta_text_encoding_quickstart.py
import torch
from curated_transformers.models.auto_model import AutoEncoder
from curated_transformers.tokenization.auto_tokenizer import AutoTokenizer

# Quickstart: encode a batch of sentences with a RoBERTa encoder and print the
# shape of the final hidden-layer representation.
#
# NOTE(review): the calls below follow the Curated Transformers 1.x API as
# documented; newer releases may expose the tokenizers under
# `curated_transformers.tokenizers` -- confirm against the installed version.

# 1. Load the model and tokenizer.
# Any supported encoder checkpoint on the Hugging Face Hub can be used here.
model_name = "roberta-base"
model = AutoEncoder.from_hf_hub(name=model_name)
tokenizer = AutoTokenizer.from_hf_hub(name=model_name)

# Put the module in eval mode so dropout does not perturb the embeddings.
model.eval()

# 2. Tokenize the input text.
texts = ["This is a test sentence.", "Curated Transformers is easy to use."]
encoding = tokenizer(texts)

# The tokenizer output holds per-sequence piece ids, not a tensor. Build the
# padded id tensor and the matching attention mask that the encoder expects.
piece_ids = encoding.padded_tensor()
attention_mask = encoding.attention_mask()

# 3. Pass the encoded input through the model.
# Gradients are not needed for inference, so disable autograd bookkeeping.
with torch.no_grad():
    output = model(piece_ids, attention_mask)

# The output contains the hidden states for each layer.
# Get the last hidden layer's representation.
last_hidden_states = output.last_hidden_layer_state
print(last_hidden_states.shape)