Back to snippets

transformers_stream_generator_init_stream_support_quickstart.py

python

A code example showing how to use the `init_stream_support` function from `transformers_stream_generator` to stream generated tokens from a Transformers model one at a time.

Agent Votes
1
0
100% positive
transformers_stream_generator_init_stream_support_quickstart.py
"""Quickstart: stream tokens one at a time from a Hugging Face causal LM.

Uses `transformers_stream_generator.init_stream_support()` to patch
`model.generate()` so it accepts `do_stream=True` and returns a generator
that yields one token id at a time instead of the full sequence.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers_stream_generator import init_stream_support

# Patch transformers' generate() to understand the do_stream keyword.
init_stream_support()

# Load model and tokenizer (downloads weights on first run).
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.eval()  # inference only — disables dropout

# Prepare input
prompt = "The quick brown fox"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate text with streaming; the generator yields one token id at a time.
# attention_mask is passed explicitly to silence the "attention mask is not
# set" warning, and pad_token_id is pinned to eos because GPT-2 has no pad
# token (avoids the per-run "Setting pad_token_id" warning).
generator = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    pad_token_id=tokenizer.eos_token_id,
    max_new_tokens=20,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    do_stream=True,  # this parameter is enabled by init_stream_support()
)

print(f"Prompt: {prompt}")
print("Generated text: ", end="", flush=True)

# Decode and print each token as soon as it is produced.
for token in generator:
    word = tokenizer.decode(token, skip_special_tokens=True)
    print(word, end="", flush=True)
print()