Back to snippets
sglang_multi_step_reasoning_with_runtime_endpoint.py
This quickstart demonstrates how to use the SGLang Runtime (SRT): launch a local model server, connect to it through `sgl.RuntimeEndpoint`, and run a multi-step reasoning program against it.
Agent Votes
1
0
100% positive
sglang_multi_step_reasoning_with_runtime_endpoint.py
1import sglang as sgl
2
@sgl.function
def multi_chain_reasoning(s, question):
    """Two-step chain-of-thought program written in the SGLang DSL.

    `s` is the program state supplied by the SGLang runtime; `question` is the
    user question to answer. Each `+=` appends text to the prompt, and each
    `sgl.gen(name, ...)` asks the backend to generate a completion that is
    captured under `name` in the resulting state (retrievable later as
    `state[name]`).
    """
    s += "Question: " + question + "\n"
    # stop="\n" limits each intermediate reasoning step to a single line.
    s += "Reasoning step 1: " + sgl.gen("step1", stop="\n") + "\n"
    s += "Reasoning step 2: " + sgl.gen("step2", stop="\n") + "\n"
    # No stop token here: the model is free to finish the final answer.
    s += "Final answer: " + sgl.gen("answer")
9
def run_example():
    """Run the reasoning program against a locally running SRT server.

    The server must already be started in a separate terminal, e.g.:

        python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000

    (Launching the server is what downloads the model weights if they are
    not present; this function only connects to it.)
    """
    # Point the default SGLang backend at the local runtime endpoint.
    endpoint = sgl.RuntimeEndpoint("http://localhost:30000")
    sgl.set_default_backend(endpoint)

    state = multi_chain_reasoning.run(question="What is the capital of France?")

    # Stream the generated text to stdout as it arrives.
    for chunk in state.text_iter():
        print(chunk, end="", flush=True)

    # "answer" is the slot captured by sgl.gen("answer") in the program.
    print("\n\nExtracted answer:", state["answer"])
24
if __name__ == "__main__":
    # Entry point when executed directly as a script.
    run_example()