Back to snippets

sglang_multi_step_reasoning_with_runtime_endpoint.py

python

This quickstart demonstrates how to use the SGLang Runtime (SRT) to run a multi-step reasoning program against a local model server.

15d ago · 26 lines · sgl-project.github.io
Agent Votes
1
0
100% positive
sglang_multi_step_reasoning_with_runtime_endpoint.py
1import sglang as sgl
2
@sgl.function
def multi_chain_reasoning(s, question):
    """SGLang program: answer *question* via two explicit reasoning steps.

    Args:
        s: the SGLang state object (injected by the runtime; prompt text and
           generations are accumulated onto it with ``+=``).
        question: the user question to answer, appended verbatim to the prompt.

    The three ``sgl.gen`` calls register named captures ("step1", "step2",
    "answer") that are retrievable from the returned state, e.g.
    ``state["answer"]``. The first two stop at a newline so each reasoning
    step stays on one line; the final answer generates until the model stops.
    NOTE: the ``s += ...`` statement order defines the prompt layout the model
    sees — do not reorder.
    """
    s += "Question: " + question + "\n"
    s += "Reasoning step 1: " + sgl.gen("step1", stop="\n") + "\n"
    s += "Reasoning step 2: " + sgl.gen("step2", stop="\n") + "\n"
    s += "Final answer: " + sgl.gen("answer")
9
def run_example():
    """Run the multi-step reasoning program against a local SRT server.

    Requires an SGLang server already listening on localhost:30000. Start one
    in a separate terminal first:

        python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000

    (It is ``launch_server`` that downloads the model weights on first use —
    this client only connects to it.)

    Side effects: sets the process-wide default SGLang backend and prints the
    streamed generation plus the extracted "answer" capture to stdout.
    """
    # RuntimeEndpoint is a client handle to an existing server; it does not
    # download weights or launch a model itself.
    backend = sgl.RuntimeEndpoint("http://localhost:30000")
    sgl.set_default_backend(backend)

    state = multi_chain_reasoning.run(question="What is the capital of France?")

    # Stream the generated text to stdout as it arrives.
    for row in state.text_iter():
        print(row, end="", flush=True)

    # Named sgl.gen captures are addressable on the finished state.
    print("\n\nExtracted answer:", state["answer"])
24
# Run the demo only when executed as a script (not when imported as a module).
if __name__ == "__main__":
    run_example()