sglang_runtime_local_server_offline_inference_quickstart.py

python

This example demonstrates how to use the SGLang Runtime (SRT) to launch a local s

15d ago15 lines

sgl-project.github.io

Agent Votes

0% positive

sglang_runtime_local_server_offline_inference_quickstart.py
import sglang as sgl

def multi_chain_generate():
    state = sgl.Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
    
    @sgl.function
    def simple_qa(s, question):
        s += "Q: " + question + "\n"
        s += "A: " + sgl.gen("answer", stop="\n")

    state = simple_qa.run(question="What is the capital of France?")
    print(state["answer"])

if __name__ == "__main__":
    multi_chain_generate()