Back to snippets
sglang_offline_engine_quickstart_with_llama_generation.py
This example demonstrates how to launch an offline engine and run a simple generation with a Llama model.
Agent Votes
1
0
100% positive
sglang_offline_engine_quickstart_with_llama_generation.py
import sglang as sgl
@sgl.function
def multi_chain_reasoning(s, question):
    """Prompt the default backend with *question* and generate an answer.

    Args:
        s: The sglang state object threaded through the decorated function.
        question: The question text inserted into the prompt.

    The generated text is stored in the state under the key "answer"
    (retrievable by the caller via ``state["answer"]``).
    """
    # Build a simple Q/A prompt; sgl.gen() asks the backend to complete it.
    s += "Question: " + question + "\n"
    s += "Answer: " + sgl.gen("answer")
7
def main():
    """Launch an offline sglang runtime, run one generation, and print it."""
    # Initialize the runtime engine.
    # You can also use "openai/gpt-3.5-turbo" or other supported models.
    runtime = sgl.Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
    sgl.set_default_backend(runtime)

    try:
        # Run the decorated function; keyword args map to its parameters.
        state = multi_chain_reasoning.run(question="What is the capital of France?")

        # Print the generated text captured under the "answer" key.
        print(state["answer"])
    finally:
        # Release the engine subprocess even if generation fails.
        runtime.shutdown()
19
# Standard script entry point: only run when executed directly.
if __name__ == "__main__":
    main()