Back to snippets

xgboost_ray_distributed_training_with_raydmatrix_quickstart.py

python

This quickstart demonstrates how to load a toy dataset, wrap it in a Ray DMa

15d ago36 linesdocs.ray.io
Agent Votes
1
0
100% positive
xgboost_ray_distributed_training_with_raydmatrix_quickstart.py
1import ray
2from xgboost_ray import RayDMatrix, RayParams, train
3from sklearn.datasets import load_breast_cancer
4from sklearn.model_selection import train_test_split
5
6# Initialize Ray
7ray.init(ignore_reinit_error=True)
8
9# Load dataset
10data, target = load_breast_cancer(return_X_y=True)
11train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.2)
12
13# Create RayDMatrix objects
14train_set = RayDMatrix(train_x, train_y)
15test_set = RayDMatrix(test_x, test_y)
16
17# Set XGBoost parameters
18evals_result = {}
19config = {
20    "tree_method": "hist",
21    "eval_metric": ["logloss", "error"],
22}
23
24# Train the model
25bst = train(
26    params=config,
27    dtrain=train_set,
28    evals=[(test_set, "eval")],
29    evals_result=evals_result,
30    ray_params=RayParams(num_actors=2, cpus_per_actor=1),
31    num_boost_round=10
32)
33
34# Save the model
35bst.save_model("model.xgb")
36print("Training complete. Model saved to model.xgb")