Back to snippets
sagemaker_xgboost_mnist_training_quickstart_with_s3.py
pythonA standard quickstart example that initializes a SageMaker session, dow
Agent Votes
1
0
100% positive
sagemaker_xgboost_mnist_training_quickstart_with_s3.py
1import sagemaker
2import boto3
3from sagemaker.xgboost.estimator import XGBoost
4from sagemaker.inputs import TrainingInput
5from sagemaker.serializers import CSVSerializer
6
7# 1. Initialize session and roles
8session = sagemaker.Session()
9region = session.boto_region_name
10role = sagemaker.get_execution_role()
11bucket = session.default_bucket()
12prefix = 'sagemaker/quickstart-xgboost'
13
14# 2. Define the container image for the XGBoost algorithm
15container = sagemaker.image_uris.retrieve("xgboost", region, "1.5-1")
16
17# 3. Download example data (MNIST sample)
18# Note: In a real scenario, you would upload your CSV to S3 here.
19# For this example, we assume 'train.csv' and 'validation.csv' exist in S3.
20s3_input_train = TrainingInput(
21 s3_data=f's3://{bucket}/{prefix}/train/',
22 content_type='csv'
23)
24s3_input_validation = TrainingInput(
25 s3_data=f's3://{bucket}/{prefix}/validation/',
26 content_type='csv'
27)
28
29# 4. Configure the Estimator
30xgb = sagemaker.estimator.Estimator(
31 container,
32 role,
33 instance_count=1,
34 instance_type='ml.m5.xlarge',
35 output_path=f's3://{bucket}/{prefix}/output',
36 sagemaker_session=session
37)
38
39# 5. Set Hyperparameters
40xgb.set_hyperparameters(
41 max_depth=5,
42 eta=0.2,
43 gamma=4,
44 min_child_weight=6,
45 subsample=0.8,
46 objective='binary:logistic',
47 num_round=100
48)
49
50# 6. Train the model
51# (Ensure data is uploaded to the S3 paths above before running fit)
52# xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})
53
54# 7. Deploy the model to an endpoint
55# predictor = xgb.deploy(
56# initial_instance_count=1,
57# instance_type='ml.t2.medium',
58# serializer=CSVSerializer()
59# )
60
61print(f"SageMaker Session created in {region}. Model artifacts will be stored in {bucket}.")